数据挖掘 - 如何下载 BAIR 无动作机器人推送数据集？ - 吾爱随笔录

如何下载 BAIR 无动作机器人推送数据集？

数据挖掘数据集

2022-03-13 23:48:20

我正在尝试下载 BAIR action-free robot pushing 数据集。我先尝试从这里下载：在浏览器中，它显示文件大小为 30GB，但只下载了一部分数据就失败了。我尝试了多次都没有成功。然后我尝试使用 wget 下载：

wget http://rail.eecs.berkeley.edu/datasets/bair_robot_pushing_dataset_v0.tar

即便如此，它仍显示总大小为 30GB，但在下载了大约 199MB 之后就结束了，并提示下载已完成：

wget http://rail.eecs.berkeley.edu/datasets/bair_robot_pushing_dataset_v0.tar
--2019-05-16 12:30:50--  http://rail.eecs.berkeley.edu/datasets/bair_robot_pushing_dataset_v0.tar
Resolving rail.eecs.berkeley.edu (rail.eecs.berkeley.edu)... 128.32.189.73
Connecting to rail.eecs.berkeley.edu (rail.eecs.berkeley.edu)|128.32.189.73|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 32274964480 (30G) [application/x-tar]
Saving to: ‘bair_robot_pushing_dataset_v0.tar’

bair_robot_pushing_dataset_v0.tar                    0%[                                                                                                                 ] 189.95M   456KB/s    in 10m 59s 

2019-05-16 12:41:50 (295 KB/s) - Connection closed at byte 199172826. Retrying.

--2019-05-16 12:41:51--  (try: 2)  http://rail.eecs.berkeley.edu/datasets/bair_robot_pushing_dataset_v0.tar
Connecting to rail.eecs.berkeley.edu (rail.eecs.berkeley.edu)|128.32.189.73|:80... connected.
HTTP request sent, awaiting response... 416 Requested range not satisfiable

    The file is already fully retrieved; nothing to do.

另外，我在这里找到了一个下载 BAIR 数据集的脚本。但我在这里也遇到了同样的问题。

我现在很困惑。是数据集本身就这么小，还是我做错了什么？

2个回答

BAIR 数据集可以在这里下载 https://sites.google.com/berkeley.edu/robotic-interaction-datasets

此外，这是从数据集中提取数据的代码

import datetime
import os
import time

import cv2
import numpy as np
import skvideo.io
import tensorflow as tf
from PIL import Image
from tensorflow.python.platform import gfile

def get_next_video_data(data_dir):
    """
    Generator over every sequence stored in the tfrecord files of a directory.

    :param data_dir: directory whose entries (matched with ``*``) are each read as a tfrecord file
    :return: yields ``(filename, index_in_file, actions, endeffector_positions, aux1_frames, main_frames)``
             where actions has shape (n, 4), endeffector_positions has shape (n, 3) and each frames
             array has shape (n, 64, 64, 3); ``index_in_file`` restarts at 0 for every file
    :raises RuntimeError: if no file is found in ``data_dir``
    """

    def decode_frame(feature_map, key):
        # The feature holds raw RGB bytes of a 64x64 image; decode them and
        # return the pixels with a leading axis of length 1 so that frames can
        # be concatenated along axis 0 later.
        raw_bytes = feature_map[key].bytes_list.value[0]
        image = Image.frombytes('RGB', (64, 64), raw_bytes)
        width, height = image.size
        pixels = np.array(image.getdata()).reshape((height, width, 3))
        return pixels.reshape(1, 64, 64, 3)

    record_paths = gfile.Glob(os.path.join(data_dir, '*'))
    if not record_paths:
        raise RuntimeError('No data files found.')

    for record_path in record_paths:
        records = tf.python_io.tf_record_iterator(record_path)
        for seq_index, serialized in enumerate(records):
            example = tf.train.Example()
            example.ParseFromString(serialized)
            # print(example)        # To know what all features are present
            feature_map = example.features.feature

            actions = np.empty((0, 4), dtype='float')
            endeffector_positions = np.empty((0, 3), dtype='float')
            aux1_chunks = []
            main_chunks = []

            step = 0
            while True:
                prefix = str(step)

                action_value = np.array(feature_map[prefix + '/action'].float_list.value)
                # An empty action list means there is no time step with this index.
                if action_value.shape == (0,):
                    break
                actions = np.vstack((actions, action_value))

                position = list(feature_map[prefix + '/endeffector_pos'].float_list.value)
                endeffector_positions = np.vstack((endeffector_positions, position))

                aux1_chunks.append(decode_frame(feature_map, prefix + '/image_aux1/encoded'))
                main_chunks.append(decode_frame(feature_map, prefix + '/image_main/encoded'))

                step += 1

            aux1_stack = np.concatenate(aux1_chunks, axis=0)
            main_stack = np.concatenate(main_chunks, axis=0)
            yield record_path, seq_index, actions, endeffector_positions, aux1_stack, main_stack


def _save_frames(frames, folder_path):
    """
    Writes every frame as ``frame_NNN.bmp`` into a newly created folder.

    :param frames: iterable of RGB frames (arrays); each is cast to uint8 before writing
    :param folder_path: folder to create and fill; must not exist yet
    :return:
    """
    os.makedirs(folder_path)
    for i, frame in enumerate(frames):
        filepath = os.path.join(folder_path, 'frame_{0:03}.bmp'.format(i))
        # Frames are RGB; convert to BGR channel order before handing them to cv2.imwrite.
        cv2.imwrite(filepath, cv2.cvtColor(frame.astype('uint8'), cv2.COLOR_RGB2BGR))


def extract_data(data_dir, output_dir, frame_rate):
    """
    Extracts data in tfrecord format to gifs, mp4s, frames and csv files.

    One sub-folder is created per sequence, named with a zero-padded running index
    (``000``, ``001``, ...). The index counts across all tfrecord files in ``data_dir``:
    the per-file index yielded by ``get_next_video_data`` restarts at 0 for each file,
    so using it as the folder name made ``os.makedirs`` fail on the second file.

    :param data_dir: directory containing the tfrecord files
    :param output_dir: destination directory; created if missing, must be empty if it exists
    :param frame_rate: frame rate passed to the video writer as the ``-r`` input option
    :return:
    :raises RuntimeError: if ``output_dir`` exists and is not empty
    """
    if os.path.exists(output_dir):
        if os.listdir(output_dir):
            raise RuntimeError('Directory not empty: {0}'.format(output_dir))
    else:
        os.makedirs(output_dir)

    sequences = get_next_video_data(data_dir)
    for video_index, (_, _, actions, endeff_pos, aux1_frames, main_frames) in enumerate(sequences):
        video_out_dir = os.path.join(output_dir, '{0:03}'.format(video_index))
        os.makedirs(video_out_dir)

        # noinspection PyTypeChecker
        np.savetxt(os.path.join(video_out_dir, 'actions.csv'), actions, delimiter=',')
        # noinspection PyTypeChecker
        np.savetxt(os.path.join(video_out_dir, 'endeffector_positions.csv'), endeff_pos, delimiter=',')

        # Each camera view is written both as a gif and as an mp4. A fresh options
        # dict is built for every call rather than sharing one instance.
        for view_name, frames in (('aux1', aux1_frames), ('main', main_frames)):
            for extension in ('gif', 'mp4'):
                video_path = os.path.join(video_out_dir, '{0}.{1}'.format(view_name, extension))
                skvideo.io.vwrite(video_path, frames, inputdict={'-r': str(frame_rate)})

        # Save frames
        _save_frames(aux1_frames, os.path.join(video_out_dir, 'aux1_frames'))
        _save_frames(main_frames, os.path.join(video_out_dir, 'main_frames'))
        print('Saved video: {0:03}'.format(video_index))


def main():
    """
    Runs the extraction for the test split using fixed relative paths and 4 frames per second.
    :return:
    """
    extract_data(
        data_dir='../softmotion30_44k/test',
        output_dir='../ExtractedData/test',
        frame_rate=4,
    )


if __name__ == '__main__':
    # Print start and end timestamps around the run, then the elapsed wall-clock time.
    print('Program started at {0}'.format(datetime.datetime.now().strftime('%d/%m/%Y %I:%M:%S %p')))
    start_time = time.time()
    main()
    end_time = time.time()
    print('Program ended at {0}'.format(datetime.datetime.now().strftime('%d/%m/%Y %I:%M:%S %p')))
    print('Execution time: {0}'.format(datetime.timedelta(seconds=end_time - start_time)))

参考资料：https://github.com/edenton/svg/blob/master/data/convert_bair.py

下载 BAIR robot pushing small 数据集（bair_robot_pushing_small）：

import resource

import tensorflow_datasets as tfds

# Raise the soft limit on open files to the hard limit before preparing the
# dataset, to prevent ResourceExhaustedError. See
# https://github.com/tensorflow/datasets/issues/1441#issuecomment-581660890
soft_limit, hard_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
resource.setrlimit(resource.RLIMIT_NOFILE, (hard_limit, hard_limit))

builder = tfds.builder('bair_robot_pushing_small')
builder.download_and_prepare()
# ds_dict has 'train' and 'test' tf.Dataset objects
ds_dict = builder.as_dataset()

用法：

# TensorFlow: iterate over batches of 32 elements from the 'train' split.
gen = iter(ds_dict['train'].batch(32))
batch = next(gen)

# PyTorch/Jax: same batches, obtained through the dataset's numpy iterator.
gen = ds_dict['train'].batch(32).as_numpy_iterator()
batch = next(gen)

其它你可能感兴趣的问题

上一篇神经网络中隐藏层的作用是什么？下一篇解决网格世界的最佳学习自动机强化模式，帮助我