1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
def mkdir_if_not_exist(path):
    """Create the directory named by joining the components of ``path``.

    Args:
        path: Sequence of path components (for example
            ``[root, 'train', label]``) that is joined with
            ``os.path.join``.

    Raises:
        FileExistsError: If the joined path exists but is not a directory.

    Missing parent directories are created too; an already existing
    directory is left untouched.
    """
    # exist_ok avoids the check-then-create race of testing
    # os.path.exists() first and creating afterwards.
    os.makedirs(os.path.join(*path), exist_ok=True)


def _copy_into_dir(src_file, dest_parts):
    """Copy ``src_file`` into the directory ``dest_parts``, creating it first."""
    mkdir_if_not_exist(dest_parts)
    shutil.copy(src_file, os.path.join(*dest_parts))


def reorg_dog_data(data_dir, label_file, train_dir, test_dir, new_data_dir,
                   valid_ratio):
    """Copy a labelled image dataset into per-label sub-directories.

    The training files are shuffled with the ``random`` module, then the
    first ``int(n * valid_ratio)`` of them are copied to
    ``new_data_dir/valid/<label>/`` and the rest to
    ``new_data_dir/train/<label>/``. Every training file is additionally
    copied to ``new_data_dir/train_valid/<label>/``. All files of the test
    directory are copied to ``new_data_dir/test/unknown/``.

    Args:
        data_dir: Directory that contains the label file and the train and
            test directories.
        label_file: Name of a CSV file inside ``data_dir``. It must have
            exactly two columns: the file id and its label.
        train_dir: Name of the training-file directory inside ``data_dir``.
        test_dir: Name of the test-file directory inside ``data_dir``.
        new_data_dir: Root directory of the reorganised copy.
        valid_ratio: Fraction of the labelled training files that goes to
            the validation split.

    A training file's id is the part of its name before the first ``'.'``.
    Entries whose id has no row in the label file (hidden files and the
    like) are skipped instead of aborting the run with ``KeyError``.
    """
    # Read every column as text: numeric-looking ids must compare equal to
    # the string taken from the file name, and labels are used as
    # directory names.
    labels = pd.read_csv(os.path.join(data_dir, label_file), dtype=str)
    id2label = {img_id: label for img_id, label in labels.values}

    src_train_dir = os.path.join(data_dir, train_dir)
    # Filter before shuffling so unlabeled entries neither crash the loop
    # nor count towards the size of the validation split.
    train_files = [name for name in os.listdir(src_train_dir)
                   if name.split('.')[0] in id2label]
    random.shuffle(train_files)
    valid_ds_size = int(len(train_files) * valid_ratio)

    for i, name in enumerate(train_files):
        src_file = os.path.join(src_train_dir, name)
        img_label = id2label[name.split('.')[0]]
        split = 'valid' if i < valid_ds_size else 'train'
        # Each file lands in its own split and in the combined set.
        for subset in (split, 'train_valid'):
            _copy_into_dir(src_file, [new_data_dir, subset, img_label])

    # Test files carry no labels, so they share one placeholder class.
    src_test_dir = os.path.join(data_dir, test_dir)
    test_dest = [new_data_dir, 'test', 'unknown']
    mkdir_if_not_exist(test_dest)
    for test_file in os.listdir(src_test_dir):
        shutil.copy(os.path.join(src_test_dir, test_file),
                    os.path.join(*test_dest))
|