|
| 1 | +import argparse |
| 2 | +import json |
| 3 | +import os |
| 4 | +from random import shuffle |
| 5 | + |
| 6 | + |
def _load_samples(json_path):
    """Return the list stored under the 'samples' key of the JSON file at *json_path*."""
    with open(json_path) as fp:
        return json.load(fp)['samples']


def _select_subset(samples, wanted_labels, limit):
    """Collect at most *limit* samples that carry at least one wanted label.

    Samples are visited in the order given. Each kept sample is reduced to
    its 'image_name' and the subset of its 'image_labels' found in
    *wanted_labels* (original label order preserved). Samples left with no
    labels after filtering are skipped.

    Unlike an index-driven loop, this stops cleanly when *samples* runs out,
    so the result may be shorter than *limit*.
    """
    wanted = set(wanted_labels)  # built once: O(1) membership per label
    subset = []
    for sample in samples:
        if len(subset) >= limit:
            break
        name, tags = sample['image_name'], sample['image_labels']
        kept = [tag for tag in tags if tag in wanted]
        if kept:
            subset.append({'image_name': name, 'image_labels': kept})
    return subset


def main():
    """Create reduced train/test annotation files restricted to chosen labels.

    Reads 'train.json' and 'test.json' from the folder given by --img-path,
    keeps only samples that have at least one of the requested labels
    (dropping all other labels from each kept sample), and writes the first
    --train-size / --val-size such samples to 'small_train.json' and
    'small_test.json' in the same folder. With --shuffle the sample lists
    are shuffled before selection.

    The requested labels are validated against the lines of the file
    'nus_wide/cats', resolved relative to the current working directory.

    Raises:
        SystemExit: with status -1 when a requested label is not listed in
            'nus_wide/cats' (argparse may also exit on bad arguments).
    """
    parser = argparse.ArgumentParser('Subset creation')
    parser.add_argument("-i", "--img-path", required=True, type=str, help='Path to the "images" folder')
    parser.add_argument("-v", "--val-size", default=1000, type=int, help='Size of the validation data')
    parser.add_argument("-t", "--train-size", default=5000, type=int, help='Size of the train data')
    parser.add_argument('--shuffle', action='store_true', help='Shuffle samples before splitting')
    parser.add_argument("-l", "--labels", nargs='+', default=['house', 'birds', 'sun', 'valley',
                                                              'nighttime', 'boats', 'mountain', 'tree', 'snow',
                                                              'beach', 'vehicle', 'rocks',
                                                              'reflection', 'sunset', 'road', 'flowers', 'ocean',
                                                              'lake', 'window', 'plants',
                                                              'buildings', 'grass', 'water', 'animal', 'person',
                                                              'clouds', 'sky'], help='Subset labels')
    args = parser.parse_args()
    img_path = args.img_path
    labels = args.labels

    with open('nus_wide/cats') as l_f:
        possible_labels = [line.strip() for line in l_f]

    for label in labels:
        if label not in possible_labels:
            print('Label:', label, "is unknown. Possible labels:", ', '.join(possible_labels))
            # SystemExit directly: the bare `exit` helper is injected by the
            # `site` module and is not guaranteed to exist in every run mode.
            raise SystemExit(-1)

    test_samples = _load_samples(os.path.join(img_path, 'test.json'))
    train_samples = _load_samples(os.path.join(img_path, 'train.json'))

    if args.shuffle:
        # Test first, then train: keeps the random-number consumption order
        # stable for anyone seeding the generator.
        shuffle(test_samples)
        shuffle(train_samples)

    small_train = _select_subset(train_samples, labels, args.train_size)
    small_test = _select_subset(test_samples, labels, args.val_size)

    # Running out of matching samples is reported rather than crashing, so
    # the (smaller) subset files are still produced.
    for split_name, subset, requested in (('train', small_train, args.train_size),
                                          ('test', small_test, args.val_size)):
        if len(subset) < requested:
            print('Warning: only', len(subset), 'of', requested,
                  'requested', split_name, 'samples match the selected labels')

    with open(os.path.join(img_path, 'small_train.json'), 'w') as fp:
        json.dump({'samples': small_train, 'labels': labels}, fp, indent=3)

    with open(os.path.join(img_path, 'small_test.json'), 'w') as fp:
        json.dump({'samples': small_test, 'labels': labels}, fp, indent=3)
| 71 | + |
| 72 | + |
# Run the subset builder only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments