14 changes: 7 additions & 7 deletions scripts/preprocess.py
@@ -36,18 +36,18 @@
   train_size = total_size - val_size - test_size

   if not args.quiet:
-    print 'Total vocabulary size: %d' % len(token_to_idx)
-    print 'Total tokens in file: %d' % total_size
-    print '  Training size: %d' % train_size
-    print '  Val size: %d' % val_size
-    print '  Test size: %d' % test_size
+    print('Total vocabulary size: %d' % len(token_to_idx))
+    print('Total tokens in file: %d' % total_size)
+    print('  Training size: %d' % train_size)
+    print('  Val size: %d' % val_size)
+    print('  Test size: %d' % test_size)

   # Choose the datatype based on the vocabulary size
   dtype = np.uint8
   if len(token_to_idx) > 255:
     dtype = np.uint32
   if not args.quiet:
-    print 'Using dtype ', dtype
+    print('Using dtype ', dtype)

   # Just load data into memory ... we'll have to do something more clever
   # for huge datasets but this should be fine for now
@@ -87,7 +87,7 @@
   # Dump a JSON file for the vocab
   json_data = {
     'token_to_idx': token_to_idx,
-    'idx_to_token': {v: k for k, v in token_to_idx.iteritems()},
+    'idx_to_token': {v: k for k, v in token_to_idx.items()},
   }
   with open(args.output_json, 'w') as f:
     json.dump(json_data, f)
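
For context, here is a minimal standalone sketch (not part of the diff) of the Python 3 behaviours this change relies on, using a small hypothetical vocabulary: print() as a function, dict.items() replacing the removed dict.iteritems(), and the same uint8/uint32 dtype choice as the first hunk.

import numpy as np

token_to_idx = {'a': 1, 'b': 2, 'c': 3}  # hypothetical toy vocabulary

# dtype selection mirrors the hunk above: uint8 is enough for up to 255 distinct tokens
dtype = np.uint8
if len(token_to_idx) > 255:
    dtype = np.uint32
print('Using dtype ', dtype)

# dict.iteritems() no longer exists in Python 3; items() is the replacement
idx_to_token = {v: k for k, v in token_to_idx.items()}
print(idx_to_token)  # {1: 'a', 2: 'b', 3: 'c'}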