PaddlePaddle
diff --git a/‎datasets/criteo_autofis/Criteo.py‎
Lines changed: 544 additions & 0 deletions b/‎datasets/criteo_autofis/Criteo.py‎
Lines changed: 544 additions & 0 deletions
diff --git a/‎datasets/criteo_autofis/get_data.py‎
Lines changed: 70 additions & 0 deletions b/‎datasets/criteo_autofis/get_data.py‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎models/rank/autofis/config.yaml‎
Lines changed: 2 additions & 2 deletions b/‎models/rank/autofis/config.yaml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎models/rank/autofis/criteo_reader.py‎
Lines changed: 4 additions & 4 deletions b/‎models/rank/autofis/criteo_reader.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎models/rank/autofis/data/sample_data/sample_train_x.txt‎
Lines changed: 100 additions & 0 deletions b/‎models/rank/autofis/data/sample_data/sample_train_x.txt‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎models/rank/autofis/data/sample_data/sample_train_y.txt‎
Lines changed: 100 additions & 0 deletions b/‎models/rank/autofis/data/sample_data/sample_train_y.txt‎
Lines changed: 100 additions & 0 deletions
diff --git a/‎models/rank/autofis/data/sample_data/test/test_x.npy‎
-305 KB b/‎models/rank/autofis/data/sample_data/test/test_x.npy‎
-305 KB
diff --git a/‎models/rank/autofis/data/sample_data/test/test_y.npy‎
-7.94 KB b/‎models/rank/autofis/data/sample_data/test/test_y.npy‎
-7.94 KB
diff --git a/‎models/rank/autofis/data/sample_data/train/train_x.npy‎
-305 KB b/‎models/rank/autofis/data/sample_data/train/train_x.npy‎
-305 KB
diff --git a/‎models/rank/autofis/data/sample_data/train/train_y.npy‎
-7.94 KB b/‎models/rank/autofis/data/sample_data/train/train_y.npy‎
-7.94 KB
@@ -0,0 +1,70 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import os
+from Criteo import Criteo
+from tqdm import tqdm
+
+batch_size = 1024
+
+# Options handed to Criteo.batch_generator for the training split.
+train_data_param = {
+    'gen_type': 'train',
+    'random_sample': True,
+    'batch_size': batch_size,
+    'split_fields': False,
+    'on_disk': True,
+    'squeeze_output': True,
+}
+# Options handed to Criteo.batch_generator for the test split.
+test_data_param = {
+    'gen_type': 'test',
+    'random_sample': False,
+    'batch_size': batch_size,
+    'split_fields': False,
+    'on_disk': True,
+    'squeeze_output': True,
+}
+
+dataset = Criteo(initialized=True)
+train_gen = dataset.batch_generator(train_data_param)
+test_gen = dataset.batch_generator(test_data_param)
+
+output_dir = 'data/whole_data'
+
+
+def _export_split(batch_gen, split):
+    """Drain ``batch_gen`` and write the whole split as integer text files.
+
+    Every ``(x, y)`` batch yielded by ``batch_gen`` is collected, the batches
+    are concatenated along axis 0, and the results are written to
+    ``<output_dir>/<split>/<split>_x.txt`` and
+    ``<output_dir>/<split>/<split>_y.txt`` with ``fmt='%d'``.
+
+    Args:
+        batch_gen: iterable yielding ``(x, y)`` array pairs.
+        split: split name, used for both the sub-directory and the file
+            name prefix (e.g. ``'train'`` or ``'test'``).
+    """
+    xs = []
+    ys = []
+    for x, y in tqdm(batch_gen):
+        xs.append(x)
+        ys.append(y)
+
+    x = np.concatenate(xs, 0)
+    y = np.concatenate(ys, 0)
+    print(x.shape)
+
+    # np.savetxt does not create missing parent directories, so make sure
+    # the target directory exists before writing.
+    split_dir = os.path.join(output_dir, split)
+    os.makedirs(split_dir, exist_ok=True)
+    np.savetxt(os.path.join(split_dir, split + '_x.txt'), x, fmt='%d')
+    np.savetxt(os.path.join(split_dir, split + '_y.txt'), y, fmt='%d')
+
+
+_export_split(train_gen, 'train')
+_export_split(test_gen, 'test')
@@ -15,7 +15,7 @@
 # global settings
 
 runner:
-  train_data_dir: "data/sample_data/train"
+  train_data_dir: "data/sample_data"
   train_reader_path: "criteo_reader" # importlib format
   use_gpu: False
   use_auc: False
@@ -24,7 +24,7 @@ runner:
   print_interval: 1
   #model_init_path: "output_model/0" # init model
   model_save_path: "output_model_autofis"
-  test_data_dir: "data/sample_data/test"
+  test_data_dir: "data/sample_data"
   infer_reader_path: "criteo_reader" # importlib format
   infer_batch_size: 200
   infer_load_path: "output_model_autofis"
 
@@ -22,10 +22,10 @@ class RecDataset(Dataset):
     def __init__(self, file_list, config):
         super(RecDataset, self).__init__()
         for file in file_list:
-            if file.endswith('x.npy'):
-                self.x = np.load(file)
-            elif file.endswith('y.npy'):
-                self.y = np.load(file)
+            if file.endswith('x.txt'):
+                self.x = np.loadtxt(file, dtype=np.int64)
+            elif file.endswith('y.txt'):
+                self.y = np.loadtxt(file, dtype=np.int64)
 
     def __getitem__(self, item):
         return self.x[item], self.y[item]
 
@@ -0,0 +1,100 @@
+0
+1
+1
+0
+1
+1
+1
+1
+0
+1
+1
+1
+0
+1
+0
+1
+1
+0
+0
+1
+0
+1
+1
+0
+0
+1
+1
+1
+1
+1
+0
+1
+0
+0
+0
+0
+1
+0
+0
+1
+0
+1
+1
+0
+0
+0
+1
+0
+1
+1
+1
+0
+0
+0
+1
+0
+0
+0
+1
+1
+1
+0
+1
+1
+0
+0
+1
+1
+0
+1
+0
+0
+0
+1
+1
+0
+1
+0
+1
+1
+1
+0
+1
+0
+1
+0
+0
+0
+0
+1
+0
+1
+0
+0
+0
+1
+0
+1
+0
+1
-Original file line number
+Diff line change
@@ @@ -0,0 +1,100 @@ @@
 +0
 +1
 +1
 +0
 +1
 +1
 +1
 +1
 +0
 +1
 +1
 +1
 +0
 +1
 +0
 +1
 +1
 +0
 +0
 +1
 +0
 +1
 +1
 +0
 +0
 +1
 +1
 +1
 +1
 +1
 +0
 +1
 +0
 +0
 +0
 +0
 +1
 +0
 +0
 +1
 +0
 +1
 +1
 +0
 +0
 +0
 +1
 +0
 +1
 +1
 +1
 +0
 +0
 +0
 +1
 +0
 +0
 +0
 +1
 +1
 +1
 +0
 +1
 +1
 +0
 +0
 +1
 +1
 +0
 +1
 +0
 +0
 +0
 +1
 +1
 +0
 +1
 +0
 +1
 +1
 +1
 +0
 +1
 +0
 +1
 +0
 +0
 +0
 +0
 +1
 +0
 +1
 +0
 +0
 +0
 +1
 +0
 +1
 +0
 +1