@@ -154,9 +154,13 @@ def setup_processed(self):
154154 print ("Load data from file" , filename )
155155 data = self ._load_data_from_file (filename )
156156 print ("Create splits" )
157- train , test = train_test_split (data , train_size = self .train_split )
157+ train , test = train_test_split (
158+ data , train_size = 1 - (self .validation_split + self .test_split )
159+ )
158160 del data
159- test , val = train_test_split (test , train_size = self .train_split )
161+ test , val = train_test_split (
162+ test , train_size = self .test_split / (self .validation_split + self .test_split )
163+ )
160164 torch .save (train , os .path .join (self .processed_dir , "train.pt" ))
161165 torch .save (test , os .path .join (self .processed_dir , "test.pt" ))
162166 torch .save (val , os .path .join (self .processed_dir , "validation.pt" ))
@@ -179,6 +183,21 @@ def processed_file_names(self) -> List[str]:
179183 """
180184 return ["test.pt" , "train.pt" , "validation.pt" ]
181185
def _set_processed_data_props(self):
    """
    Zero out the label-count and feature-size metadata.

    Self-supervised learning with PubChem does not use this metadata, so
    both values are fixed at zero and then reported on stdout.

    Sets:
        - self._num_of_labels: 0
        - self._feature_vector_size: 0
    """
    # Single placeholder value shared by both unused properties.
    unused = 0
    self._num_of_labels = unused
    self._feature_vector_size = unused

    # Read the values back from the instance so the report reflects
    # exactly what was stored.
    report = (
        f"Number of labels for loaded data: {self._num_of_labels} ",
        f"Feature vector size: {self._feature_vector_size} ",
    )
    for line in report:
        print(line)
200+
182201 def _perform_data_preparation (self , * args , ** kwargs ):
183202 """
184203 Checks for raw data and downloads if necessary.
0 commit comments