Dataloader problem #3270
Unanswered
shrimonmuke0202
asked this question in
Q&A
Replies: 3 comments 2 replies
-
You can convert your data to a one-hot tensor (e.g., …).
Beta Was this translation helpful? Give feedback.
1 reply
-
The custom `Data` object accepts a plain string attribute:
data = Data(seq="122345")
loader = DataLoader([data, data])
Beta Was this translation helpful? Give feedback.
0 replies
-
class TestbedDataset(InMemoryDataset):
    """In-memory drug-target affinity dataset built from pre-computed graphs.

    On construction, the file at ``self.processed_paths[0]`` (named
    ``<dataset>.pt``) is loaded if it exists. Otherwise it is first created
    from ``xd``, ``xt``, ``y`` and ``smile_graph`` via :meth:`process`.

    Args:
        root: Root directory passed to ``InMemoryDataset``; required for
            saving the preprocessed data. Defaults to ``'/tmp'``.
        dataset: Benchmark dataset name; also the stem of the processed
            file. Defaults to ``'davis'``.
        xd: List of SMILES strings (used as keys into ``smile_graph``).
        xt: List of targets, one per SMILES entry. Each target is attached
            to its graph unchanged, so it may be a raw sequence string.
        y: List of labels (i.e. affinity), one per SMILES entry.
        transform: Forwarded to ``InMemoryDataset``.
        pre_transform: Forwarded to ``InMemoryDataset``.
        smile_graph: Mapping from a SMILES string to a
            ``(c_size, features, edge_index)`` triple.
    """

    def __init__(self, root='/tmp', dataset='davis',
                 xd=None, xt=None, y=None, transform=None,
                 pre_transform=None, smile_graph=None):
        # NOTE(review): `processed_file_names` reads `self.dataset`, which is
        # only assigned after the base initializer returns. Presumably this is
        # safe because `_download`/`_process` are overridden below -- confirm
        # against the installed PyTorch Geometric version.
        super().__init__(root, transform, pre_transform)
        # Benchmark dataset name, default = 'davis'.
        self.dataset = dataset

        processed_path = self.processed_paths[0]
        if os.path.isfile(processed_path):
            print('Pre-processed data found: {}, loading ...'.format(processed_path))
        else:
            print('Pre-processed data {} not found, doing pre-processing...'.format(processed_path))
            self.process(xd, xt, y, smile_graph)
        # Both branches end with the processed file on disk; load it once.
        self.data, self.slices = torch.load(processed_path)

    @property
    def raw_file_names(self):
        """Return the raw file names; this dataset has none."""
        # An empty list instead of an implicit None, so that any code that
        # iterates over the raw file names does not fail on a non-iterable.
        return []

    @property
    def processed_file_names(self):
        """Return the single processed file name, ``<dataset>.pt``."""
        return [self.dataset + '.pt']

    def download(self):
        """No-op: nothing is downloaded to ``self.raw_dir``."""
        pass

    def _download(self):
        """No-op override of the base-class download hook."""
        pass

    def _process(self):
        """Only ensure ``self.processed_dir`` exists.

        The actual processing needs extra arguments and is triggered
        explicitly from ``__init__`` through :meth:`process`.
        """
        os.makedirs(self.processed_dir, exist_ok=True)

    def process(self, xd, xt, y, smile_graph):
        """Build the graph list for drug-target affinity prediction and save it.

        Args:
            xd: List of SMILES strings.
            xt: List of targets (encoded or raw), aligned with ``xd``.
            y: List of labels (i.e. affinity), aligned with ``xd``.
            smile_graph: Mapping from a SMILES string to a
                ``(c_size, features, edge_index)`` triple.

        Raises:
            ValueError: If ``xd``, ``xt`` and ``y`` differ in length.

        The collated ``(data, slices)`` pair is written to
        ``self.processed_paths[0]``. Entries whose ``edge_index`` is empty
        are skipped.
        """
        # Explicit raise rather than `assert`, which is stripped under `-O`.
        if not (len(xd) == len(xt) == len(y)):
            raise ValueError("The three lists must be the same length!")

        data_len = len(xd)
        data_list = []
        for i, (smiles, target, labels) in enumerate(zip(xd, xt, y), start=1):
            print('Converting SMILES to graph: {}/{}'.format(i, data_len))
            # Look up the pre-computed molecular graph for this SMILES string.
            c_size, features, edge_index = smile_graph[smiles]
            # NOTE(review): graphs without edges are dropped silently; the
            # guard is taken to cover the whole construction-and-append step.
            if len(edge_index) == 0:
                continue
            # Make the graph ready for PyTorch Geometric GCN algorithms.
            graph = DATA.Data(x=torch.Tensor(features),
                              edge_index=torch.LongTensor(edge_index).transpose(1, 0),
                              y=torch.FloatTensor([labels]))
            # The target is stored as given (no tensor conversion).
            graph.target = target
            graph['c_size'] = torch.LongTensor([c_size])
            # Append graph, label and target sequence to the data list.
            data_list.append(graph)

        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]

        print('Graph construction done. Saving to file.')
        data, slices = self.collate(data_list)
        # Save the preprocessed data.
        torch.save((data, slices), self.processed_paths[0])


# Question from the original post: I want to convert this into a
# `torch_geometric.data.Dataset`. How can I do this?
Beta Was this translation helpful? Give feedback.
1 reply
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Uh oh!
There was an error while loading. Please reload this page.
-
I want to store "MABFTSABN.....", which is a protein sequence, in its original string form rather than as a one-hot tensor in a PyTorch Geometric custom data loader. How can I make this change in my custom data loader?
Beta Was this translation helpful? Give feedback.
All reactions