PROBLEMS WORKING WITH OWN DATASET (GCn) #5519

Akashkalakonda · 2022-09-23T15:47:06Z

Akashkalakonda
Sep 23, 2022

e_x

source target
0 0.0 1.0
1 0.0 2.0
2 0.0 3.0
3 0.0 4.0
4 0.0 5.0
5 0.0 6.0
6 0.0 7.0
7 0.0 8.0
8 0.0 8.5
9 0.0 9.0

e_x=torch.tensor(e_x.values,dtype=torch.long)
e_x
tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 2, 3, 4, 5, 6, 7, 8, 8, 9]])

x_h=pd.DataFrame(([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000] , [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000] , [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000] , [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000] , [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000] , [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000], [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0174, 0.0000, 0.0248,
0.0000] ))

x_h= torch.tensor(x_h.values, dtype=torch.long)

from torch_geometric.data import Data
data = Data(x=x_h, edge_index=e_x)

import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Three-layer GCN followed by a linear classification head.

    The layer sizes are hard-coded: each node's 10 input features are
    reduced to 4, 4 and finally 2 channels, and the resulting 2-dimensional
    embedding is mapped to 2 class scores.
    """

    def __init__(self):
        super().__init__()
        # Fixed seed so that the weight initialisation is reproducible.
        torch.manual_seed(1234)
        self.conv1 = GCNConv(10, 4)
        self.conv2 = GCNConv(4, 4)
        self.conv3 = GCNConv(4, 2)
        self.classifier = Linear(2, 2)

    def forward(self, x, edge_index):
        """Return the class scores for every node.

        Args:
            x: Node feature matrix with 10 columns. It must be a
                floating-point tensor; a ``torch.long`` tensor fails inside
                the first convolution with "expected scalar type Long but
                found Float".
            edge_index: Graph connectivity passed unchanged to every
                ``GCNConv`` layer.

        Returns:
            The output of the linear classifier (2 scores per node).
        """
        h = self.conv1(x, edge_index)
        h = h.tanh()
        h = self.conv2(h, edge_index)
        h = h.tanh()
        h = self.conv3(h, edge_index)
        h = h.tanh()  # Final GNN embedding space.

        # Apply a final (linear) classifier and hand the scores back to the
        # caller; train() indexes the result and test() calls argmax on it.
        out = self.classifier(h)
        return out

model = GCN()
print(model)
GCN(
(conv1): GCNConv(10, 4)
(conv2): GCNConv(4, 4)
(conv3): GCNConv(4, 2)
(classifier): Linear(in_features=2, out_features=2, bias=True)
)

from IPython.display import Javascript # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = GCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one optimisation step on the training nodes and return the loss.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``data`` objects. ``data`` must provide ``y`` and a boolean
    ``train_mask`` in addition to ``x`` and ``edge_index``.
    """
    model.train()
    optimizer.zero_grad()  # Clear gradients.
    out = model(data.x, data.edge_index)  # Perform a single forward pass.
    # Compute the loss solely on the training nodes. The labels are masked
    # as well so that predictions and targets have the same length.
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()  # Derive gradients.
    optimizer.step()  # Update parameters based on gradients.
    return loss

@torch.no_grad()  # Evaluation only: no gradients are needed.
def test():
    """Return the accuracy of the module-level ``model`` on the test nodes.

    Uses the module-level ``data`` object, which must provide ``y`` and a
    boolean ``test_mask``.
    """
    model.eval()
    out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)  # Use the class with the highest score.
    # Check against ground-truth labels.
    test_correct = pred[data.test_mask] == data.y[data.test_mask]
    # Derive ratio of correct predictions.
    test_acc = int(test_correct.sum()) / int(data.test_mask.sum())
    return test_acc

# Train for 100 epochs and report the loss after each one.
for epoch in range(1, 101):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

AND THE ERRORS I GET ARE:

RuntimeError Traceback (most recent call last)
in
25
26 for epoch in range(1, 101):
---> 27 loss = train()
28 print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

6 frames
in train()
9 model.train()
10 optimizer.zero_grad() # Clear gradients.
---> 11 out = model(data.x, data.edge_index) # Perform a single forward pass.
12 loss = criterion(out[data.train_mask], data.y) # Compute the loss solely based on the training nodes.
13 loss.backward() # Derive gradients.

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []

in forward(self, x, edge_index)
14
15 def forward(self, x, edge_index):
---> 16 h = self.conv1(x, edge_index)
17 h = h.tanh()
18 h = self.conv2(h, edge_index)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch_geometric/nn/conv/gcn_conv.py in forward(self, x, edge_index, edge_weight)
192 edge_index = cache
193
--> 194 x = self.lin(x)
195
196 # propagate_type: (x: Tensor, edge_weight: OptTensor)

/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.7/dist-packages/torch_geometric/nn/dense/linear.py in forward(self, x)
116 x (Tensor): The features.
117 """
--> 118 return F.linear(x, self.weight, self.bias)
119
120 @torch.no_grad()

RuntimeError: expected scalar type Long but found Float

I AM COMPLETELY STUCK HERE. COMMUNITY, PLEASE HELP ME OUT WITH A SOLUTION.

Akashkalakonda · 2022-09-23T15:56:34Z

Akashkalakonda
Sep 23, 2022
Author

@rusty1s Can you help me out with these issues?

0 replies

EdisonLeeeee · 2022-09-23T16:46:14Z

EdisonLeeeee
Sep 23, 2022
Collaborator

x_h should be float instead of long:

x_h = torch.tensor(x_h.values, dtype=torch.float)

4 replies

Akashkalakonda Sep 23, 2022
Author

Thanks, sir... but the problem isn't solved yet.
There are more errors:

KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getattr__(self, key)
60 try:
---> 61 return self[key]
62 except KeyError:

4 frames
KeyError: 'train_mask'

During handling of the above exception, another exception occurred:

AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getattr__(self, key)
62 except KeyError:
63 raise AttributeError(
---> 64 f"'{self.__class__.__name__}' object has no attribute '{key}'")
65
66 def __setattr__(self, key: str, value: Any):

AttributeError: 'GlobalStorage' object has no attribute 'train_mask'

EdisonLeeeee Sep 24, 2022
Collaborator

You were doing a node classification task without providing supervision data.y and train_mask/val_mask/test_mask that specify the training/validation/test nodes. There is only a graph with x and edge_index provided.

Akashkalakonda Sep 24, 2022
Author

y_label=pd.DataFrame([1,1,0,0,1,1,1,0,0,1])
y=y_label.to_numpy()
y.shape
(10, 1)

from torch_geometric.data import Data
data = Data(x=x_h, edge_index=e_x , y=y)

KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getattr__(self, key)
60 try:
---> 61 return self[key]
62 except KeyError:

4 frames
/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getitem__(self, key)
80 def __getitem__(self, key: str) -> Any:
---> 81 return self._mapping[key]
82

KeyError: 'train_mask'

During handling of the above exception, another exception occurred:

AttributeError Traceback (most recent call last)
in
25
26 for epoch in range(1, 101):
---> 27 loss = train()
28 print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

in train()
10 optimizer.zero_grad() # Clear gradients.
11 out = model(data.x, data.edge_index) # Perform a single forward pass.
---> 12 loss = criterion(out[data.train_mask], data.y) # Compute the loss solely based on the training nodes.
13 loss.backward() # Derive gradients.
14 optimizer.step() # Update parameters based on gradients.

/usr/local/lib/python3.7/dist-packages/torch_geometric/data/data.py in __getattr__(self, key)
426 "dataset, remove the 'processed/' directory in the dataset's "
427 "root folder and try again.")
--> 428 return getattr(self._store, key)
429
430 def __setattr__(self, key: str, value: Any):

/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getattr__(self, key)
62 except KeyError:
63 raise AttributeError(
---> 64 f"'{self.__class__.__name__}' object has no attribute '{key}'")
65
66 def __setattr__(self, key: str, value: Any):

AttributeError: 'GlobalStorage' object has no attribute 'train_mask'

????

EdisonLeeeee Sep 24, 2022
Collaborator

You also need to define your train_mask/val_mask/test_mask in advance. For example (in case you have 10 nodes):

data.train_mask = torch.tensor([1, 1, 1, 0, 0, 0, 0, 0, 0, 0], dtype=torch.bool)
data.val_mask = torch.tensor([0, 0, 0, 1, 1, 0, 0, 0, 0, 0], dtype=torch.bool)
data.test_mask = torch.tensor([0, 0, 0, 0, 0, 1, 1, 1, 1, 1], dtype=torch.bool)

Also, y should be a 1D Tensor with shape [10] instead of [10, 1].

Akashkalakonda · 2022-09-23T18:20:36Z

Akashkalakonda
Sep 23, 2022
Author

more errors sir:

KeyError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getattr__(self, key)
60 try:
---> 61 return self[key]
62 except KeyError:

4 frames
KeyError: 'train_mask'

During handling of the above exception, another exception occurred:

AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/torch_geometric/data/storage.py in __getattr__(self, key)
62 except KeyError:
63 raise AttributeError(
---> 64 f"'{self.__class__.__name__}' object has no attribute '{key}'")
65
66 def __setattr__(self, key: str, value: Any):

AttributeError: 'GlobalStorage' object has no attribute 'train_mask'

ANY SOLUTIONS?

1 reply

LeoGori Dec 5, 2022

It seems that you have not extended your data with a train_mask attribute. You can generate it by defining a RandomNodeSplit object, e.g.:

from torch_geometric.transforms import RandomNodeSplit
rns = RandomNodeSplit()
data = rns(data)

In this way, you generate random train_mask, val_mask and test_mask attributes on your data object.

Akashkalakonda · 2023-01-02T14:54:36Z

Akashkalakonda
Jan 2, 2023
Author

Hello sir @EdisonLeeeee,
I need your help, sir.

0 replies

Akashkalakonda · 2023-01-03T06:04:33Z

Akashkalakonda
Jan 3, 2023
Author

import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
from sklearn.preprocessing import LabelEncoder
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

# Load the data

x = pd.read_csv('tempdataset.csv', index_col=0)
edge_index = pd.read_csv('tempdatasetsourceandtarget.csv', index_col=0)
y = pd.read_excel('y.xlsx')

# Drop unnecessary columns from x

x = x.drop(columns=['ID', 'Name', 'Screen Name', 'Location'])

# Convert data to numpy arrays

x = x.to_numpy()
edge_index = edge_index.to_numpy()
y = y.to_numpy().squeeze()

# Encode edge_index columns as integers

le = LabelEncoder()
row, col = edge_index.T
le.fit(np.concatenate((row, col)))
row = le.transform(row)
col = le.transform(col)

# Convert data to tensors

x = torch.tensor(x, dtype=torch.float)
edge_index = torch.tensor(np.stack([row, col], axis=0))
y = torch.tensor(y)

# Create a PyTorch Geometric Data object

data = Data(x=x, edge_index=edge_index, y=y)
#Data(x=[114, 4], edge_index=[2, 114], y=[114])

# Split the data into train, val, and test sets

data = T.RandomNodeSplit(num_val=0.2, num_test=0.4)(data)

# Set device

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the GCN model

class GCN(torch.nn.Module):
    """Two-layer GCN that maps node features to per-node class scores.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the hidden layer.
        out_channels: Number of output scores per node.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # NOTE(review): cached=True presumably reuses the normalised
        # adjacency after the first call, which only suits a single fixed
        # graph - confirm against the GCNConv documentation.
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv2 = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index, edge_weight=None):
        """Apply dropout -> conv1 -> ReLU -> dropout -> conv2.

        Dropout (p=0.5) is only active while the module is in training mode.
        The output of the second convolution is returned without a final
        activation.
        """
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv1(x, edge_index, edge_weight).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)
        return x

# Move model and data to the designated device

model = GCN(data.x.size(-1), 64, data.y.max().item()+1)
model, data = model.to(device), data.to(device)

# Set up the optimizer

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Define the training and evaluation functions

def train():
    """Run one optimisation step on the training nodes.

    Uses the module-level ``model``, ``optimizer`` and ``data`` objects.

    Returns:
        The cross-entropy loss on the training nodes as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_weight)
    # Only the nodes selected by train_mask contribute to the loss.
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test():
    """Evaluate the module-level ``model`` on each node split.

    Returns:
        A list ``[train_acc, val_acc, test_acc]`` with the fraction of
        correctly classified nodes under ``data.train_mask``,
        ``data.val_mask`` and ``data.test_mask`` respectively.
    """
    model.eval()
    pred = model(data.x, data.edge_index, data.edge_weight).argmax(dim=-1)

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        # Correct predictions within the split divided by the split size.
        accs.append(int((pred[mask] == data.y[mask]).sum()) / int(mask.sum()))
    return accs

# Train and evaluate the model

# 100 epochs; after each one, report the loss and the three split accuracies.
for epoch in range(1, 101):
    loss = train()
    accs = test()
    print(f'Epoch {epoch}: loss = {loss:.4f}, acc = {accs[0]:.4f}, val_acc = {accs[1]:.4f}, test_acc = {accs[2]:.4f}')

This is my code,
and this is the error I am not able to solve:
RuntimeError: index 114 is out of bounds for dimension 0 with size 114
Any suggestions here, @EdisonLeeeee and @LeoGori?

2 replies

rusty1s Jan 3, 2023
Maintainer

What does data.validate() return? It looks like edge_index.max() >= data.num_nodes, which is not allowed.

Akashkalakonda Jan 13, 2023
Author

I didn't get it, sir @rusty1s. Is there any problem with the dataset I am using?

Akashkalakonda · 2023-01-13T14:49:00Z

Akashkalakonda
Jan 13, 2023
Author

import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
from sklearn.preprocessing import LabelEncoder
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

# Load the data

x = pd.read_csv('tempdataset.csv', index_col=0)
edge_index = pd.read_csv('tempdatasetsourceandtarget.csv', index_col=0)
y = pd.read_excel('y.xlsx')

# Drop unnecessary columns from x

x = x.drop(columns=['ID', 'Name', 'Screen Name', 'Location'])

# Convert data to numpy arrays

x = x.to_numpy()
edge_index = edge_index.to_numpy()
y = y.to_numpy().squeeze()

# Encode edge_index columns as integers

le = LabelEncoder()
row, col = edge_index.T
le.fit(np.concatenate((row, col)))
row = le.transform(row)
col = le.transform(col)

# Convert data to tensors

x = torch.tensor(x, dtype=torch.float)
edge_index = torch.tensor(np.stack([row, col], axis=0))
y = torch.tensor(y)

# Create a PyTorch Geometric Data object

data = Data(x=x, edge_index=edge_index, y=y)

# Split the data into train, val, and test sets

data = T.RandomNodeSplit(num_val=0.2, num_test=0.4)(data)

# Set device

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define the GCN model

class GCN(torch.nn.Module):
    """Two-layer GCN that maps node features to per-node class scores.

    Args:
        in_channels: Number of input features per node.
        hidden_channels: Width of the hidden layer.
        out_channels: Number of output scores per node.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # NOTE(review): cached=True presumably reuses the normalised
        # adjacency after the first call, which only suits a single fixed
        # graph - confirm against the GCNConv documentation.
        self.conv1 = GCNConv(in_channels, hidden_channels, cached=True)
        self.conv2 = GCNConv(hidden_channels, out_channels, cached=True)

    def forward(self, x, edge_index, edge_weight=None):
        """Apply dropout -> conv1 -> ReLU -> dropout -> conv2.

        Dropout (p=0.5) is only active while the module is in training mode.
        The output of the second convolution is returned without a final
        activation.
        """
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv1(x, edge_index, edge_weight).relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)
        return x

# Move model and data to the designated device

model = GCN(data.x.size(-1), 64, data.y.max().item()+1)
model, data = model.to(device), data.to(device)

# Set up the optimizer

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Define the training and evaluation functions

def train():
    """Run one optimisation step on the training nodes.

    Uses the module-level ``model``, ``optimizer`` and ``data`` objects.

    Returns:
        The cross-entropy loss on the training nodes as a Python float.
    """
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index, data.edge_weight)
    # Only the nodes selected by train_mask contribute to the loss.
    loss = F.cross_entropy(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return float(loss)

@torch.no_grad()
def test():
    """Evaluate the module-level ``model`` on each node split.

    Returns:
        A list ``[train_acc, val_acc, test_acc]`` with the fraction of
        correctly classified nodes under ``data.train_mask``,
        ``data.val_mask`` and ``data.test_mask`` respectively.
    """
    model.eval()
    pred = model(data.x, data.edge_index, data.edge_weight).argmax(dim=-1)

    accs = []
    for mask in [data.train_mask, data.val_mask, data.test_mask]:
        # Correct predictions within the split divided by the split size.
        accs.append(int((pred[mask] == data.y[mask]).sum()) / int(mask.sum()))
    return accs

# Train and evaluate the model

# 100 epochs; after each one, report the loss and the three split accuracies.
for epoch in range(1, 101):
    loss = train()
    accs = test()
    print(f'Epoch {epoch}: loss = {loss:.4f}, acc = {accs[0]:.4f}, val_acc = {accs[1]:.4f}, test_acc = {accs[2]:.4f}')

This is the entire code I have been using. I am trying to perform node classification using a GCN on a custom dataset: I built a Twitter dataset, and my goal is bot detection. The three data files given as input in the code are x, edge_index, and y. x holds the features of the users, such as Name, Screen Name, Friends, Followers, Favorites, Location, Tweets, and ID. edge_index describes the links or, to be precise, the edges. Finally, y is the label that specifies whether the user is a human or a bot (1 for bot and 0 for human). That is all about my dataset. For some reason I am not able to execute the code.
tempdataset_1.csv
tempdatasetsourceandtarget_1.csv
y.xlsx
above are the dummy datasets i have been using.

ERROR: RuntimeError: index 114 is out of bounds for dimension 0 with size 114
I am stuck here.

2 replies

rusty1s Jan 14, 2023
Maintainer

Yes, the issue is that x.size() == (114, 4) and edge_index.max() == 114. As such, indexing with edge_index into x will raise an index error. The edge_index needs to be in interval 0 to x.size(0) - 1 == 113.

Akashkalakonda Jan 16, 2023
Author

Thank you, sir. I solved the error. You are a lifesaver.

ahsantfw · 2023-02-23T09:48:14Z

ahsantfw
Feb 23, 2023

Hi @AnotherAvenger, I have this dataset file and I want to train a GCN with only one hidden layer. Can you help me work out how to load the data and pass it to the current model here?

This is how it looks like

0 replies

PROBLEMS WORKING WITH OWN DATASET (GCn) #5519

Uh oh!

Akashkalakonda Sep 23, 2022

AND THE ERRORS I GET ARE:

Replies: 7 comments · 9 replies

Uh oh!

Akashkalakonda Sep 23, 2022 Author

Uh oh!

EdisonLeeeee Sep 23, 2022 Collaborator

Uh oh!

Akashkalakonda Sep 23, 2022 Author

Uh oh!

EdisonLeeeee Sep 24, 2022 Collaborator

Uh oh!

Akashkalakonda Sep 24, 2022 Author

Uh oh!

EdisonLeeeee Sep 24, 2022 Collaborator

Uh oh!

Akashkalakonda Sep 23, 2022 Author

more errors sir:

Uh oh!

LeoGori Dec 5, 2022

Uh oh!

Akashkalakonda Jan 2, 2023 Author

Uh oh!

Akashkalakonda Jan 3, 2023 Author

Load the data

Drop unnecessary columns from x

Convert data to numpy arrays

Encode edge_index columns as integers

Convert data to tensors

Create a PyTorch Geometric Data object

Split the data into train, val, and test sets

Set device

Define the GCN model

Move model and data to the designated device

Set up the optimizer

Define the training and evaluation functions

Train and evaluate the model

Uh oh!

rusty1s Jan 3, 2023 Maintainer

Uh oh!

Akashkalakonda Jan 13, 2023 Author

Uh oh!

Akashkalakonda Jan 13, 2023 Author

Load the data

Drop unnecessary columns from x

Convert data to numpy arrays

Encode edge_index columns as integers

Convert data to tensors

Create a PyTorch Geometric Data object

Split the data into train, val, and test sets

Set device

Define the GCN model

Move model and data to the designated device

Set up the optimizer

Define the training and evaluation functions

Train and evaluate the model

Uh oh!

rusty1s Jan 14, 2023 Maintainer

Uh oh!

Akashkalakonda Jan 16, 2023 Author

Uh oh!

ahsantfw Feb 23, 2023

Akashkalakonda
Sep 23, 2022

Replies: 7 comments 9 replies

Akashkalakonda
Sep 23, 2022
Author

EdisonLeeeee
Sep 23, 2022
Collaborator

Akashkalakonda Sep 23, 2022
Author

EdisonLeeeee Sep 24, 2022
Collaborator

Akashkalakonda Sep 24, 2022
Author

EdisonLeeeee Sep 24, 2022
Collaborator

Akashkalakonda
Sep 23, 2022
Author

Akashkalakonda
Jan 2, 2023
Author

Akashkalakonda
Jan 3, 2023
Author

rusty1s Jan 3, 2023
Maintainer

Akashkalakonda Jan 13, 2023
Author

Akashkalakonda
Jan 13, 2023
Author

rusty1s Jan 14, 2023
Maintainer

Akashkalakonda Jan 16, 2023
Author

ahsantfw
Feb 23, 2023