Skip to content

Commit 5496287

Browse files
committed
add Tencent2k dataset
1 parent 748ddf4 commit 5496287

File tree

3 files changed

+66
-2
lines changed

3 files changed

+66
-2
lines changed

dhg/data/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
from .flickr import Flickr
1515
from .github import Github
1616
from .facebook import Facebook
17-
from .tencent import TencentBiGraph
17+
from .tencent import TencentBiGraph, Tencent2k
1818
from .cora import Cora, CoraBiGraph
1919
from .citeseer import Citeseer, CiteseerBiGraph
2020
from .pubmed import Pubmed, PubmedBiGraph
@@ -53,5 +53,6 @@
5353
"IMDB4k",
5454
"Recipe100k",
5555
"Recipe200k",
56-
"Yelp3k"
56+
"Yelp3k",
57+
"Tencent2k"
5758
]

dhg/data/tencent.py

Lines changed: 62 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,3 +63,65 @@ def __init__(self, data_root: Optional[str] = None) -> None:
6363
"preprocess": [to_long_tensor],
6464
},
6565
}
66+
67+
68+
69+
class Tencent2k(BaseData):
70+
r"""The Tencent2k dataset is a social network dataset for the vertex classification task.
71+
It is a subset of the TencentBiGraph dataset.
72+
The nodes are social network users.
73+
Nodes are connected by a hyperedge if the corresponding users join the same social communities.
74+
75+
The content of the Tencent2k dataset includes the following:
76+
77+
- ``num_classes``: The number of classes: :math:`2`.
78+
- ``num_vertices``: The number of vertices: :math:`2,146`.
79+
- ``num_edges``: The number of edges: :math:`6,378`.
80+
- ``dim_features``: The dimension of features: :math:`8`.
81+
- ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(2,146 \times 8)`.
82+
- ``edge_list``: The edge list. ``List`` with length :math:`6,378`.
83+
- ``labels``: The label list. ``torch.LongTensor`` with size :math:`(2,146, )`.
84+
- ``train_mask``: The train mask. ``torch.BoolTensor`` with size :math:`(2,146, )`.
85+
- ``val_mask``: The validation mask. ``torch.BoolTensor`` with size :math:`(2,146, )`.
86+
- ``test_mask``: The test mask. ``torch.BoolTensor`` with size :math:`(2,146, )`.
87+
88+
Args:
89+
``data_root`` (``str``, optional): The root directory in which the data is stored. If set to ``None``, the data will be automatically downloaded from the server and saved into the default directory ``~/.dhg/datasets/``. Defaults to ``None``.
90+
"""
91+
def __init__(self, data_root: Optional[str] = None) -> None:
92+
super().__init__("tencent_2k", data_root)
93+
self._content = {
94+
"num_classes": 2,
95+
"num_vertices": 2146,
96+
"num_edges": 6378,
97+
"dim_features": 8,
98+
"features": {
99+
"upon": [{"filename": "features.pkl", "md5": "d3ff915a640b7e87e21849e3c400cc76"}],
100+
"loader": load_from_pickle,
101+
"preprocess": [to_tensor, partial(norm_ft, ord=1)],  # NOTE(review): presumably applied in list order (tensor conversion, then norm_ft with ord=1) -- confirm against BaseData
102+
},
103+
"edge_list": {
104+
"upon": [{"filename": "edge_list.pkl", "md5": "c9dc2fa5092087173369385885ffbed4"}],
105+
"loader": load_from_pickle,
106+
},
107+
"labels": {
108+
"upon": [{"filename": "labels.pkl", "md5": "899ce99d0066d74c737cc19301f010f6"}],
109+
"loader": load_from_pickle,
110+
"preprocess": [to_long_tensor],
111+
},
112+
"train_mask": {
113+
"upon": [{"filename": "train_mask.pkl", "md5": "3e4de1d3b6b8b47bd729d4f59c65583d"}],
114+
"loader": load_from_pickle,
115+
"preprocess": [to_bool_tensor],
116+
},
117+
"val_mask": {
118+
"upon": [{"filename": "val_mask.pkl", "md5": "e8e42dc2ee21e733f7a2c733f162cea9"}],
119+
"loader": load_from_pickle,
120+
"preprocess": [to_bool_tensor],
121+
},
122+
"test_mask": {
123+
"upon": [{"filename": "test_mask.pkl", "md5": "783283d2a5f2bee5a83d0d8e11ef9e5e"}],
124+
"loader": load_from_pickle,
125+
"preprocess": [to_bool_tensor],
126+
},
127+
}

docs/source/api/data.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@ Hypergraph Datasets
6767
dhg.data.Recipe100k
6868
dhg.data.Recipe200k
6969
dhg.data.Yelp3k
70+
dhg.data.Tencent2k
7071

7172

7273
**Welcome to contribute datasets!**

0 commit comments

Comments
 (0)