Skip to content

Commit 748ddf4

Browse files
committed
add yelp3k dataset
1 parent a5a7965 commit 748ddf4

File tree

3 files changed

+48
-2
lines changed

3 files changed

+48
-2
lines changed

dhg/data/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .base import BaseData
22
from .cooking_200 import Cooking200
33
from .movielens import MovieLens1M
4-
from .yelp import Yelp2018, YelpRestaurant
4+
from .yelp import Yelp2018, YelpRestaurant, Yelp3k
55
from .gowalla import Gowalla
66
from .amazon import AmazonBook
77
from .walmart import WalmartTrips
@@ -52,5 +52,6 @@
5252
"News20",
5353
"IMDB4k",
5454
"Recipe100k",
55-
"Recipe200k"
55+
"Recipe200k",
56+
"Yelp3k"
5657
]

dhg/data/yelp.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,47 @@ def __init__(self, data_root: Optional[str] = None) -> None:
100100
},
101101
}
102102

103+
104+
105+
class Yelp3k(BaseData):
106+
r"""The Yelp3k dataset is a subset of the Yelp-Restaurant dataset for the vertex classification task.
107+
It is a restaurant-review network. All businesses in the “restaurant” catalog are selected as our nodes,
108+
and hyperedges are formed by grouping the restaurants visited by the same user.
109+
We use the state of the business as the corresponding node label.
110+
111+
The content of the Yelp3k dataset includes the following:
112+
113+
- ``num_classes``: The number of classes: :math:`6`.
114+
- ``num_vertices``: The number of vertices: :math:`3,855`.
115+
- ``num_edges``: The number of edges: :math:`24,137`.
116+
- ``dim_features``: The dimension of features: :math:`1,862`.
117+
- ``features``: The vertex feature matrix. ``torch.Tensor`` with size :math:`(3,855 \times 1,862)`.
118+
- ``edge_list``: The edge list. ``List`` with length :math:`24,137`.
119+
- ``labels``: The label list. ``torch.LongTensor`` with size :math:`(3,855, )`.
120+
121+
Args:
122+
``data_root`` (``str``, optional): The root directory where the data is stored. If set to ``None``, the dataset will be automatically downloaded from the server and saved into the default directory ``~/.dhg/datasets/``. Defaults to ``None``.
123+
"""
124+
125+
def __init__(self, data_root: Optional[str] = None) -> None:
126+
super().__init__("yelp_3k", data_root)
127+
self._content = {
128+
"num_classes": 6,
129+
"num_vertices": 3855,
130+
"num_edges": 24137,
131+
"dim_features": 1862,
132+
"features": {
133+
"upon": [{"filename": "features.pkl", "md5": "b107876d2e9ba5b0f7f67175e36bfdf4"}],
134+
"loader": load_from_pickle,
135+
"preprocess": [to_tensor,],
136+
},
137+
"edge_list": {
138+
"upon": [{"filename": "edge_list.pkl", "md5": "322cfd002c0f557cf7bc4b9b2654b987"}],
139+
"loader": load_from_pickle,
140+
},
141+
"labels": {
142+
"upon": [{"filename": "labels.pkl", "md5": "16ead8bcbb8a3621202c88f64c8cbc07"}],
143+
"loader": load_from_pickle,
144+
"preprocess": [to_long_tensor],
145+
},
146+
}

docs/source/api/data.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ Hypergraph Datasets
6666
dhg.data.IMDB4k
6767
dhg.data.Recipe100k
6868
dhg.data.Recipe200k
69+
dhg.data.Yelp3k
6970

7071

7172
**Welcome to contribute datasets!**

0 commit comments

Comments
 (0)