Skip to content

Commit 0ac4fd3

Browse files
committed
add dblp_8k hypergraph dataset
1 parent bd8a8dd commit 0ac4fd3

File tree

2 files changed

+34
-0
lines changed

2 files changed

+34
-0
lines changed

dhg/data/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from .white_house import HouseCommittees
99
from .news import News20
1010
from .coauthorship import CoauthorshipCora, CoauthorshipDBLP
11+
from .dblp import DBLP8k
1112
from .cocitation import CocitationCora, CocitationCiteseer, CocitationPubmed
1213
from .blogcatalog import BlogCatalog
1314
from .flickr import Flickr
@@ -38,6 +39,7 @@
3839
"PubmedBiGraph",
3940
"CoauthorshipCora",
4041
"CoauthorshipDBLP",
42+
"DBLP8k",
4143
"CocitationCora",
4244
"CocitationCiteseer",
4345
"CocitationPubmed",

dhg/data/dblp.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from typing import Optional
2+
3+
from dhg.datapipe import load_from_pickle
4+
5+
from .base import BaseData
6+
7+
8+
class DBLP8k(BaseData):
    r"""DBLP-8k: a citation network dataset intended for the link prediction task.

    The data is a subset of records crawled through the DBLP API. Items were
    selected according to several conditions, such as the venue and the
    publication year (from 2018 to 2022). It contains 6498 authors and 2603 papers.

    The dataset exposes the following content:

    - ``num_vertices``: The number of vertices: :math:`8,657`.
    - ``num_edges``: The number of edges: :math:`2,603`.
    - ``edge_list``: The edge list. ``List`` with length :math:`2,603`.

    Args:
        ``data_root`` (``str``, optional): The directory in which the data is stored. If set to ``None``, the data will be auto-downloaded from the server and saved into the default directory ``~/.dhg/datasets/``. Defaults to ``None``.
    """

    def __init__(self, data_root: Optional[str] = None) -> None:
        super().__init__("dblp_8k", data_root)

        # The edge list is stored in a single pickle file; the md5 value is
        # kept next to the filename (presumably for an integrity check done
        # by the base class -- confirm against ``BaseData``).
        edge_list_item = {
            "upon": [{"filename": "edge_list.pkl", "md5": "46b16106daae8eebfd39c2fc43ecbf0b"}],
            "loader": load_from_pickle,
        }

        # Static statistics are stored directly; ``edge_list`` is described
        # by its source file(s) and the loader used to read them.
        self._content = {
            "num_vertices": 8657,
            "num_edges": 2603,
            "edge_list": edge_list_item,
        }
32+

0 commit comments

Comments
 (0)