Skip to content

Commit 828eac8

Browse files
authored
Correct URLs and add Unit Test for sequential datasets: gowalla, tafeng, yoochoose (#680)
1 parent 069ce51 commit 828eac8

File tree

6 files changed

+112
-13
lines changed

cornac/datasets/gowalla.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313
# limitations under the License.
1414
# ============================================================================
1515
"""
16-
This data is built based on the Ta Feng Grocery Dataset that contains
17-
a Chinese grocery store transaction data from November 2000 to February 2001.
18-
Accessed at https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset
16+
Source: https://snap.stanford.edu/data/loc-gowalla.html
1917
"""
2018

2119
from ..utils import cache
@@ -38,10 +36,9 @@ def load_checkins(fmt="USITJson", reader: Reader = None) -> List:
3836
Location information is stored in `json` format
3937
"""
4038
fpath = cache(
41-
url="https://static.preferred.ai/datasets/gowalla/check-ins.zip",
39+
url="https://static.preferred.ai/cornac/datasets/gowalla/check-ins.zip",
4240
unzip=True,
4341
relative_path="gowalla/check-ins.txt",
4442
)
4543
reader = Reader() if reader is None else reader
4644
return reader.read(fpath, fmt=fmt, sep="\t")
47-

cornac/datasets/tafeng.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# limitations under the License.
1414
# ============================================================================
1515
"""
16-
This data is built based on the Ta Feng Grocery Dataset that contains
16+
This data is built based on the Ta Feng Grocery Dataset that contains
1717
transaction data of a Chinese grocery store from November 2000 to February 2001.
1818
Accessed at https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset
1919
"""
@@ -37,7 +37,7 @@ def load_basket(fmt="UBITJson", reader: Reader = None) -> List:
3737
Data in the form of a list of tuples (user, basket, item, timestamp, json).
3838
"""
3939
fpath = cache(
40-
url="https://static.preferred.ai/hieudo/basket.zip",
40+
url="https://static.preferred.ai/cornac/datasets/tafeng/basket.zip",
4141
unzip=True,
4242
relative_path="tafeng/basket.txt",
4343
)

cornac/datasets/yoochoose.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313
# limitations under the License.
1414
# ============================================================================
1515
"""
16-
This data is built based on the Ta Feng Grocery Dataset that contains
17-
a Chinese grocery store transaction data from November 2000 to February 2001.
18-
Accessed at https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset
16+
Yoochoose Dataset is originally from the RecSys Challenge 2015.
1917
"""
2018

2119
from typing import List
@@ -39,7 +37,7 @@ def load_buy(fmt="SITJson", reader: Reader = None) -> List:
3937
Location information is stored in `json` format
4038
"""
4139
fpath = cache(
42-
url="https://static.preferred.ai/datasets/yoochoose/buy.zip",
40+
url="https://static.preferred.ai/cornac/datasets/yoochoose/buy.zip",
4341
unzip=True,
4442
relative_path="yoochoose/buy.txt",
4543
)
@@ -62,7 +60,7 @@ def load_click(fmt="SITJson", reader: Reader = None) -> List:
6260
Location information is stored in `json` format
6361
"""
6462
fpath = cache(
65-
url="https://static.preferred.ai/datasets/yoochoose/click.zip",
63+
url="https://static.preferred.ai/cornac/datasets/yoochoose/click.zip",
6664
unzip=True,
6765
relative_path="yoochoose/click.txt",
6866
)
@@ -85,7 +83,7 @@ def load_test(fmt="SITJson", reader: Reader = None) -> List:
8583
Location information is stored in `json` format
8684
"""
8785
fpath = cache(
88-
url="https://static.preferred.ai/datasets/yoochoose/test.zip",
86+
url="https://static.preferred.ai/cornac/datasets/yoochoose/test.zip",
8987
unzip=True,
9088
relative_path="yoochoose/test.txt",
9189
)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2023 The Cornac Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
16+
import random
17+
import time
18+
import unittest
19+
20+
from cornac.datasets import gowalla
21+
22+
23+
class TestGowalla(unittest.TestCase):
24+
25+
def test_load_checkins(self):
26+
random.seed(time.time())
27+
if random.random() > 0.8:
28+
checkins = gowalla.load_checkins()
29+
self.assertEqual(len(checkins), 6442892)
30+
31+
32+
if __name__ == "__main__":
33+
unittest.main()
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Copyright 2023 The Cornac Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
16+
import random
17+
import time
18+
import unittest
19+
20+
from cornac.datasets import tafeng
21+
22+
23+
class TestTafeng(unittest.TestCase):
24+
25+
def test_load_basket(self):
26+
random.seed(time.time())
27+
if random.random() > 0.8:
28+
baskets = tafeng.load_basket()
29+
self.assertEqual(len(baskets), 817741)
30+
31+
32+
if __name__ == "__main__":
33+
unittest.main()
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2023 The Cornac Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# ============================================================================
15+
16+
import random
17+
import time
18+
import unittest
19+
20+
from cornac.datasets import yoochoose
21+
22+
23+
class TestYooChoose(unittest.TestCase):
24+
25+
def test_load_buy_click_test(self):
26+
random.seed(time.time())
27+
if random.random() > 0.8:
28+
buy = yoochoose.load_buy()
29+
click = yoochoose.load_click()
30+
test = yoochoose.load_test()
31+
32+
self.assertEqual(len(buy), 1150753)
33+
self.assertEqual(len(click), 33003944)
34+
self.assertEqual(len(test), 8251791)
35+
36+
37+
if __name__ == "__main__":
38+
unittest.main()

0 commit comments

Comments
 (0)