Skip to content

Commit d65aad9

Browse files
committed
Add new datasets: tradesy, gowalla
1 parent a69f7fc commit d65aad9

File tree

5 files changed

+105
-4
lines changed

5 files changed

+105
-4
lines changed

.gitignore

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,9 @@
44
!hello_world.txt
55
*.npy
66
*.csv
7-
*.dat
7+
*.dat
8+
9+
# Large dataset files
10+
*.datcornac/datasets/gowalla/check-ins.zip
11+
cornac/datasets/tradesy/item_features.zip
12+
cornac/datasets/gowalla/check-ins.zip

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ This repository contains various recommendation systems datasets for [Cornac](ht
1717
* [x] movielens
1818
* [ ] netflix
1919
* [x] tafeng
20-
* [ ] tradesy
21-
* [ ] `users.zip`
22-
* [ ] `item_features.zip`
20+
* [x] tradesy
21+
* [x] `users.zip`
22+
* [x] `item_features.zip`
23+
* [x] `item_ids.zip`
2324
* [x] yoochoose
2425
* [x] `buy.zip`
2526
* [x] `click.zip` (GitHub releases: https://github.com/PreferredAI/static-data/releases/download/yoochoose/click.zip)
551 KB
Binary file not shown.

cornac/datasets/tradesy/users.zip

1.34 MB
Binary file not shown.

upload_large_files.sh

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/bin/bash
# upload_large_files.sh
#
# Scans the working tree for regular files larger than 100MB and uploads each
# one as an asset to a GitHub Release. The release tag is the name of the
# file's parent directory and the asset name is the file's basename.
#
# Two sets of files are examined, both captured before anything is modified:
#   1. files that are tracked or untracked-but-not-ignored; each large file
#      found here is also appended to .gitignore;
#   2. files that are already ignored; these are only uploaded.
#
# Usage:
#   ./upload_large_files.sh             # real upload
#   DRY_RUN=1 ./upload_large_files.sh   # report what would be uploaded
#
# Requires: git, and an installed and authenticated GitHub CLI (gh).
#
# NOTE(review): paths printed by `git ls-files` are relative to the current
# directory and .gitignore is opened relative to it too, so this presumably
# has to be run from the repository root - confirm.
# NOTE(review): a large file sitting directly in the current directory yields
# the tag "."; the original script behaved the same way - confirm whether
# that case needs handling.

set -x # Enable debug output
set -e

# DRY_RUN=1 for local test, DRY_RUN=0 for actual upload.
# Taken from the environment when set; defaults to 0 (actual upload).
DRY_RUN=${DRY_RUN:-0}

# Only 0 and 1 are accepted. A non-numeric value would make the
# `[ ... -eq 1 ]` tests below error out, which reads as "false" inside an
# `if` and would silently turn a requested dry run into a real upload.
case "$DRY_RUN" in
  0 | 1) ;;
  *)
    echo "Error: DRY_RUN must be 0 or 1 (got '$DRY_RUN')." >&2
    exit 1
    ;;
esac

# Files strictly larger than this many bytes are upload candidates.
readonly SIZE_LIMIT_BYTES=$((100 * 1024 * 1024))

#######################################
# Exit with status 1 unless the GitHub CLI is installed and authenticated.
#######################################
require_gh() {
  if ! command -v gh >/dev/null 2>&1; then
    echo "Error: GitHub CLI (gh) is not installed. Please install it and authenticate." >&2
    exit 1
  fi
  if ! gh auth status >/dev/null 2>&1; then
    echo "Error: GitHub CLI (gh) is not authenticated. Please run 'gh auth login'." >&2
    exit 1
  fi
}

#######################################
# Print the size of a file in bytes.
# Uses `wc -c` rather than `stat`, because the stat flags differ between
# BSD/macOS (-f %z) and GNU (-c %s); `stat -f%z` fails on GNU systems.
# Arguments: $1 - path to an existing regular file
# Outputs:   the byte count on stdout
#######################################
file_size() {
  local bytes
  bytes=$(wc -c <"$1")
  # Some wc implementations pad the count with blanks; strip them.
  printf '%s\n' "${bytes//[[:space:]]/}"
}

#######################################
# Test whether .gitignore already has a line exactly equal to the path.
# Arguments: $1 - path to look for
# Returns:   0 if such a line exists, non-zero otherwise (including when
#            .gitignore does not exist)
#######################################
gitignore_lists() {
  [ -f .gitignore ] && grep -qxF -- "$1" .gitignore
}

#######################################
# Append a path to .gitignore on a line of its own.
# Arguments: $1 - path to append
#######################################
append_to_gitignore() {
  # If the file does not end in a newline, a plain append would glue the new
  # entry onto the last existing pattern, corrupting both. Command
  # substitution strips a trailing newline, so a non-empty result means the
  # last byte is something else.
  if [ -s .gitignore ] && [ -n "$(tail -c 1 .gitignore)" ]; then
    printf '\n' >>.gitignore
  fi
  printf '%s\n' "$1" >>.gitignore
}

#######################################
# Upload a file to the release with the given tag, creating the release
# first when it does not exist. An existing asset of the same name is
# replaced (--clobber).
# Arguments: $1 - release tag
#            $2 - path of the file to upload
#######################################
publish_asset() {
  local tag=$1
  local path=$2

  if ! gh release view "$tag" >/dev/null 2>&1; then
    echo "Creating release $tag..."
    gh release create "$tag" -t "$tag" -n "Auto-uploaded large files for $tag"
  else
    echo "Release $tag already exists."
  fi
  echo "Uploading $path to release $tag..."
  gh release upload "$tag" "$path" --clobber
}

#######################################
# Examine one path and, if it is a regular file above the size limit,
# upload it (or report it in dry-run mode).
# Globals:   DRY_RUN, SIZE_LIMIT_BYTES (read)
# Arguments: $1 - path to examine
#            $2 - 1 if the path came from the already-ignored list,
#                 0 otherwise; non-ignored files are also added to .gitignore
#######################################
process_file() {
  local path=$1
  local already_ignored=$2
  local size asset tag

  if [ "$already_ignored" -eq 1 ]; then
    echo "Checking ignored $path..."
  else
    echo "Checking $path..."
  fi

  # Skip directories, submodules, and tracked files missing from disk.
  [ -f "$path" ] || return 0

  size=$(file_size "$path")
  echo " Size: $size bytes"
  [ "$size" -gt "$SIZE_LIMIT_BYTES" ] || return 0

  asset=$(basename "$path")
  tag=$(basename "$(dirname "$path")")
  echo " Candidate for upload: $path (tag: $tag, asset: $asset)"

  if [ "$already_ignored" -eq 1 ]; then
    if [ "$DRY_RUN" -eq 1 ]; then
      echo "[DRY RUN] Would upload $path ($size bytes) with release tag '$tag' and asset name '$asset'"
      return 0
    fi
    echo "Uploading $path ($size bytes)"
    publish_asset "$tag" "$path"
    return 0
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    echo "[DRY RUN] Would process $path ($size bytes) with release tag '$tag' and asset name '$asset'"
    # As in the original script, a dry run still records the file in
    # .gitignore; only the release creation and upload are skipped.
    if ! gitignore_lists "$path"; then
      append_to_gitignore "$path"
      echo "[DRY RUN] Added $path to .gitignore"
    fi
    return 0
  fi

  echo "Processing $path ($size bytes)"
  if ! gitignore_lists "$path"; then
    append_to_gitignore "$path"
    echo "Added $path to .gitignore"
  fi
  publish_asset "$tag" "$path"
}

#######################################
# Entry point: verify prerequisites, snapshot both file lists, process them.
#######################################
main() {
  local -a candidate_files ignored_files
  local path

  candidate_files=()
  ignored_files=()

  require_gh

  # A failure inside a process substitution is invisible to `set -e`, so
  # make "not a git repository" fatal here, before the lists are read.
  git rev-parse --git-dir >/dev/null

  # Both lists are read NUL-delimited (-z) so that paths containing spaces
  # or glob characters stay intact, and both are captured before
  # .gitignore is touched so a file added to it below is not processed twice.

  # All files tracked or not ignored by git.
  while IFS= read -r -d '' path; do
    candidate_files+=("$path")
  done < <(git ls-files -z --others --exclude-standard --cached)

  # Ignored files (already in .gitignore).
  while IFS= read -r -d '' path; do
    ignored_files+=("$path")
  done < <(git ls-files -z --others --ignored --exclude-standard)

  for path in "${candidate_files[@]}"; do
    process_file "$path" 0
  done

  for path in "${ignored_files[@]}"; do
    process_file "$path" 1
  done

  echo "Done."
}

main "$@"

0 commit comments

Comments
 (0)