Skip to content

Commit 5103d26

Browse files
authored
Merge pull request #44 from data-exp-lab/fe-dataset
Support Dataset Auto-Download
2 parents b714cad + 1e42878 commit 5103d26

File tree

1 file changed

+58
-0
lines changed

1 file changed

+58
-0
lines changed

start.sh

Lines changed: 58 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,64 @@ kill_port() {
2222
fi
2323
}
2424

25+
# Print the size of a file in bytes, or 0 when the size cannot be determined.
#
# Arguments: $1 - path of the file to measure
# Outputs:   the size as a decimal integer on stdout
_dataset_size_bytes() {
  local size
  # Try the BSD/macOS form of stat first, then the GNU form.
  size=$(stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null) || size=""
  # Anything that is not a plain non-negative integer is reported as 0 so
  # callers can compare the result numerically without a shell error.
  case "$size" in
    '' | *[!0-9]*) size=0 ;;
  esac
  printf '%s\n' "$size"
}

# Ensure the github_meta.duckdb dataset is present, downloading it if needed.
#
# A dataset already on disk is accepted only when it is larger than 1 MiB.
# Otherwise it is fetched from Hugging Face with wget (preferred) or curl.
# The download is written to "<target>.part" and moved into place only after
# the transfer succeeded and the result passed the same size check, so a
# failed or interrupted transfer never leaves a partial file at the target
# path that a later run would mistake for a valid dataset.
#
# Arguments: none
# Outputs:   progress and status messages on stdout
# Returns:   0 if a valid dataset is in place, 1 otherwise
check_and_download_dataset() {
  local data_dir data_file tmp_file url min_size file_size tool status

  data_dir="public/data"
  data_file="${data_dir}/github_meta.duckdb"
  tmp_file="${data_file}.part"
  url="https://huggingface.co/datasets/deepgit/github_meta/resolve/main/github_meta.duckdb"
  min_size=1048576 # 1 MiB; anything at or below this is treated as broken

  echo "🔍 Checking for github_meta.duckdb dataset..."

  # Check if file exists and has reasonable size (>1MB)
  if [ -f "$data_file" ]; then
    file_size=$(_dataset_size_bytes "$data_file")
    if [ "$file_size" -gt "$min_size" ]; then
      echo "✅ Dataset found (${file_size} bytes)"
      return 0
    fi
    echo "⚠️ Dataset exists but is too small (${file_size} bytes), will re-download"
  else
    echo "❌ Dataset not found"
  fi

  echo "📥 Downloading dataset from Hugging Face..."

  # Create data directory if it doesn't exist
  if ! mkdir -p "$data_dir"; then
    echo "❌ Could not create directory ${data_dir}"
    return 1
  fi

  # Pick the downloader; wget is preferred, curl is the fallback.
  status=0
  if command -v wget >/dev/null 2>&1; then
    tool="wget"
    echo "Using wget for download..."
    wget --progress=bar:force:noscroll -O "$tmp_file" "$url" || status=$?
  elif command -v curl >/dev/null 2>&1; then
    tool="curl"
    echo "Using curl for download..."
    # --fail makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as if it were the dataset.
    curl --fail -L -o "$tmp_file" --progress-bar "$url" || status=$?
  else
    echo "❌ Neither wget nor curl found. Please install one of them."
    return 1
  fi

  if [ "$status" -ne 0 ]; then
    rm -f -- "$tmp_file"
    echo "❌ Failed to download dataset with ${tool}"
    return 1
  fi

  # Apply the same sanity check to the fresh download as to an existing file.
  file_size=$(_dataset_size_bytes "$tmp_file")
  if [ "$file_size" -le "$min_size" ]; then
    rm -f -- "$tmp_file"
    echo "❌ Downloaded dataset is too small (${file_size} bytes)"
    return 1
  fi

  if ! mv -f -- "$tmp_file" "$data_file"; then
    rm -f -- "$tmp_file"
    echo "❌ Could not move downloaded dataset to ${data_file}"
    return 1
  fi

  echo "✅ Successfully downloaded dataset"
  return 0
}
76+
77+
# Make sure the dataset is in place first; stop the script if it cannot be obtained
check_and_download_dataset || {
  echo "❌ Dataset download failed. Exiting."
  exit 1
}
82+
2583
# Kill any existing process on port 5002
2684
kill_port 5002
2785

0 commit comments

Comments
 (0)