Skip to content

Commit ab00f6d

Browse files
committed
Add script to generate legacy SNB data sets
1 parent a2329ee commit ab00f6d

File tree

2 files changed

+63
-2
lines changed

2 files changed

+63
-2
lines changed

graphalytics-generate-old.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
# This script generates old versions of Graphalytics data sets.
44
# Beware that the script cleans the current git repository and discards any changes.
5-
# The script is intended to reproduce Graphalytics data sets, but it can be modified to produce SNB data sets as well.
65

76
# To run this script:
87
#
@@ -103,4 +102,3 @@ for VERSION in v0.2.6 v0.2.7 v0.2.8; do
103102
mv social_network ../datagen-graphs/social_network-$VERSION
104103
fi
105104
done
106-

snb-generate-old.sh

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#!/bin/bash
2+
3+
# This script generates old versions of SNB data sets.
4+
# Beware that the script cleans the current git repository and discards any changes.
5+
6+
# To run this script:
7+
#
8+
# 1. Configure Hadoop and set $HADOOP_HOME. Hadoop 2.6.0 works for all DATAGEN versions.
9+
#
10+
# 2. Make sure Hadoop's temp directory has enough space,
11+
# see https://github.com/ldbc/ldbc_snb_datagen/wiki/Troubleshooting#javaioioexception-no-space-left-on-device
12+
#
13+
# 3. Make sure the operating system's temp directory (e.g. /tmp) has enough space
14+
#
15+
# 4. Set up DATAGEN as required, ensuring that Hadoop has enough memory if it's not already configured.
16+
#
17+
# export HADOOP_CLIENT_OPTS=-Xmx20G
18+
#
19+
# 5. Set the following environment variables, e.g.
20+
#
21+
# export SCALE_FACTOR=30
22+
# export STORE=false # only set this to true if you have enough space to store all graphs
23+
#
24+
# 6. Move this script outside the ldbc_snb_datagen directory and run it.
25+
26+
# Set the environment variables.
# Values already exported by the caller are kept. To hard-code a value in the
# script instead, put it after the ":-", e.g. "${SCALE_FACTOR:-30}".
export SCALE_FACTOR="${SCALE_FACTOR:-}"
export STORE="${STORE:-}"

# Abort before touching the repository if either setting is missing.
if [ -z "$SCALE_FACTOR" ] || [ -z "$STORE" ]; then
  echo "Please set the SCALE_FACTOR and STORE variables in the script." >&2
  exit 1
fi
34+
35+
# Start generating graphs
cd ldbc_snb_datagen || { echo "Could not change directory into ldbc_snb_datagen" >&2; exit 1; }

# The log and the stored graphs are kept in the parent directory, outside the
# repository, because `git clean -fxd` below removes every untracked and
# ignored file inside it on each iteration.
log_file=../datagen-snb.log
graphs_dir=../datagen-graphs

echo "Generation sequence started" >> "$log_file"

if [ "$STORE" = true ]; then
  # -p: do not fail when the directory is left over from a previous run.
  mkdir -p "$graphs_dir"
fi

for VERSION in v0.2.1 v0.2.2 v0.2.3 v0.2.4 v0.2.5 v0.2.6 v0.2.7 v0.2.8; do
  echo "$VERSION" >> "$log_file"

  # Discard local modifications and untracked/ignored files, then switch to
  # the requested version.
  git checkout -- .
  git clean -fxd .
  if ! git checkout "$VERSION"; then
    # Continuing here would generate data from whatever revision is currently
    # checked out and log it under the wrong version label, so skip instead.
    echo "Could not check out $VERSION, skipping" | tee -a "$log_file" >&2
    continue
  fi

  # Write the generator configuration. The file starts with an empty line,
  # followed by the scale factor and the three CSV serializer settings.
  {
    echo
    echo "ldbc.snb.datagen.generator.scaleFactor:snb.interactive.$SCALE_FACTOR"
    echo "ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer"
    echo "ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVInvariantSerializer"
    echo "ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.snb.interactive.CSVPersonActivitySerializer"
  } > params.ini

  # NOTE(review): the exit status of run.sh is not checked; it is unclear
  # whether every old DATAGEN version returns 0 on success - confirm before
  # making a failure here fatal.
  ./run.sh

  # Log the number of lines after the first one (presumably the CSV header)
  # in the person and knows files.
  tail -n +2 social_network/person_0_0.csv | wc -l >> "$log_file"
  tail -n +2 social_network/person_knows_person_0_0.csv | wc -l >> "$log_file"

  if [ "$STORE" = true ]; then
    mv social_network "$graphs_dir/social_network-$VERSION"
  fi
done

0 commit comments

Comments
 (0)