Skip to content

Commit a0c9013

Browse files
authored
Merge pull request #77 from ldbc/generate-old
Add script to generate graphs of old versions
2 parents db5e4f3 + ae6d99f commit a0c9013

File tree

1 file changed

+106
-0
lines changed

1 file changed

+106
-0
lines changed

graphalytics-generate-old.sh

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#!/bin/bash
2+
3+
# This script generates old versions of Graphalytics data sets.
4+
# Beware that the script cleans the current git repository and discards any changes.
5+
# The script is intended to reproduce Graphalytics data sets, but it can be modified to produce SNB data sets as well.
6+
7+
# To run this script:
8+
#
9+
# 1. Configure Hadoop and set $HADOOP_HOME. Hadoop 2.6.0 works for all DATAGEN versions.
10+
#
11+
# 2. Make sure Hadoop's temp directory has enough space,
12+
# see https://github.com/ldbc/ldbc_snb_datagen/wiki/Troubleshooting#javaioioexception-no-space-left-on-device
13+
#
14+
# 3. Make sure the operating system's temp directory (e.g. /tmp) has enough space
15+
#
16+
# 4. Set up DATAGEN as required, ensuring that Hadoop has enough memory if it's not already configured.
17+
#
18+
# export HADOOP_CLIENT_OPTS=-Xmx20G
19+
#
20+
# 5. Set the following environment variables, e.g.
21+
#
22+
# export SCALE_FACTOR=30
23+
# export STORE=false # only set this to true if you have enough space to store all graphs
24+
#
25+
# 6. Place this script in the directory that contains the ldbc_snb_datagen directory (not inside it) and run it from there.
26+
27+
# Set the environment variables.
# Values already exported by the caller are kept as they are. To hard-code a
# value in the script instead, put it after ":-" (e.g. "${SCALE_FACTOR:-30}").
export SCALE_FACTOR="${SCALE_FACTOR:-}"
export STORE="${STORE:-}"

# Abort when either setting is missing or empty.
if [[ -z "$SCALE_FACTOR" || -z "$STORE" ]]; then
  echo "Please set the SCALE_FACTOR and STORE variables in the script." >&2
  exit 1
fi
35+
36+
# Start generating graphs.
# Everything below runs inside the ldbc_snb_datagen checkout; the log file and
# the optional datagen-graphs directory live one level up, next to the checkout.
cd ldbc_snb_datagen || { echo "Could not change directory into ldbc_snb_datagen" >&2; exit 1; }
echo "Generation sequence started" >> ../datagen-graphalytics.log

# The storage directory is only needed when the generated graphs are kept.
# -p stops mkdir from reporting an error when the directory already exists,
# e.g. when the script is run a second time.
if [[ "$STORE" == true ]]; then
  mkdir -p ../datagen-graphs
fi
43+
44+
# For versions 0.2.1-0.2.5, we need two runs: one for producing the vertices and another to produce the edges.

#######################################
# Writes params.ini for one DATAGEN run.
# The file starts with an empty line, followed by the scale factor and the
# three serializer settings; the invariant and person-activity serializers
# are always set to the Empty* classes.
# Globals:   SCALE_FACTOR (read)
# Arguments: $1 - fully qualified class name of the person serializer
# Outputs:   overwrites params.ini in the current directory
#######################################
write_person_params() {
  {
    echo
    echo "ldbc.snb.datagen.generator.scaleFactor:graphalytics.$SCALE_FACTOR"
    echo "ldbc.snb.datagen.serializer.personSerializer:$1"
    echo "ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.empty.EmptyInvariantSerializer"
    echo "ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.empty.EmptyPersonActivitySerializer"
  } > params.ini
}

for VERSION in v0.2.1 v0.2.2 v0.2.3 v0.2.4 v0.2.5; do
  echo "$VERSION" >> ../datagen-graphalytics.log

  # Discard local modifications, delete untracked and ignored files, then
  # switch to the requested version.
  git checkout -- .
  git clean -fxd .
  if ! git checkout "$VERSION"; then
    # Generating from whatever revision is currently checked out would be
    # logged under the wrong version label, so skip this version instead.
    echo "Could not check out $VERSION, skipping" >&2
    echo "Could not check out $VERSION, skipping" >> ../datagen-graphalytics.log
    continue
  fi

  # vertices
  write_person_params ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer

  ./run.sh
  # Log the line count of the person file, not counting its first line.
  tail -n +2 social_network/person_0_0.csv | wc -l >> ../datagen-graphalytics.log

  if [[ "$STORE" == true ]]; then
    mv social_network "../datagen-graphs/social_network-$SCALE_FACTOR-$VERSION-vertices"
  fi

  # edges
  # from version 0.2.2, it's also possible to use the CSVPersonSerializerWithWeights serializer, which adds edge weights
  write_person_params ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializer

  ./run.sh
  # Log the line count of the knows-edge file, not counting its first line.
  tail -n +2 social_network/person_knows_person_0_0.csv | wc -l >> ../datagen-graphalytics.log

  if [[ "$STORE" == true ]]; then
    mv social_network "../datagen-graphs/social_network-$SCALE_FACTOR-$VERSION-edges"
  fi
done
81+
82+
# For versions 0.2.6-0.2.8, we only need a single run, which produces both the vertices and the edges
# using the CSVPersonSerializerExtended class, which also produces edge weights
for VERSION in v0.2.6 v0.2.7 v0.2.8; do
  echo "$VERSION" >> ../datagen-graphalytics.log

  # Discard local modifications, delete untracked and ignored files, then
  # switch to the requested version.
  git checkout -- .
  git clean -fxd .
  if ! git checkout "$VERSION"; then
    # Generating from whatever revision is currently checked out would be
    # logged under the wrong version label, so skip this version instead.
    echo "Could not check out $VERSION, skipping" >&2
    echo "Could not check out $VERSION, skipping" >> ../datagen-graphalytics.log
    continue
  fi

  # vertices and edges
  # params.ini starts with an empty line, followed by the four settings.
  {
    echo
    echo "ldbc.snb.datagen.generator.scaleFactor:graphalytics.$SCALE_FACTOR"
    echo "ldbc.snb.datagen.serializer.personSerializer:ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializerExtended"
    echo "ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.empty.EmptyInvariantSerializer"
    echo "ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.empty.EmptyPersonActivitySerializer"
  } > params.ini

  ./run.sh
  # Log the line counts of the person and knows-edge files, not counting
  # the first line of each.
  tail -n +2 social_network/person_0_0.csv | wc -l >> ../datagen-graphalytics.log
  tail -n +2 social_network/person_knows_person_0_0.csv | wc -l >> ../datagen-graphalytics.log

  if [[ "$STORE" == true ]]; then
    # The scale factor is part of the directory name so that graphs generated
    # with different scale factors do not end up in the same directory.
    mv social_network "../datagen-graphs/social_network-$SCALE_FACTOR-$VERSION"
  fi
done
106+

0 commit comments

Comments
 (0)