#!/bin/bash

# This script generates old versions of Graphalytics data sets.
# Beware that the script cleans the ldbc_snb_datagen git working tree
# (git checkout -- . and git clean -fxd .) and discards any changes in it.
# The script is intended to reproduce Graphalytics data sets, but it can be
# modified to produce SNB data sets as well.
#
# To run this script:
#
# 1. Configure Hadoop and set $HADOOP_HOME. Hadoop 2.6.0 works for all DATAGEN versions.
#
# 2. Make sure Hadoop's temp directory has enough space,
#    see https://github.com/ldbc/ldbc_snb_datagen/wiki/Troubleshooting#javaioioexception-no-space-left-on-device
#
# 3. Make sure the operating system's temp directory (e.g. /tmp) has enough space.
#
# 4. Set up DATAGEN as required, ensuring that Hadoop has enough memory if it's not already configured.
#
#      export HADOOP_CLIENT_OPTS=-Xmx20G
#
# 5. Set the following environment variables, e.g.
#
#      export SCALE_FACTOR=30
#      export STORE=false # only set this to true if you have enough space to store all graphs
#
# 6. Move this script outside the ldbc_snb_datagen directory and run it.
#
# Outputs (relative to the directory the script is started from):
#   datagen-graphalytics.log  - version tags followed by row counts (header line excluded)
#   datagen-graphs/           - generated social_network directories, only when STORE=true

set -euo pipefail

# Take the values from the environment (step 5). To hard-code them instead,
# put the value after ':-', e.g. "${SCALE_FACTOR:-30}".
export SCALE_FACTOR="${SCALE_FACTOR:-}"
export STORE="${STORE:-}"

# Both paths are relative to the ldbc_snb_datagen directory, which is the
# working directory for everything after the 'cd' below.
readonly LOG_FILE=../datagen-graphalytics.log
readonly STORE_DIR=../datagen-graphs

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Reset the working tree and switch it to the given DATAGEN tag.
# Arguments: $1 - git tag to check out
# A failure is fatal: continuing would generate data from the wrong version
# while logging it under the requested tag.
checkout_version() {
  local version=$1

  printf '%s\n' "$version" >> "$LOG_FILE"

  git checkout -- . || die "Could not discard local changes before checking out $version"
  git clean -fxd . || die "Could not clean the working tree before checking out $version"
  git checkout "$version" || die "Could not check out $version"
}

# Write params.ini for a run that only emits person data.
# Arguments: $1 - fully qualified class name of the person serializer
# The file starts with an empty line, exactly as the original 'echo > params.ini' produced.
write_params() {
  local person_serializer=$1

  printf '%s\n' \
    '' \
    "ldbc.snb.datagen.generator.scaleFactor:graphalytics.${SCALE_FACTOR}" \
    "ldbc.snb.datagen.serializer.personSerializer:${person_serializer}" \
    'ldbc.snb.datagen.serializer.invariantSerializer:ldbc.snb.datagen.serializer.empty.EmptyInvariantSerializer' \
    'ldbc.snb.datagen.serializer.personActivitySerializer:ldbc.snb.datagen.serializer.empty.EmptyPersonActivitySerializer' \
    > params.ini
}

# Append the number of data rows (all lines except the header) of a CSV file to the log.
# Arguments: $1 - path to the CSV file
log_row_count() {
  local csv_file=$1

  [[ -f "$csv_file" ]] || die "Expected output file not found: $csv_file"
  tail -n +2 "$csv_file" | wc -l >> "$LOG_FILE"
}

# Run the generator once and record/store its output.
# Arguments: $1   - version tag (used in messages only)
#            $2   - person serializer class name
#            $3   - directory name to store social_network under when STORE=true
#            $4.. - CSV files inside social_network whose row counts are logged
generate_graph() {
  local version=$1
  local person_serializer=$2
  local target="${STORE_DIR}/$3"
  shift 3

  # Fail before the (long) generator run rather than after it: 'mv' into an
  # existing directory would silently nest the new output inside the old one.
  if [[ "$STORE" == true && -e "$target" ]]; then
    die "Refusing to overwrite existing output: $target"
  fi

  write_params "$person_serializer"
  ./run.sh || die "run.sh failed for $version ($person_serializer)"

  local csv_name
  for csv_name in "$@"; do
    log_row_count "social_network/${csv_name}"
  done

  if [[ "$STORE" == true ]]; then
    mv social_network "$target" || die "Could not move social_network to $target"
  fi
}

if [[ -z "$SCALE_FACTOR" || -z "$STORE" ]]; then
  die "Please set the SCALE_FACTOR and STORE variables (in the environment or in the script)."
fi

# Start generating graphs
cd ldbc_snb_datagen || die "Could not change directory into ldbc_snb_datagen"
echo "Generation sequence started" >> "$LOG_FILE"

if [[ "$STORE" == true ]]; then
  # -p: do not fail when the directory is left over from an earlier run
  mkdir -p "$STORE_DIR" || die "Could not create $STORE_DIR"
fi

# For versions 0.2.1-0.2.5, we need two runs: one for producing the vertices and another to produce the edges.
for VERSION in v0.2.1 v0.2.2 v0.2.3 v0.2.4 v0.2.5; do
  checkout_version "$VERSION"

  # vertices
  generate_graph "$VERSION" \
    ldbc.snb.datagen.serializer.snb.interactive.CSVPersonSerializer \
    "social_network-${SCALE_FACTOR}-${VERSION}-vertices" \
    person_0_0.csv

  # edges
  # from version 0.2.2, it's also possible to use the CSVPersonSerializerWithWeights serializer, which adds edge weights
  generate_graph "$VERSION" \
    ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializer \
    "social_network-${SCALE_FACTOR}-${VERSION}-edges" \
    person_knows_person_0_0.csv
done

# For versions 0.2.6-0.2.8, we only need a single run, which produces both the vertices and the edges
# using the CSVPersonSerializerExtended class, which also produces edge weights
for VERSION in v0.2.6 v0.2.7 v0.2.8; do
  checkout_version "$VERSION"

  # vertices and edges
  # The scale factor is part of the directory name (as in the loop above) so
  # that runs with different scale factors do not collide.
  generate_graph "$VERSION" \
    ldbc.snb.datagen.serializer.graphalytics.CSVPersonSerializerExtended \
    "social_network-${SCALE_FACTOR}-${VERSION}" \
    person_0_0.csv \
    person_knows_person_0_0.csv
done

0 commit comments