
Commit ff8b4f4

Merge pull request #7 from developmentseed/process-pipeline
Add process file and Docker run for batch
2 parents 1f78ed6 + 2507831


13 files changed: 905 additions, 478 deletions


.yarnrc.yml

Lines changed: 1 addition & 0 deletions
nodeLinker: node-modules

README.md

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 Visualize statistics and browse OSM diffs over a map.
 
 - [Front-end app](web-vite/README.md)
-- Pipeline
+- [Pipeline](pipeline/README.md)

parser.js

Lines changed: 0 additions & 38 deletions
This file was deleted.

pipeline/Dockerfile

Lines changed: 33 additions & 0 deletions
# Start with an official Node image
FROM node:18

# Update the system and install necessary dependencies
RUN apt-get update && \
    apt-get install -y parallel jq && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Create an app directory to hold the application code inside the image
WORKDIR /usr/src/app

# Copy package.json and package-lock.json (if present) into the container
COPY package*.json ./

# Install the Node dependencies
RUN npm install

# Copy the Node scripts into the container
COPY fetchOsc.js ./
COPY parser.js ./

# The main script that runs the tasks
COPY process.sh ./

# Install geojson-merge
RUN npm install -g geojson-merge

# Give execute permissions to the script
RUN chmod +x process.sh

# The command to run when the container starts
ENTRYPOINT [ "./process.sh" ]
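
Because process.sh is the ENTRYPOINT, anything passed after the image name on `docker run` becomes the script's positional arguments; a minimal sketch (the `osm-tardis` tag is an assumption):

```
docker run osm-tardis 2023-09-01 1 10   # runs ./process.sh 2023-09-01 1 10 inside the container
```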

pipeline/README.md

Lines changed: 9 additions & 0 deletions
## Usage

Generate the past 10 minutes of OSM data starting at a given date and hour (2023-09-01, hour 1 in this example):

```
docker build -t osm-tardis .
docker run -v ./data:/tmp osm-tardis 2023-09-01 1 10
docker run -it -v ./data:/tmp ghcr.io/osgeo/gdal:alpine-small-latest ogr2ogr -f "FlatGeobuf" /tmp/test.fgb /tmp/2023-09-01T01:00.geojsonld -skipfailures
```
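
With the volume mapping above, files that process.sh writes to /tmp inside the container should appear on the host under ./data; a quick sanity check of the converted FlatGeobuf, reusing the same GDAL image, might look like the following (file names follow the example run and are assumptions):

```
ls ./data                                  # expect 2023-09-01T01:00.geojsonld among the per-minute outputs
docker run -it -v ./data:/tmp ghcr.io/osgeo/gdal:alpine-small-latest ogrinfo -al -so /tmp/test.fgb
```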

pipeline/fetchOsc.js

Lines changed: 29 additions & 0 deletions
// maxRetries / retryInterval are reserved for retry logic that is not implemented yet
const maxRetries = 10;
const retryInterval = 5000;
const fs = require("fs");
const axios = require("axios");

const OSC_SOURCE_URL =
  "http://s3-eu-west-1.amazonaws.com/overpass-db-eu-west-1/augmented-diffs";

// Usage: node fetchOsc.js <sequenceNumber> <outputFileName>
const sequenceNumber = process.argv[2];
const outputFileName = process.argv[3];
const url = `${OSC_SOURCE_URL}/${sequenceNumber}.osc`;

async function downloadFile(url, outputFileName) {
  const oscFile = fs.createWriteStream(outputFileName);
  try {
    const res = await axios({
      method: "get",
      url,
      responseType: "stream",
    });
    // Wait until the whole response has been written to disk before resolving
    await new Promise((resolve, reject) => {
      res.data.pipe(oscFile);
      oscFile.on("finish", resolve);
      oscFile.on("error", reject);
    });
  } catch (err) {
    console.error(err);
  }
}

downloadFile(url, outputFileName).then(() => {
  console.log(`Downloaded ${url} to ${outputFileName}`);
});
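
fetchOsc.js can also be run standalone, exactly as process.sh invokes it, with a sequence number and an output path (the sequence number below is illustrative):

```
node fetchOsc.js 5768284 /tmp/5768284.osc
```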

package.json renamed to pipeline/package.json

Lines changed: 6 additions & 1 deletion
@@ -9,7 +9,12 @@
   "author": "",
   "license": "ISC",
   "dependencies": {
+    "axios": "^1.5.0",
     "osm-adiff-parser": "^1.1.0",
-    "real-changesets-parser": "https://github.com/developmentseed/real-changesets-parser.git"
+    "real-changesets-parser": "https://github.com/developmentseed/real-changesets-parser.git",
+    "serve": "^14.2.1"
+  },
+  "devDependencies": {
+    "geojson-validation": "^1.0.2"
   }
 }

pipeline/parser.js

Lines changed: 31 additions & 0 deletions
const adiffParser = require("osm-adiff-parser");
const changesetParser = require("real-changesets-parser");
const fs = require("fs");

// Read the file name from the command-line argument
const fileName = process.argv[2];

// Read the augmented diff (.osc) file
fs.readFile(fileName, "utf8", (err, data) => {
  if (err) {
    console.error("Error reading the file:", err);
    return;
  }
  // Guard so the block below only runs once, even if the parser
  // invokes the callback more than once
  let ranOnce = false;
  adiffParser(data, null, (err, result) => {
    if (ranOnce) {
      return;
    }
    ranOnce = true;
    // result maps changeset ids to arrays of changed elements
    Object.keys(result).forEach((changesetId) => {
      result[changesetId].forEach((element) => {
        const change = changesetParser.elementParser(element);

        change.forEach((feature) => {
          // do some validation here
          // Emit one GeoJSON feature per line (line-delimited GeoJSON)
          console.log(JSON.stringify(feature));
        });
      });
    });
  });
});
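
The "do some validation here" placeholder could be filled in with the geojson-validation package that this commit adds to devDependencies; a minimal sketch of that inner loop, not part of the committed code:

```
// Hypothetical validation step for parser.js, using the geojson-validation devDependency
const gjv = require("geojson-validation");

change.forEach((feature) => {
  if (gjv.valid(feature)) {
    // Only emit features that are valid GeoJSON
    console.log(JSON.stringify(feature));
  } else {
    // Trace mode returns the list of validation errors
    console.error("Skipping invalid feature:", gjv.valid(feature, true));
  }
});
```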

pipeline/process.sh

Lines changed: 66 additions & 0 deletions
#!/bin/bash

# Check if the necessary arguments were provided
if [[ -z "$1" ]] || [[ -z "$2" ]]; then
  echo "Usage: $0 <date> <hour> [num_files]"
  exit 1
fi

# Starting date provided as the first argument
start_date=$1

start_hour=$2

# Add a leading zero if the hour is less than 10
[[ $start_hour -lt 10 && $start_hour -ge 0 ]] && start_hour="0$start_hour"

# Number of files to fetch (one per minute), clamped to 1-60
num_files=${3:-60}
if (( num_files < 1 )); then
  num_files=1
fi
if (( num_files > 60 )); then
  num_files=60
fi

echo "Number of files to fetch: ${num_files}"

# Convert the start timestamp to an augmented diff sequence number
# (minutes since the Unix epoch, shifted by the fixed offset 22457216)
start_num=$(( $(date -d "${start_date}T${start_hour}:00:00Z" +%s)/60 - 22457216))

end_num=$((start_num + num_files - 1))

if [[ $start_num != $end_num ]]; then
  echo "Fetching files from number ${start_num} to ${end_num}"
else
  echo "Fetching file number ${start_num}"
fi

# Fetch .osc files using fetchOsc.js
fetchOscFunction() {
  sequence_num=$1
  file_path="/tmp/${sequence_num}.osc"
  node fetchOsc.js $sequence_num $file_path
}

export -f fetchOscFunction

seq $start_num $end_num | parallel fetchOscFunction

# Parse .osc files and collect into a single geojson using parser.js
echo 'Parsing files'
parallel -j +0 "
  echo {};
  file_path=\"/tmp/{}.osc\";
  node parser.js \$file_path > \"/tmp/${start_date}-${start_hour}-{}.geojsonld\"
" ::: $(seq $start_num $((end_num-1)))

echo 'Done parsing files'

echo "Combining files"
output_file="/tmp/${start_date}T${start_hour}:00.geojsonld"
cat /tmp/${start_date}-${start_hour}-*.geojsonld > $output_file
echo "Done combining files"

echo 'Replace nulls with 0'
sed -i 's/null/0/g' $output_file
echo 'Done replacing nulls with 0'
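
For reference, a worked example of the sequence-number arithmetic above: the fixed 22457216 offset appears to map minutes since the Unix epoch onto the augmented-diff sequence numbering, so for 2023-09-01 at 01:00 UTC the script computes:

```
echo $(( $(date -d "2023-09-01T01:00:00Z" +%s) / 60 - 22457216 ))   # 1693530000 / 60 - 22457216 = 5768284
```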

0 commit comments
