Skip to content

Commit 2085338

Browse files
authored
Merge pull request #38 from developmentseed/new-pipeline
New changeset parsing pipeline
2 parents 7dd8680 + 4df4084 commit 2085338

File tree

12 files changed

+364
-783
lines changed

12 files changed

+364
-783
lines changed

.github/workflows/pipeline.yaml

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Builds the pipeline image, processes the OSM changesets for the UTC hour that
# was current two hours before the run, converts the resulting GeoJSON to
# FlatGeobuf, and uploads the .fgb file to Google Cloud Storage.
name: Hourly Pipeline

on:
  workflow_dispatch:
  schedule:
    - cron: "50 * * * *"

jobs:
  generate-hourly-fgb:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build docker image
        run: docker build -t gradient-pipeline ./pipeline

      # Derive the date and the hour from ONE timestamp so they can never
      # disagree when the job happens to run across an hour (or day) boundary.
      # -u pins the values to UTC, which is what the CLI expects.
      - name: set date and hour
        id: set_time
        run: |
          TARGET_EPOCH=$(date -u -d "2 hours ago" +%s)
          DATE_TWO_HOURS_AGO=$(date -u -d "@$TARGET_EPOCH" +%Y-%m-%d)
          HOUR_TWO_HOURS_AGO=$(date -u -d "@$TARGET_EPOCH" +%H)
          echo "DATE_TWO_HOURS_AGO=$DATE_TWO_HOURS_AGO" >> "$GITHUB_OUTPUT"
          echo "HOUR_TWO_HOURS_AGO=$HOUR_TWO_HOURS_AGO" >> "$GITHUB_OUTPUT"

      # GITHUB_OUTPUT points at a fresh file in every step, so the values have
      # to be read back through the step outputs rather than by cat-ing the file.
      - name: Check if the date and hour are set
        run: |
          echo "DATE_TWO_HOURS_AGO=$DATE_TWO_HOURS_AGO"
          echo "HOUR_TWO_HOURS_AGO=$HOUR_TWO_HOURS_AGO"
          test -n "$DATE_TWO_HOURS_AGO"
          test -n "$HOUR_TWO_HOURS_AGO"
        env:
          DATE_TWO_HOURS_AGO: ${{ steps.set_time.outputs.DATE_TWO_HOURS_AGO }}
          HOUR_TWO_HOURS_AGO: ${{ steps.set_time.outputs.HOUR_TWO_HOURS_AGO }}

      - name: process changesets for the hour ${{ steps.set_time.outputs.HOUR_TWO_HOURS_AGO }}:00 on ${{ steps.set_time.outputs.DATE_TWO_HOURS_AGO }}
        run: |
          docker run -v ./data:/tmp gradient-pipeline sh -c "node cli.js process-hour $DATE_TWO_HOURS_AGO $HOUR_TWO_HOURS_AGO"
        env:
          DATE_TWO_HOURS_AGO: ${{ steps.set_time.outputs.DATE_TWO_HOURS_AGO }}
          HOUR_TWO_HOURS_AGO: ${{ steps.set_time.outputs.HOUR_TWO_HOURS_AGO }}

      - name: convert to fgb for the hour ${{ steps.set_time.outputs.HOUR_TWO_HOURS_AGO }}:00 on ${{ steps.set_time.outputs.DATE_TWO_HOURS_AGO }}
        run: |
          docker run -v ./data:/tmp gradient-pipeline sh -c "ogr2ogr -f FlatGeobuf /tmp/${DATE_TWO_HOURS_AGO}T${HOUR_TWO_HOURS_AGO}:00.fgb /tmp/${DATE_TWO_HOURS_AGO}T${HOUR_TWO_HOURS_AGO}:00.geojson -skipfailures"
        env:
          DATE_TWO_HOURS_AGO: ${{ steps.set_time.outputs.DATE_TWO_HOURS_AGO }}
          HOUR_TWO_HOURS_AGO: ${{ steps.set_time.outputs.HOUR_TWO_HOURS_AGO }}

      # ls exits non-zero when no .fgb file exists, which fails the job before
      # anything is uploaded.
      - name: Check if the pipeline ran successfully
        run: ls -lh ./data/*.fgb

      - name: Authenticate with GCP
        uses: google-github-actions/auth@v2
        with:
          credentials_json: ${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}

      - name: Upload to GCS
        uses: google-github-actions/upload-cloud-storage@v2
        with:
          path: data
          destination: osm-tardis
          parent: false
          glob: '*.fgb'

.gitignore

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,8 @@ node_modules/
22
.yarn
33
out
44
package-lock.json
5-
.pnp*
5+
.pnp*
6+
7+
pipeline/node_modules
8+
pipeline/.yarn
9+
pipeline/data

pipeline/Dockerfile

Lines changed: 9 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,15 @@
1-
# Start with an official Node image
2-
FROM node:18
1+
FROM ghcr.io/osgeo/gdal:alpine-small-latest
32

4-
# Update the system and install necessary dependencies
5-
RUN apt-get update && \
6-
apt-get install -y parallel jq && \
7-
apt-get clean && \
8-
rm -rf /var/lib/apt/lists/*
3+
RUN apk add --no-cache nodejs yarn git
94

10-
# Create an app directory to hold the application code inside the image
11-
WORKDIR /usr/src/app
5+
WORKDIR /app
126

13-
# Copy your package.json and package-lock.json (if you have one) into the container
14-
COPY package*.json ./
7+
COPY package.json ./
8+
COPY yarn.lock ./
159

16-
# Install your Node dependencies
17-
RUN npm install
10+
RUN yarn install
1811

1912
# Copy your Node scripts into the container
20-
COPY fetchOsc.js ./
21-
COPY parser.js ./
22-
23-
# The main script to run the tasks
24-
COPY process.sh ./
25-
26-
# Install geojson-merge
27-
RUN npm install -g geojson-merge
28-
29-
# Give execute permissions to the script
30-
RUN chmod +x process.sh
31-
32-
# The command to run when the container starts
33-
ENTRYPOINT [ "./process.sh" ]
13+
COPY src/ ./src/
14+
COPY cli.js ./
15+
RUN chmod +x cli.js

pipeline/cli.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env node
2+
3+
const { program } = require('commander');
4+
5+
const { getHourlyReplicationFileURL, getChangesetIDs } = require('./src/utils');
6+
const { processChangesets, processChangeset } = require('./src/process');
7+
8+
/**
 * CLI action for `process-hour`: resolves the hourly replication file URL for
 * the given date and hour, fetches the changeset IDs listed there, and hands
 * them to `processChangesets`.
 *
 * @param {string} date - The date in the format 'YYYY-MM-DD'.
 * @param {string|number} hour - The hour of the day (0-23). NOTE(review): when
 *   invoked through the command line this is presumably passed as a string.
 * @returns {Promise<void>} Resolves after `processChangesets` has completed
 *   (it is awaited in case it returns a promise); rejects if any step fails.
 */
async function run(date, hour) {
  const url = getHourlyReplicationFileURL(date, hour);
  const changesets = await getChangesetIDs(url);
  // Await the processing so that failures propagate to the caller instead of
  // surfacing as an unhandled rejection after this function has resolved.
  await processChangesets(changesets, date, hour);
}
21+
22+
/**
 * CLI action for `process-changeset`: forwards the `<changeset>` argument to
 * `processChangeset` and waits for it to finish.
 *
 * @param {*} changeset - The `<changeset>` argument given on the command line.
 * @returns {Promise<void>} Resolves once `processChangeset` has completed.
 */
async function processSingleChangeset(changeset) {
  const pending = processChangeset(changeset);
  await pending;
}
25+
26+
// Register the two sub-commands exposed by this CLI.
program
  .command('process-hour <date> <hour>')
  .description('Process an hour of changesets starting from a given date and hour (in UTC) and combine the changed features into a single GeoJSON file.')
  .action(run);

program
  .command('process-changeset <changeset>')
  .description('Process a single changeset and save the features to a JSON file - for debugging purposes.')
  .action(processSingleChangeset);

// parseAsync returns a promise that rejects when an async action fails; handle
// it explicitly so the error is reported and the process exits non-zero,
// rather than leaving the promise floating.
program.parseAsync(process.argv).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

pipeline/fetchOsc.js

Lines changed: 0 additions & 29 deletions
This file was deleted.

pipeline/package.json

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
{
2-
"name": "osmgradient",
2+
"name": "osm-gradient-pipeline",
33
"version": "1.0.0",
44
"description": "Minutely metrics for OSM using FlatGeobuf",
5-
"main": "index.js",
65
"scripts": {
76
"test": "echo \"Error: no test specified\" && exit 1"
87
},
98
"author": "",
109
"license": "ISC",
1110
"dependencies": {
1211
"axios": "^1.5.0",
13-
"osm-adiff-parser": "^1.1.0",
12+
"commander": "12.0.0",
1413
"real-changesets-parser": "https://github.com/developmentseed/real-changesets-parser.git",
15-
"serve": "^14.2.1"
14+
"sax": "^1.3.0"
1615
},
1716
"devDependencies": {
1817
"geojson-validation": "^1.0.2"

pipeline/parser.js

Lines changed: 0 additions & 31 deletions
This file was deleted.

pipeline/process.sh

Lines changed: 0 additions & 66 deletions
This file was deleted.

pipeline/src/config.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Fallback used when the DATA_PATH environment variable is unset or empty.
const DEFAULT_DATA_PATH = '/tmp';

/**
 * Runtime configuration for the pipeline.
 *
 * DATA_PATH is taken from the DATA_PATH environment variable, falling back to
 * '/tmp'. NOTE(review): presumably the directory where the pipeline reads and
 * writes its data files — confirm against the modules that import this.
 */
const config = {
  DATA_PATH: process.env.DATA_PATH || DEFAULT_DATA_PATH,
};

module.exports = config;

0 commit comments

Comments
 (0)