-
Notifications
You must be signed in to change notification settings - Fork 1
171 lines (145 loc) · 5.1 KB
/
process-historical-faa.yaml
File metadata and controls
171 lines (145 loc) · 5.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
name: Process Historical FAA Data
on:
workflow_dispatch: # Manual trigger
jobs:
generate-matrix:
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Generate date ranges
id: set-matrix
run: |
python3 << 'EOF'
import json
from datetime import datetime, timedelta
start = datetime(2023, 8, 16)
end = datetime(2026, 1, 1)
ranges = []
current = start
# Process in 4-day chunks
while current < end:
chunk_end = current + timedelta(days=4)
# Don't go past the end date
if chunk_end > end:
chunk_end = end
ranges.append({
"since": current.strftime("%Y-%m-%d"),
"until": chunk_end.strftime("%Y-%m-%d")
})
current = chunk_end
print(f"::set-output name=matrix::{json.dumps(ranges)}")
EOF
clone-faa-repo:
runs-on: ubuntu-latest
steps:
- name: Cache FAA repository
id: cache-faa-repo
uses: actions/cache@v4
with:
path: data/scrape-faa-releasable-aircraft
key: faa-repo-v1
- name: Clone FAA repository
if: steps.cache-faa-repo.outputs.cache-hit != 'true'
run: |
mkdir -p data
git clone https://github.com/simonw/scrape-faa-releasable-aircraft data/scrape-faa-releasable-aircraft
echo "Repository cloned successfully"
process-chunk:
needs: [generate-matrix, clone-faa-repo]
runs-on: ubuntu-latest
strategy:
max-parallel: 5 # Process 5 chunks at a time
matrix:
range: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Restore FAA repository cache
uses: actions/cache/restore@v4
with:
path: data/scrape-faa-releasable-aircraft
key: faa-repo-v1
fail-on-cache-miss: true
- name: Install dependencies
run: |
pip install -r requirements.txt
- name: Process chunk ${{ matrix.range.since }} to ${{ matrix.range.until }}
run: |
python src/get_historical_faa.py "${{ matrix.range.since }}" "${{ matrix.range.until }}"
- name: Upload CSV artifact
uses: actions/upload-artifact@v4
with:
name: csv-${{ matrix.range.since }}-to-${{ matrix.range.until }}
path: data/faa_releasable_historical/*.csv
retention-days: 1
create-release:
needs: process-chunk
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Download all artifacts
uses: actions/download-artifact@v4
with:
path: artifacts
- name: Prepare release files
run: |
mkdir -p release-files
find artifacts -name "*.csv" -exec cp {} release-files/ \;
ls -lh release-files/
- name: Create Release
uses: softprops/action-gh-release@v1
with:
tag_name: historical-faa-${{ github.run_number }}
name: Historical FAA Data Release ${{ github.run_number }}
body: |
Automated release of historical FAA aircraft data
Processing period: 2023-08-16 to 2026-01-01
Generated: ${{ github.event.repository.updated_at }}
files: release-files/*.csv
draft: false
prerelease: false
concatenate-and-release:
needs: process-chunk
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
- name: Install dependencies
run: |
pip install -r requirements.txt
- name: Download all artifacts
uses: actions/download-artifact@v4
with:
path: artifacts
- name: Prepare CSVs for concatenation
run: |
mkdir -p data/faa_releasable_historical
find artifacts -name "*.csv" -exec cp {} data/faa_releasable_historical/ \;
ls -lh data/faa_releasable_historical/
- name: Concatenate all CSVs
run: |
python scripts/concat_csvs.py
- name: Create Combined Release
uses: softprops/action-gh-release@v1
with:
tag_name: historical-faa-combined-${{ github.run_number }}
name: Historical FAA Data Combined Release ${{ github.run_number }}
body: |
Combined historical FAA aircraft data (all chunks concatenated)
Processing period: 2023-08-16 to 2026-01-01
Generated: ${{ github.event.repository.updated_at }}
files: data/openairframes/*.csv
draft: false
prerelease: false