Skip to content

Commit 2bb0a5e

Browse files
authored
Merge pull request PlaneQuery#33 from PlaneQuery/develop
Develop to Main: Handle ADSB when ADSB.lol has not released any data for the day; just re-release the latest ADSB data.
2 parents 2dda3d3 + b54f33a commit 2bb0a5e

File tree

2 files changed

+67
-22
lines changed

2 files changed

+67
-22
lines changed

.github/workflows/openairframes-daily-release.yaml

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,51 @@ jobs:
101101
date: ${{ needs.resolve-dates.outputs.adsb_date }}
102102
concat_with_latest_csv: true
103103

104+
adsb-reduce:
105+
needs: [resolve-dates, adsb-to-aircraft]
106+
if: always() && github.event_name != 'schedule' && needs.adsb-to-aircraft.result == 'failure'
107+
runs-on: ubuntu-24.04-arm
108+
steps:
109+
- name: Checkout
110+
uses: actions/checkout@v6
111+
112+
- name: Setup Python
113+
uses: actions/setup-python@v6
114+
with:
115+
python-version: '3.12'
116+
117+
- name: Install dependencies
118+
run: |
119+
python -m pip install --upgrade pip
120+
pip install -r requirements.txt
121+
122+
- name: Download compressed outputs
123+
uses: actions/download-artifact@v4
124+
with:
125+
pattern: adsb-compressed-${{ needs.resolve-dates.outputs.adsb_date }}-part-*
126+
path: data/output/compressed/${{ needs.resolve-dates.outputs.adsb_date }}
127+
merge-multiple: true
128+
129+
- name: Concatenate final outputs
130+
env:
131+
DATE: ${{ needs.resolve-dates.outputs.adsb_date }}
132+
CONCAT_WITH_LATEST_CSV: true
133+
run: |
134+
EXTRA=""
135+
if [ "$CONCAT_WITH_LATEST_CSV" = "true" ]; then
136+
EXTRA="--concat_with_latest_csv"
137+
fi
138+
python -m src.adsb.concat_parquet_to_final --date "$DATE" $EXTRA
139+
ls -lah data/output/ || true
140+
141+
- name: Upload final artifacts
142+
uses: actions/upload-artifact@v4
143+
with:
144+
name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }}
145+
path: data/output/openairframes_adsb_*
146+
retention-days: 30
147+
if-no-files-found: error
148+
104149
build-community:
105150
runs-on: ubuntu-latest
106151
if: github.event_name != 'schedule'
@@ -188,13 +233,13 @@ jobs:
188233

189234
create-release:
190235
runs-on: ubuntu-latest
191-
needs: [resolve-dates, build-faa, adsb-to-aircraft, build-community, build-adsbexchange-json, build-mictronics-db]
236+
needs: [resolve-dates, build-faa, adsb-to-aircraft, adsb-reduce, build-community, build-adsbexchange-json, build-mictronics-db]
192237
if: github.event_name != 'schedule' && !cancelled()
193238
steps:
194-
- name: Check adsb-to-aircraft status
195-
if: needs.adsb-to-aircraft.result != 'success'
239+
- name: Check ADS-B workflow status
240+
if: needs.adsb-to-aircraft.result != 'success' && needs.adsb-reduce.result != 'success'
196241
run: |
197-
echo "WARNING: adsb-to-aircraft result was '${{ needs.adsb-to-aircraft.result }}', will continue without ADS-B artifacts"
242+
echo "WARNING: ADS-B workflow failed (adsb-to-aircraft='${{ needs.adsb-to-aircraft.result }}', adsb-reduce='${{ needs.adsb-reduce.result }}'), will continue without ADS-B artifacts"
198243
199244
- name: Checkout for gh CLI
200245
uses: actions/checkout@v4
@@ -211,7 +256,7 @@ jobs:
211256

212257
- name: Download ADS-B artifacts
213258
uses: actions/download-artifact@v5
214-
if: needs.adsb-to-aircraft.result == 'success'
259+
if: needs.adsb-to-aircraft.result == 'success' || needs.adsb-reduce.result == 'success'
215260
continue-on-error: true
216261
with:
217262
name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }}

src/adsb/concat_parquet_to_final.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from pathlib import Path
22
import polars as pl
33
import argparse
4-
4+
import os
55
OUTPUT_DIR = Path("./data/output")
66
CORRECT_ORDER_OF_COLUMNS = ["time", "icao", "r", "t", "dbFlags", "ownOp", "year", "desc", "aircraft_category"]
77

@@ -13,26 +13,25 @@ def main():
1313

1414
compressed_dir = OUTPUT_DIR / "compressed"
1515
date_dir = compressed_dir / args.date
16-
if not date_dir.is_dir():
17-
raise FileNotFoundError(f"No date folder found: {date_dir}")
1816

1917
parquet_files = sorted(date_dir.glob("*.parquet"))
20-
if not parquet_files:
21-
raise FileNotFoundError(f"No parquet files found in {date_dir}")
18+
df = None
19+
if parquet_files: # TODO: This logic could be updated slightly.
20+
print(f"No parquet files found in {date_dir}")
2221

23-
frames = [pl.read_parquet(p) for p in parquet_files]
24-
df = pl.concat(frames, how="vertical", rechunk=True)
22+
frames = [pl.read_parquet(p) for p in parquet_files]
23+
df = pl.concat(frames, how="vertical", rechunk=True)
2524

26-
df = df.sort(["time", "icao"])
27-
df = df.select(CORRECT_ORDER_OF_COLUMNS)
28-
29-
output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
30-
print(f"Writing combined parquet to {output_path} with {df.height} rows")
31-
df.write_parquet(output_path)
25+
df = df.sort(["time", "icao"])
26+
df = df.select(CORRECT_ORDER_OF_COLUMNS)
27+
28+
output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
29+
print(f"Writing combined parquet to {output_path} with {df.height} rows")
30+
df.write_parquet(output_path)
3231

33-
csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
34-
print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
35-
df.write_csv(csv_output_path, compression="gzip")
32+
csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
33+
print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
34+
df.write_csv(csv_output_path, compression="gzip")
3635

3736
if args.concat_with_latest_csv:
3837
print("Loading latest CSV from GitHub releases to concatenate with...")
@@ -46,9 +45,10 @@ def main():
4645
csv_end_dt = datetime.strptime(csv_end_date, "%Y-%m-%d")
4746
args_dt = datetime.strptime(args.date, "%Y-%m-%d")
4847

49-
if csv_end_dt >= args_dt:
48+
if df is None or csv_end_dt >= args_dt:
5049
print(f"Latest CSV already includes data through {args.date} (end_date={csv_end_date} is exclusive)")
5150
print("Writing latest CSV directly without concatenation to avoid duplicates")
51+
os.makedirs(OUTPUT_DIR, exist_ok=True)
5252
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{csv_end_date}.csv.gz"
5353
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
5454
df_latest_csv.write_csv(final_csv_output_path, compression="gzip")

0 commit comments

Comments
 (0)