
Commit b2d6956: Update structure
1 parent 743720e

File tree: 21 files changed (+2191 additions, -229 deletions)
Lines changed: 143 additions & 0 deletions
@@ -0,0 +1,143 @@
#!/usr/bin/env python3
import csv
import sys
import io
import os
import argparse
import urllib.request
import zipfile
from collections import defaultdict


def gather_lines_picto(root_dir):
    """Walk `root_dir` and find all files named `lines_picto.csv`.
    Return tuple (agencies_dict, list_of_files_read).
    """
    agencies = defaultdict(set)
    files_read = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for fn in filenames:
            if fn == 'lines_picto.csv':
                full = os.path.join(dirpath, fn)
                files_read.append(full)
                try:
                    with open(full, encoding='utf-8') as f:
                        reader = csv.DictReader(f, delimiter=';')
                        if reader.fieldnames:
                            reader.fieldnames = [x.lstrip('\ufeff') for x in reader.fieldnames]
                        for row in reader:
                            aid = row.get('agency_id')
                            lid = row.get('line_id')
                            if not aid or lid is None:
                                continue
                            lid_s = str(lid).strip()
                            if lid_s:
                                agencies[aid].add(lid_s)
                except Exception as e:
                    print(f'Failed to read {full}: {e}')
    return agencies, files_read


def check_gtfs_for_agencies(agencies, timeout=30):
    errors = []
    total_agencies = len(agencies)
    count = 0
    for agency, line_ids in agencies.items():
        count += 1
        url = f'https://clarifygdps.com/bridge/gtfs/{agency}.zip'
        print(f'[{count}/{total_agencies}] Checking GTFS for agency "{agency}" -> {url}')
        try:
            resp = urllib.request.urlopen(url, timeout=timeout)
            data = resp.read()
        except Exception as e:
            msg = f'Failed to download {url}: {e}'
            print(f'Agency {agency}: ERROR - {msg}')
            errors.append(msg)
            continue
        try:
            z = zipfile.ZipFile(io.BytesIO(data))
        except Exception as e:
            msg = f'Invalid zip for {agency}: {e}'
            print(f'Agency {agency}: ERROR - {msg}')
            errors.append(msg)
            continue

        namelist = z.namelist()
        rname = None
        for n in namelist:
            if n.endswith('routes.txt'):
                rname = n
                break
        if not rname:
            msg = f'No routes.txt in GTFS for {agency} (found files: {namelist[:10]})'
            print(f'Agency {agency}: ERROR - {msg}')
            errors.append(msg)
            continue

        try:
            with z.open(rname) as rf:
                txt = io.TextIOWrapper(rf, encoding='utf-8', errors='replace')
                reader = csv.DictReader(txt)
                if reader.fieldnames:
                    reader.fieldnames = [fn.lstrip('\ufeff') for fn in reader.fieldnames]
                routes = list(reader)
        except Exception as e:
            msg = f'Failed to read routes.txt for {agency}: {e}'
            print(f'Agency {agency}: ERROR - {msg}')
            errors.append(msg)
            continue

        route_ids = set()
        for r in routes:
            if isinstance(r, dict):
                rid = r.get('route_id')
                if rid is None:
                    continue
                rid_s = str(rid).strip()
                if rid_s:
                    route_ids.add(rid_s)

        csv_ids = set(str(x).strip() for x in line_ids if x is not None and str(x).strip())
        missing = sorted([lid for lid in csv_ids if lid not in route_ids])
        if missing:
            msg = f'{len(missing)} missing line_id(s) not found in routes.txt: {missing[:20]}'
            print(f'Agency {agency}: ERROR - {msg}')
            errors.append(f'Agency {agency}: {msg}')
        else:
            print(f'Agency {agency}: OK ({len(line_ids)} line_ids matched)')

    return errors


def main():
    parser = argparse.ArgumentParser(description='Check GTFS routes for line IDs listed in lines_picto.csv files under a logo directory.')
    parser.add_argument('--logo-dir', default='logo', help='Path to the logo directory to search (default: logo)')
    parser.add_argument('--timeout', type=int, default=30, help='Network timeout seconds when downloading GTFS (default: 30)')
    args = parser.parse_args()

    if not os.path.isdir(args.logo_dir):
        print(f'Logo directory "{args.logo_dir}" not found or is not a directory')
        sys.exit(1)

    agencies, files_read = gather_lines_picto(args.logo_dir)
    if not files_read:
        print(f'No lines_picto.csv files found under "{args.logo_dir}"')
        sys.exit(1)

    print(f'Parsed {len(agencies)} agency(ies) from {len(files_read)} file(s):')
    for p in files_read:
        print(' -', p)

    errors = check_gtfs_for_agencies(agencies, timeout=args.timeout)

    if errors:
        print('\nGTFS verification errors:')
        for e in errors:
            print(' -', e)
        sys.exit(1)
    else:
        print('\nAll GTFS checks passed.')


if __name__ == '__main__':
    main()
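
For reference, a minimal sketch of the input this checker expects. Only the column names, the ';' delimiter, and the fact that the file must live somewhere under the logo directory come from the script above; the agency ID, line IDs, and subdirectory below are invented for illustration.

# Hypothetical example input for the checker above; 'demo-agency' and its
# line IDs are made up. With this file in place, the checker would fetch
# https://clarifygdps.com/bridge/gtfs/demo-agency.zip and verify that every
# line_id appears as a route_id in that feed's routes.txt.
import csv
import os

os.makedirs('logo/demo-agency', exist_ok=True)
with open('logo/demo-agency/lines_picto.csv', 'w', encoding='utf-8', newline='') as f:
    w = csv.DictWriter(f, fieldnames=['agency_id', 'line_id'], delimiter=';')
    w.writeheader()
    w.writerows([
        {'agency_id': 'demo-agency', 'line_id': 'L1'},
        {'agency_id': 'demo-agency', 'line_id': 'L2'},
    ])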
Lines changed: 166 additions & 0 deletions
@@ -0,0 +1,166 @@
#!/usr/bin/env python3
import json
import sys
import io
import os
import argparse
import urllib.request
import zipfile
import csv
from collections import defaultdict


def gather_trafic_json(root_dir):
    """Walk `root_dir` and find all files named `trafic.json`.
    Returns tuple (agencies_dict, files_read).
    Supports files where the JSON is either a single company object or a list of companies.
    """
    agencies = defaultdict(set)
    files_read = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for fn in filenames:
            if fn == 'trafic.json':
                full = os.path.join(dirpath, fn)
                files_read.append(full)
                try:
                    with open(full, encoding='utf-8') as f:
                        data = json.load(f)
                except Exception as e:
                    print(f'Failed to load {full}: {e}')
                    continue

                def process_company(company):
                    aid = company.get('companyId')
                    if not aid:
                        return
                    for group in company.get('lines', []) or []:
                        for item in group:
                            if not isinstance(item, dict):
                                continue
                            lid = item.get('lineId')
                            if lid is None:
                                continue
                            lid_s = str(lid).strip()
                            if lid_s:
                                agencies[aid].add(lid_s)

                if isinstance(data, list):
                    for company in data:
                        if isinstance(company, dict):
                            process_company(company)
                elif isinstance(data, dict):
                    # Single company object
                    process_company(data)
                else:
                    print(f'Unexpected JSON structure in {full}, skipping')

    return agencies, files_read


def check_gtfs_for_agencies(agencies, timeout=30):
    errors = []
    total = len(agencies)
    idx = 0
    for aid, expected_line_ids in agencies.items():
        idx += 1
        if not expected_line_ids:
            print(f'[{idx}/{total}] Agency "{aid}": no lineIds to check, skipping')
            continue
        url = f'https://clarifygdps.com/bridge/gtfs/{aid}.zip'
        print(f'[{idx}/{total}] Checking GTFS for agency "{aid}" -> {url}')
        try:
            resp = urllib.request.urlopen(url, timeout=timeout)
            dataz = resp.read()
        except Exception as e:
            msg = f'Failed to download {url}: {e}'
            print('ERROR -', msg)
            errors.append(msg)
            continue

        try:
            z = zipfile.ZipFile(io.BytesIO(dataz))
        except Exception as e:
            msg = f'Invalid zip for {aid}: {e}'
            print('ERROR -', msg)
            errors.append(msg)
            continue

        namelist = z.namelist()
        rname = None
        for n in namelist:
            if n.endswith('routes.txt'):
                rname = n
                break
        if not rname:
            msg = f'No routes.txt in GTFS for {aid} (found files: {namelist[:10]})'
            print('ERROR -', msg)
            errors.append(msg)
            continue

        try:
            with z.open(rname) as rf:
                txt = io.TextIOWrapper(rf, encoding='utf-8', errors='replace')
                reader = csv.DictReader(txt)
                if reader.fieldnames:
                    reader.fieldnames = [fn.lstrip('\ufeff') for fn in reader.fieldnames]
                routes = list(reader)
        except Exception as e:
            msg = f'Failed to read routes.txt for {aid}: {e}'
            print('ERROR -', msg)
            errors.append(msg)
            continue

        route_ids = set()
        for r in routes:
            if isinstance(r, dict):
                rid = r.get('route_id')
                if rid is None:
                    continue
                rid_s = str(rid).strip()
                if rid_s:
                    route_ids.add(rid_s)

        expected_ids = set(str(x).strip() for x in expected_line_ids if x is not None and str(x).strip())
        missing = sorted([lid for lid in expected_ids if lid not in route_ids])
        if missing:
            msg = f'{len(missing)} missing line_id(s) not found in routes.txt: {missing[:20]}'
            print('ERROR -', msg)
            errors.append(f'Agency {aid}: {msg}')
        else:
            print(f'Agency {aid}: OK ({len(expected_line_ids)} line_ids matched)')

    return errors


def main():
    parser = argparse.ArgumentParser(description='Check GTFS routes for line IDs listed in trafic.json files under a logo directory.')
    parser.add_argument('--logo-dir', default='logo', help='Path to the logo directory to search (default: logo)')
    parser.add_argument('--timeout', type=int, default=30, help='Network timeout seconds when downloading GTFS (default: 30)')
    args = parser.parse_args()

    if not os.path.isdir(args.logo_dir):
        print(f'Logo directory "{args.logo_dir}" not found or is not a directory')
        sys.exit(1)

    agencies, files_read = gather_trafic_json(args.logo_dir)
    if not files_read:
        print(f'No trafic.json files found under "{args.logo_dir}"')
        sys.exit(1)

    print(f'Parsed {len(agencies)} company(ies) from {len(files_read)} file(s):')
    for p in files_read:
        print(' -', p)

    errors = check_gtfs_for_agencies(agencies, timeout=args.timeout)

    if errors:
        print('\nGTFS verification errors:')
        for e in errors:
            print(' -', e)
        sys.exit(1)
    else:
        print('\nAll GTFS checks passed.')


if __name__ == '__main__':
    main()
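
This checker mirrors the CSV one but derives its expectations from trafic.json, where companyId doubles as the GTFS archive name and lines holds groups of {"lineId": ...} entries. A minimal sketch of an accepted file, with invented IDs:

# Hypothetical trafic.json matching what gather_trafic_json reads; the IDs
# are made up, only the structure (companyId, lines -> groups -> lineId)
# is taken from the parser above. A top-level list of such company objects
# is accepted as well.
import json

example = {
    'companyId': 'demo-agency',
    'lines': [
        [{'lineId': 'L1'}, {'lineId': 'L2'}],  # one group of lines
        [{'lineId': 'N1'}],                    # another group
    ],
}
with open('trafic.json', 'w', encoding='utf-8') as f:
    json.dump(example, f, ensure_ascii=False, indent=2)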
Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
import csv
import os
import sys
import argparse


def gather_missing_logo_paths(root_dir, strip_prefix='https://clarifygdps.com/hexatransit/'):
    missing = []
    files_read = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        for fn in filenames:
            if fn == 'lines_picto.csv':
                full = os.path.join(dirpath, fn)
                files_read.append(full)
                try:
                    with open(full, encoding='utf-8') as f:
                        r = csv.DictReader(f, delimiter=';')
                        if r.fieldnames:
                            r.fieldnames = [x.lstrip('\ufeff') for x in r.fieldnames]
                        for row in r:
                            logo = (row.get('logoPath') or '').strip()
                            if not logo:
                                continue
                            path = logo
                            if path.startswith(strip_prefix):
                                path = path.replace(strip_prefix, '', 1)
                            # normalize path (allow both forward and back slashes)
                            path = os.path.normpath(path.lstrip('/\\'))
                            if not os.path.exists(path):
                                missing.append((logo, full, path))
                except Exception as e:
                    print(f'Failed to read {full}: {e}')
    return missing, files_read


def main():
    parser = argparse.ArgumentParser(description='Check logoPath entries in all lines_picto.csv files under a logo directory.')
    parser.add_argument('--logo-dir', default='logo', help='Directory to search recursively for lines_picto.csv (default: logo)')
    parser.add_argument('--strip-prefix', default='https://clarifygdps.com/hexatransit/', help='URL prefix to strip from logoPath before checking existence')
    args = parser.parse_args()

    if not os.path.isdir(args.logo_dir):
        print(f'Logo directory "{args.logo_dir}" not found or is not a directory')
        sys.exit(1)

    missing, files_read = gather_missing_logo_paths(args.logo_dir, strip_prefix=args.strip_prefix)

    print(f'Checked {len(files_read)} file(s) under "{args.logo_dir}"')
    for p in files_read:
        print(' -', p)

    print('\nMissing logo files ({}):'.format(len(missing)))
    for logo, srcfile, path in missing:
        print(' -', logo)
        print(' file:', srcfile)
        print(' expected local path:', path)

    if missing:
        sys.exit(1)


if __name__ == '__main__':
    main()
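
Note the path handling above: once the --strip-prefix URL prefix is removed, the remainder is checked with os.path.exists relative to the current working directory, so the script is meant to run from the repository root. A small sketch of that mapping, using a hypothetical logoPath value:

# Mirrors the prefix-stripping and normalization in gather_missing_logo_paths;
# the URL below is a made-up example, not taken from the repository.
import os

logo = 'https://clarifygdps.com/hexatransit/logo/demo-agency/L1.svg'
prefix = 'https://clarifygdps.com/hexatransit/'
path = logo.replace(prefix, '', 1) if logo.startswith(prefix) else logo
path = os.path.normpath(path.lstrip('/\\'))
print(path)  # logo/demo-agency/L1.svg, resolved against the working directory

Since the script exits non-zero whenever a referenced logo file is missing, it can double as a CI gate.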
