Skip to content

Commit 8bac4f3

Browse files
committed
Add OpenStates integration and fix broken source URLs
- Fix SEC, DoD, and BIS source URLs to point to actual documents
- Add OpenStates API script for free state legislature data
- Update workflow to fetch OpenStates data
- Fix merge script to include helper functions in data.js

https://claude.ai/code/session_01AuEEXJMhCdRxBkJBcEbxsQ
1 parent e5dcbbf commit 8bac4f3

File tree

5 files changed

+338
-8
lines changed

5 files changed

+338
-8
lines changed

.github/workflows/update-legislation.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ jobs:
3535
run: python src/congress_gov.py --congress 118 --output .cache/congress_gov.json
3636
continue-on-error: true
3737

38+
- name: Fetch OpenStates data (free state legislature API)
39+
env:
40+
OPENSTATES_API_KEY: ${{ secrets.OPENSTATES_API_KEY }}
41+
run: python scripts/fetch_openstates.py
42+
continue-on-error: true
43+
3844
- name: Merge new data into tracker
3945
run: python src/merge_api_data.py
4046

data/us_federal_actions.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@
108108
"AI washing enforcement priority",
109109
"MD&A discussion requirements"
110110
],
111-
"source_url": "https://www.sec.gov",
111+
"source_url": "https://www.sec.gov/newsroom/speeches-statements/gensler-ai-021324",
112112
"tags": [
113113
"disclosure",
114114
"securities",
@@ -154,7 +154,7 @@
154154
"Workforce AI training requirements",
155155
"Industry partnership frameworks"
156156
],
157-
"source_url": "https://www.defense.gov",
157+
"source_url": "https://www.defense.gov/News/Releases/Release/Article/3578219/",
158158
"tags": [
159159
"defense",
160160
"adoption",
@@ -177,7 +177,7 @@
177177
"Country-specific controls (China, Russia, Iran)",
178178
"Cloud computing restrictions"
179179
],
180-
"source_url": "https://www.bis.doc.gov",
180+
"source_url": "https://www.bis.doc.gov/index.php/policy-guidance/semiconductor-controls",
181181
"tags": [
182182
"export_controls",
183183
"chips",

docs/data.js

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// Auto-generated from data/*.json files
2-
// Last updated: 2026-03-01 01:59:30 UTC
2+
// Last updated: 2026-03-01 02:28:04 UTC
33

44
const LEGISLATION_DATA = [
55
{
@@ -626,7 +626,7 @@ const LEGISLATION_DATA = [
626626
"AI washing enforcement priority",
627627
"MD&A discussion requirements"
628628
],
629-
"source_url": "https://www.sec.gov",
629+
"source_url": "https://www.sec.gov/newsroom/speeches-statements/gensler-ai-021324",
630630
"tags": [
631631
"disclosure",
632632
"securities",
@@ -672,7 +672,7 @@ const LEGISLATION_DATA = [
672672
"Workforce AI training requirements",
673673
"Industry partnership frameworks"
674674
],
675-
"source_url": "https://www.defense.gov",
675+
"source_url": "https://www.defense.gov/News/Releases/Release/Article/3578219/",
676676
"tags": [
677677
"defense",
678678
"adoption",
@@ -695,7 +695,7 @@ const LEGISLATION_DATA = [
695695
"Country-specific controls (China, Russia, Iran)",
696696
"Cloud computing restrictions"
697697
],
698-
"source_url": "https://www.bis.doc.gov",
698+
"source_url": "https://www.bis.doc.gov/index.php/policy-guidance/semiconductor-controls",
699699
"tags": [
700700
"export_controls",
701701
"chips",
@@ -705,3 +705,33 @@ const LEGISLATION_DATA = [
705705
"last_verified": "2024-12-24"
706706
}
707707
];
708+
709+
// Helper functions for app.js
710+
/**
 * Return a shallow copy of every LEGISLATION_DATA record with an added
 * `jurisdiction_type` field ('state', 'federal' or 'international') that
 * callers can filter on.
 */
function getAllLegislation() {
    // A record with a `state` is state-level; otherwise any federal marker
    // makes it federal; everything else is treated as international.
    const classify = (record) => {
        if (record.state) {
            return 'state';
        }
        const looksFederal =
            record.jurisdiction === 'United States' ||
            record.issuing_body ||
            record.type === 'executive_order';
        return looksFederal ? 'federal' : 'international';
    };

    return LEGISLATION_DATA.map((record) => ({
        ...record,
        jurisdiction_type: classify(record),
    }));
}
724+
725+
/**
 * Count how often each tag occurs across LEGISLATION_DATA.
 *
 * Records without a `tags` array contribute nothing.
 *
 * @returns {Array<[string, number]>} At most ten [tag, count] pairs,
 *     ordered by count, highest first.
 */
function getTagCounts() {
    // Prototype-less accumulator: a tag named "constructor" or "__proto__"
    // must be counted like any other key instead of resolving to a member
    // inherited from Object.prototype.
    const tagCounts = Object.create(null);

    for (const item of LEGISLATION_DATA) {
        for (const tag of item.tags || []) {
            tagCounts[tag] = (tagCounts[tag] || 0) + 1;
        }
    }

    // Sort by count descending, return top 10
    return Object.entries(tagCounts)
        .sort((a, b) => b[1] - a[1])
        .slice(0, 10);
}

scripts/fetch_openstates.py

Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Fetch AI-related state legislation from OpenStates API.
4+
5+
OpenStates provides free access to state legislature data.
6+
Get your free API key at: https://openstates.org/accounts/signup/
7+
8+
Usage:
9+
python fetch_openstates.py
10+
11+
Environment:
12+
OPENSTATES_API_KEY: Your OpenStates API key (optional for limited queries)
13+
"""
14+
15+
import json
16+
import os
17+
import sys
18+
from datetime import datetime
19+
from pathlib import Path
20+
21+
try:
22+
import requests
23+
except ImportError:
24+
print("Installing requests...")
25+
import subprocess
26+
subprocess.check_call([sys.executable, "-m", "pip", "install", "requests", "-q"])
27+
import requests
28+
29+
# Endpoint that GraphQL queries are POSTed to.
# NOTE(review): the "v3" OpenStates API is documented as a REST API; GraphQL
# appears to belong to the older v2 service on a different host. Confirm this
# URL actually accepts GraphQL requests before relying on the results.
OPENSTATES_API_URL = "https://v3.openstates.org/graphql"

# AI-related search terms
AI_SEARCH_TERMS = [
    "artificial intelligence",
    "machine learning",
    "algorithmic",
    "automated decision",
    "facial recognition",
    "deepfake",
]

# States we're most interested in (active AI legislation)
PRIORITY_STATES = [
    "CA", "CO", "CT", "IL", "NY", "TX", "UT", "TN", "WA", "VA", "MA", "NJ"
]
45+
46+
47+
def fetch_bills(api_key: str, state: str, search_term: str, session: str = None) -> list:
    """Fetch bills matching a search term from the OpenStates GraphQL API.

    Args:
        api_key: OpenStates API key. When empty, no ``X-API-KEY`` header is sent.
        state: State abbreviation; lower-cased and sent as the ``jurisdiction``
            query variable.
        search_term: Free-text query sent as the ``searchQuery`` variable.
        session: Currently unused; kept so existing call sites keep working.

    Returns:
        A list of bill ``node`` dicts (the query asks for at most 25). An empty
        list is returned when the request fails, the body is not valid JSON,
        the API reports GraphQL errors, or the response contains no bills.
    """
    headers = {"X-API-KEY": api_key} if api_key else {}

    query = """
    query($state: String!, $searchQuery: String!, $first: Int!) {
        bills(
            jurisdiction: $state
            searchQuery: $searchQuery
            first: $first
            sort: "updated_desc"
        ) {
            edges {
                node {
                    id
                    identifier
                    title
                    classification
                    subject
                    updatedAt
                    createdAt
                    legislativeSession {
                        identifier
                        name
                    }
                    fromOrganization {
                        name
                    }
                    abstracts {
                        abstract
                    }
                    actions {
                        description
                        date
                        classification
                    }
                    sources {
                        url
                    }
                }
            }
        }
    }
    """

    variables = {
        "state": state.lower(),
        "searchQuery": search_term,
        "first": 25,
    }

    try:
        response = requests.post(
            OPENSTATES_API_URL,
            json={"query": query, "variables": variables},
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f" Request error for {state}: {e}")
        return []
    except ValueError as e:
        # Some requests releases raise a plain ValueError for a non-JSON body.
        print(f" Invalid JSON response for {state}: {e}")
        return []

    if not isinstance(data, dict):
        print(f" Unexpected response type for {state}: {type(data).__name__}")
        return []

    if "errors" in data:
        print(f" API errors: {data['errors']}")
        return []

    # GraphQL may answer with "data": null or "bills": null; treat every
    # missing level as "no results" instead of raising AttributeError.
    edges = ((data.get("data") or {}).get("bills") or {}).get("edges") or []
    return [edge["node"] for edge in edges if edge and edge.get("node")]
119+
120+
121+
def determine_status(actions: list) -> str:
    """Derive a coarse bill status from its list of actions.

    Args:
        actions: Action dicts that may carry a ``description`` string and a
            ``classification`` list. Missing or ``None`` values are tolerated.

    Returns:
        ``"enacted"`` if any action is classified ``became-law`` or
        ``executive-signature``, or a description contains the word "signed"
        or "enacted"; otherwise ``"vetoed"`` if any action is classified
        ``governor-veto`` or ``executive-veto``, or a description contains
        the word "vetoed"; otherwise ``"active"`` if any action is classified
        ``passage``; otherwise ``"pending"`` (also for an empty list).
    """
    import re  # function-scope import keeps this block self-contained

    if not actions:
        return "pending"

    action_text = " ".join((a.get("description") or "").lower() for a in actions)
    classifications = set()
    for a in actions:
        classifications.update(a.get("classification") or [])

    # Whole-word matching: a plain substring test for "signed" also matches
    # "assigned", which marks any bill assigned to a committee as enacted.
    # NOTE(review): "signed" still matches e.g. "Signed by Speaker"; tighten
    # further if that produces false positives.
    signed_or_enacted = re.search(r"\b(?:signed|enacted)\b", action_text) is not None
    vetoed = re.search(r"\bvetoed\b", action_text) is not None

    if classifications & {"became-law", "executive-signature"} or signed_or_enacted:
        return "enacted"
    if classifications & {"governor-veto", "executive-veto"} or vetoed:
        return "vetoed"
    if "passage" in classifications:
        return "active"  # Passed one chamber

    return "pending"
139+
140+
141+
def transform_bill(bill: dict, state: str) -> dict:
    """Transform an OpenStates bill node into this project's record schema.

    Args:
        bill: Bill dict as returned by the OpenStates query. The keys read are
            ``identifier``, ``title``, ``abstracts``, ``sources`` and
            ``actions``; missing or ``None`` values are tolerated.
        state: State abbreviation stored on the record and used in its id.

    Returns:
        A dict with the keys ``id``, ``state``, ``bill_number``, ``title``,
        ``status``, ``date_introduced``, ``date_enacted``, ``effective_date``,
        ``summary``, ``key_provisions``, ``source_url``, ``tags``,
        ``last_verified`` and ``data_source``.
    """
    identifier = bill.get("identifier") or ""
    title = bill.get("title") or ""

    # Prefer the first abstract; fall back to the title when there is no
    # abstract or it is empty.
    abstracts = bill.get("abstracts") or []
    first_abstract = abstracts[0].get("abstract") if abstracts else None
    summary = first_abstract or title

    # Get source URL
    sources = bill.get("sources") or []
    source_url = (sources[0].get("url") or "") if sources else ""

    # Determine status
    actions = bill.get("actions") or []
    status = determine_status(actions)

    # Get dates from actions: the first "introduction" action wins, while the
    # last "became-law" action wins.
    date_introduced = None
    date_enacted = None
    for action in actions:
        action_date = action.get("date")
        classifications = action.get("classification") or []

        if "introduction" in classifications and not date_introduced:
            date_introduced = action_date
        if "became-law" in classifications:
            date_enacted = action_date

    # Generate unique ID
    bill_id = f"openstates-{state.lower()}-{identifier.replace(' ', '-').lower()}"

    return {
        "id": bill_id,
        "state": state,
        "bill_number": identifier,
        "title": title,
        "status": status,
        "date_introduced": date_introduced,
        "date_enacted": date_enacted,
        "effective_date": None,  # nothing in the fetched data populates this
        "summary": summary[:500],  # Truncate long summaries
        "key_provisions": [],  # Would need NLP to extract
        "source_url": source_url,
        "tags": ["openstates", "ai"],
        "last_verified": datetime.now().strftime("%Y-%m-%d"),
        "data_source": "openstates",
    }
189+
190+
191+
def fetch_all_state_bills(api_key: str) -> list:
    """Fetch AI-related bills for every state in PRIORITY_STATES.

    Each state is queried with the first three AI_SEARCH_TERMS. A bill is kept
    once per (state, identifier) pair, so a bill matched by several search
    terms appears only once.

    Args:
        api_key: OpenStates API key, passed through to fetch_bills().

    Returns:
        A list of bill records as produced by transform_bill().
    """
    all_bills = []
    seen_ids = set()

    for state in PRIORITY_STATES:
        print(f"Fetching bills for {state}...")
        # Running count for this state, so the summary line does not have to
        # rescan everything collected so far.
        state_total = 0

        for term in AI_SEARCH_TERMS[:3]:  # Limit searches to avoid rate limits
            for bill in fetch_bills(api_key, state, term):
                identifier = bill.get("identifier")
                if not identifier:
                    # Without an identifier the bill cannot be de-duplicated
                    # or given a stable id, so skip it.
                    continue

                bill_key = f"{state}-{identifier}"
                if bill_key in seen_ids:
                    continue

                seen_ids.add(bill_key)
                all_bills.append(transform_bill(bill, state))
                state_total += 1

        print(f" Found {state_total} unique bills")

    return all_bills
212+
213+
214+
def main():
    """Fetch AI-related state bills and write them to data/openstates_bills.json.

    Reads the API key from the OPENSTATES_API_KEY environment variable (a
    warning is printed when it is unset), saves the fetched bills as JSON in
    the ``data`` directory one level above this script's directory, and prints
    a per-state summary.
    """
    from collections import Counter

    api_key = os.environ.get("OPENSTATES_API_KEY", "")

    if not api_key:
        print("Warning: No OPENSTATES_API_KEY set. API may be rate limited.")
        print("Get a free key at: https://openstates.org/accounts/signup/")
        print()

    print("Fetching AI-related state legislation from OpenStates...")
    print("=" * 50)

    bills = fetch_all_state_bills(api_key)

    print()
    print(f"Total bills found: {len(bills)}")

    # Save to file
    output_path = Path(__file__).parent.parent / "data" / "openstates_bills.json"
    # Create the target directory if needed so the write cannot fail on a
    # fresh checkout, and pin the encoding so output is platform-independent.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(bills, f, indent=2)

    print(f"Saved to: {output_path}")

    # Print summary by state
    print()
    print("Bills by state:")
    state_counts = Counter(b["state"] for b in bills)
    for state, count in state_counts.most_common():
        print(f" {state}: {count}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)