|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Fetch AI-related state legislation from OpenStates API. |
| 4 | +
|
| 5 | +OpenStates provides free access to state legislature data. |
| 6 | +Get your free API key at: https://openstates.org/accounts/signup/ |
| 7 | +
|
| 8 | +Usage: |
| 9 | + python fetch_openstates.py |
| 10 | +
|
| 11 | +Environment: |
| 12 | + OPENSTATES_API_KEY: Your OpenStates API key (optional for limited queries) |
| 13 | +""" |
| 14 | + |
| 15 | +import json |
| 16 | +import os |
| 17 | +import sys |
| 18 | +from datetime import datetime |
| 19 | +from pathlib import Path |
| 20 | + |
| 21 | +try: |
| 22 | + import requests |
| 23 | +except ImportError: |
| 24 | + print("Installing requests...") |
| 25 | + import subprocess |
| 26 | + subprocess.check_call([sys.executable, "-m", "pip", "install", "requests", "-q"]) |
| 27 | + import requests |
| 28 | + |
# GraphQL endpoint that bill queries are POSTed to.
# NOTE(review): confirm this URL against the current Open States API docs --
# the v3 host is presumably the REST API, while the GraphQL API has
# historically lived on a different host. TODO verify before relying on it.
OPENSTATES_API_URL = "https://v3.openstates.org/graphql"

# Search phrases used to find AI-related legislation. Order matters: the
# fetch loop only queries a leading subset of this list to limit API calls.
AI_SEARCH_TERMS = [
    "artificial intelligence",
    "machine learning",
    "algorithmic",
    "automated decision",
    "facial recognition",
    "deepfake",
]

# Two-letter codes of the states queried (those with active AI legislation).
PRIORITY_STATES = [
    "CA",
    "CO",
    "CT",
    "IL",
    "NY",
    "TX",
    "UT",
    "TN",
    "WA",
    "VA",
    "MA",
    "NJ",
]
| 45 | + |
| 46 | + |
def fetch_bills(api_key: str, state: str, search_term: str, session: str = None) -> list:
    """Fetch bills matching a search term from the OpenStates GraphQL API.

    Args:
        api_key: OpenStates API key. When empty, no ``X-API-KEY`` header is sent.
        state: State abbreviation; it is lower-cased and sent as the
            ``jurisdiction`` query variable.
        search_term: Free-text phrase sent as the ``searchQuery`` variable.
        session: Accepted for interface compatibility only; it is not
            currently applied to the query.

    Returns:
        A list of bill node dicts (the query requests up to 25, sorted by
        ``updated_desc``). An empty list is returned -- after printing a
        diagnostic -- when the request fails, the body is not valid JSON,
        or the API reports errors. A response without bill data also yields
        an empty list.
    """
    headers = {"X-API-KEY": api_key} if api_key else {}

    query = """
    query($state: String!, $searchQuery: String!, $first: Int!) {
      bills(
        jurisdiction: $state
        searchQuery: $searchQuery
        first: $first
        sort: "updated_desc"
      ) {
        edges {
          node {
            id
            identifier
            title
            classification
            subject
            updatedAt
            createdAt
            legislativeSession {
              identifier
              name
            }
            fromOrganization {
              name
            }
            abstracts {
              abstract
            }
            actions {
              description
              date
              classification
            }
            sources {
              url
            }
          }
        }
      }
    }
    """

    variables = {
        "state": state.lower(),
        "searchQuery": search_term,
        "first": 25,
    }

    try:
        response = requests.post(
            OPENSTATES_API_URL,
            json={"query": query, "variables": variables},
            headers=headers,
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        print(f" Request error for {state}: {e}")
        return []
    except ValueError as e:
        # Older requests releases raise a plain ValueError (not a
        # RequestException) when the response body is not valid JSON.
        print(f" Invalid JSON response for {state}: {e}")
        return []

    if not isinstance(data, dict):
        print(f" Unexpected response shape for {state}: {type(data).__name__}")
        return []

    if "errors" in data:
        print(f" API errors: {data['errors']}")
        return []

    # GraphQL may return explicit nulls ("data": null, "bills": null), which
    # dict.get's default does not cover, so coalesce each level with `or`.
    edges = ((data.get("data") or {}).get("bills") or {}).get("edges") or []
    return [edge["node"] for edge in edges if edge and edge.get("node")]
| 119 | + |
| 120 | + |
def determine_status(actions: list) -> str:
    """Derive a coarse bill status from its list of action dicts.

    Each action may carry a ``description`` string and a ``classification``
    list; missing or ``None`` values are treated as empty.

    Args:
        actions: Action dicts as returned by the bill query (may be empty
            or ``None``).

    Returns:
        ``"enacted"`` if any action is classified ``became-law`` or
        ``executive-signature``, or a description contains the word
        "signed" or "enacted". Otherwise ``"vetoed"`` if any action is
        classified ``governor-veto`` or ``executive-veto``, or a description
        contains the word "vetoed". Otherwise ``"active"`` if any action is
        classified ``passage``. Otherwise ``"pending"``.
    """
    if not actions:
        return "pending"

    classifications = set()
    words = set()
    for action in actions:
        classifications.update(action.get("classification") or [])
        description = (action.get("description") or "").lower()
        # Split on non-letters so keywords only match whole words; a plain
        # substring test finds "signed" inside "assigned" and would report
        # every committee-assigned bill as enacted.
        words.update(
            "".join(ch if ch.isalpha() else " " for ch in description).split()
        )

    if classifications & {"became-law", "executive-signature"} or words & {"signed", "enacted"}:
        return "enacted"
    if classifications & {"governor-veto", "executive-veto"} or "vetoed" in words:
        return "vetoed"
    if "passage" in classifications:
        return "active"  # Passed one chamber

    return "pending"
| 139 | + |
| 140 | + |
def transform_bill(bill: dict, state: str) -> dict:
    """Transform an OpenStates bill node into this project's bill schema.

    Args:
        bill: Bill node dict as returned by the bill query. Missing or
            ``None`` fields are treated as empty.
        state: State code the bill was fetched for; stored as given and
            lower-cased inside the generated ``id``.

    Returns:
        A dict with the keys ``id``, ``state``, ``bill_number``, ``title``,
        ``status``, ``date_introduced``, ``date_enacted``, ``effective_date``,
        ``summary``, ``key_provisions``, ``source_url``, ``tags``,
        ``last_verified`` and ``data_source``.
    """
    identifier = bill.get("identifier") or ""
    title = bill.get("title") or ""
    actions = bill.get("actions") or []

    # Summary: first abstract's text, falling back to the title when there
    # is no abstract or its text is empty.
    abstracts = bill.get("abstracts") or []
    summary = (abstracts[0].get("abstract") if abstracts else None) or title

    # Source URL: first listed source, if any.
    sources = bill.get("sources") or []
    source_url = (sources[0].get("url") if sources else None) or ""

    status = determine_status(actions)

    # Dates from actions: the first "introduction" action and the last
    # "became-law" action win.
    date_introduced = None
    date_enacted = None
    for action in actions:
        classifications = action.get("classification") or []
        if "introduction" in classifications and not date_introduced:
            date_introduced = action.get("date")
        if "became-law" in classifications:
            date_enacted = action.get("date")

    # Generate unique ID, e.g. "openstates-ca-sb-1047" for "SB 1047" in CA.
    bill_id = f"openstates-{state.lower()}-{identifier.replace(' ', '-').lower()}"

    return {
        "id": bill_id,
        "state": state,
        "bill_number": identifier,
        "title": title,
        "status": status,
        "date_introduced": date_introduced,
        "date_enacted": date_enacted,
        "effective_date": None,  # Never populated by this function
        "summary": summary[:500],  # Truncate long summaries
        "key_provisions": [],  # Would need NLP to extract
        "source_url": source_url,
        "tags": ["openstates", "ai"],
        "last_verified": datetime.now().strftime("%Y-%m-%d"),
        "data_source": "openstates",
    }
| 189 | + |
| 190 | + |
def fetch_all_state_bills(api_key: str, max_terms: int = 3) -> list:
    """Fetch AI-related bills from all priority states.

    For every state in ``PRIORITY_STATES`` the leading ``max_terms`` entries
    of ``AI_SEARCH_TERMS`` are searched. Bills are de-duplicated per state by
    their identifier and transformed with ``transform_bill``. Progress is
    printed as each state is processed.

    Args:
        api_key: OpenStates API key, passed through to ``fetch_bills``.
        max_terms: How many leading search terms to query per state. Kept
            small by default to avoid rate limits; ``None`` searches every
            term.

    Returns:
        A list of transformed bill dicts, in the order they were first seen.
    """
    all_bills = []
    seen_ids = set()
    search_terms = AI_SEARCH_TERMS[:max_terms]  # Limit searches to avoid rate limits

    for state in PRIORITY_STATES:
        print(f"Fetching bills for {state}...")
        # Counted as bills are added, so the summary line does not need to
        # rescan every bill collected so far.
        state_count = 0

        for term in search_terms:
            for bill in fetch_bills(api_key, state, term):
                bill_key = f"{state}-{bill['identifier']}"
                if bill_key in seen_ids:
                    continue
                seen_ids.add(bill_key)
                all_bills.append(transform_bill(bill, state))
                state_count += 1

        print(f" Found {state_count} unique bills")

    return all_bills
| 212 | + |
| 213 | + |
def main():
    """Fetch AI-related bills and write them to ``data/openstates_bills.json``.

    Reads the API key from the ``OPENSTATES_API_KEY`` environment variable
    (printing a warning when it is unset), fetches bills for all priority
    states, saves them as indented JSON in the ``data`` directory one level
    above this script's directory, and prints a per-state count summary.
    """
    from collections import Counter

    api_key = os.environ.get("OPENSTATES_API_KEY", "")

    if not api_key:
        print("Warning: No OPENSTATES_API_KEY set. API may be rate limited.")
        print("Get a free key at: https://openstates.org/accounts/signup/")
        print()

    print("Fetching AI-related state legislation from OpenStates...")
    print("=" * 50)

    bills = fetch_all_state_bills(api_key)

    print()
    print(f"Total bills found: {len(bills)}")

    # Save to file
    output_path = Path(__file__).parent.parent / "data" / "openstates_bills.json"
    # Create the data directory if needed, so a fresh checkout does not fail
    # with FileNotFoundError after all the fetching is done.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(bills, f, indent=2)

    print(f"Saved to: {output_path}")

    # Print summary by state
    print()
    print("Bills by state:")
    state_counts = Counter(b["state"] for b in bills)
    for state, count in state_counts.most_common():
        print(f" {state}: {count}")


if __name__ == "__main__":
    main()
0 commit comments