Add OpenStates integration and fix broken source URLs

claude · claude · commit 8bac4f373136 · 2026-03-01T02:28:34.000Z
- Fix SEC, DoD, and BIS source URLs to point to actual documents
- Add OpenStates API script for free state legislature data
- Update workflow to fetch OpenStates data
- Fix merge script to include helper functions in data.js

https://claude.ai/code/session_01AuEEXJMhCdRxBkJBcEbxsQ
diff --git a/.github/workflows/update-legislation.yml b/.github/workflows/update-legislation.yml
@@ -35,6 +35,12 @@ jobs:
         run: python src/congress_gov.py --congress 118 --output .cache/congress_gov.json
         continue-on-error: true
 
+      - name: Fetch OpenStates data (free state legislature API)
+        env:
+          OPENSTATES_API_KEY: ${{ secrets.OPENSTATES_API_KEY }}
+        run: python scripts/fetch_openstates.py
+        continue-on-error: true
+
       - name: Merge new data into tracker
         run: python src/merge_api_data.py
 
diff --git a/data/us_federal_actions.json b/data/us_federal_actions.json
@@ -108,7 +108,7 @@
       "AI washing enforcement priority",
       "MD&A discussion requirements"
     ],
-    "source_url": "https://www.sec.gov",
+    "source_url": "https://www.sec.gov/newsroom/speeches-statements/gensler-ai-021324",
     "tags": [
       "disclosure",
       "securities",
@@ -154,7 +154,7 @@
       "Workforce AI training requirements",
       "Industry partnership frameworks"
     ],
-    "source_url": "https://www.defense.gov",
+    "source_url": "https://www.defense.gov/News/Releases/Release/Article/3578219/",
     "tags": [
       "defense",
       "adoption",
@@ -177,7 +177,7 @@
       "Country-specific controls (China, Russia, Iran)",
       "Cloud computing restrictions"
     ],
-    "source_url": "https://www.bis.doc.gov",
+    "source_url": "https://www.bis.doc.gov/index.php/policy-guidance/semiconductor-controls",
     "tags": [
       "export_controls",
       "chips",
diff --git a/docs/data.js b/docs/data.js
@@ -1,5 +1,5 @@
 // Auto-generated from data/*.json files
-// Last updated: 2026-03-01 01:59:30 UTC
+// Last updated: 2026-03-01 02:28:04 UTC
 
 const LEGISLATION_DATA = [
   {
@@ -626,7 +626,7 @@ const LEGISLATION_DATA = [
       "AI washing enforcement priority",
       "MD&A discussion requirements"
     ],
-    "source_url": "https://www.sec.gov",
+    "source_url": "https://www.sec.gov/newsroom/speeches-statements/gensler-ai-021324",
     "tags": [
       "disclosure",
       "securities",
@@ -672,7 +672,7 @@ const LEGISLATION_DATA = [
       "Workforce AI training requirements",
       "Industry partnership frameworks"
     ],
-    "source_url": "https://www.defense.gov",
+    "source_url": "https://www.defense.gov/News/Releases/Release/Article/3578219/",
     "tags": [
       "defense",
       "adoption",
@@ -695,7 +695,7 @@ const LEGISLATION_DATA = [
       "Country-specific controls (China, Russia, Iran)",
       "Cloud computing restrictions"
     ],
-    "source_url": "https://www.bis.doc.gov",
+    "source_url": "https://www.bis.doc.gov/index.php/policy-guidance/semiconductor-controls",
     "tags": [
       "export_controls",
       "chips",
@@ -705,3 +705,33 @@ const LEGISLATION_DATA = [
     "last_verified": "2024-12-24"
   }
 ];
+
+// Helper functions for app.js
+/**
+ * Return a copy of every LEGISLATION_DATA record with a derived
+ * `jurisdiction_type` field ('state', 'federal' or 'international')
+ * that the UI uses for filtering.
+ */
+function getAllLegislation() {
+  // A record with `state` is a state bill; US jurisdiction, an issuing body,
+  // or an executive order marks it federal; everything else is international.
+  const classify = (entry) => {
+    if (entry.state) {
+      return 'state';
+    }
+    const looksFederal =
+      entry.jurisdiction === 'United States' ||
+      entry.issuing_body ||
+      entry.type === 'executive_order';
+    return looksFederal ? 'federal' : 'international';
+  };
+
+  return LEGISLATION_DATA.map((entry) => ({
+    ...entry,
+    jurisdiction_type: classify(entry),
+  }));
+}
+
+/**
+ * Count how often each tag occurs across LEGISLATION_DATA and return the
+ * ten most frequent as [tag, count] pairs, highest count first.
+ */
+function getTagCounts() {
+  const tally = LEGISLATION_DATA.reduce((acc, entry) => {
+    // Records without a `tags` array contribute nothing.
+    (entry.tags || []).forEach((tag) => {
+      acc[tag] = (acc[tag] || 0) + 1;
+    });
+    return acc;
+  }, {});
+
+  // Sort by count descending, return top 10
+  const ranked = Object.entries(tally);
+  ranked.sort(([, left], [, right]) => right - left);
+  return ranked.slice(0, 10);
+}
diff --git a/scripts/fetch_openstates.py b/scripts/fetch_openstates.py
@@ -0,0 +1,247 @@
+#!/usr/bin/env python3
+"""
+Fetch AI-related state legislation from OpenStates API.
+
+OpenStates provides free access to state legislature data.
+Get your free API key at: https://openstates.org/accounts/signup/
+
+Usage:
+    python fetch_openstates.py
+
+Environment:
+    OPENSTATES_API_KEY: Your OpenStates API key (optional for limited queries)
+"""
+
+import json
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+try:
+    import requests
+except ImportError:
+    print("Installing requests...")
+    import subprocess
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "requests", "-q"])
+    import requests
+
+# Endpoint that fetch_bills() POSTs its GraphQL query to.
+# NOTE(review): OpenStates' published docs describe GraphQL as API v2
+# (https://openstates.org/graphql) and v3 as a REST API -- confirm that this
+# host/path actually serves GraphQL before relying on it.
+OPENSTATES_API_URL = "https://v3.openstates.org/graphql"
+
+# AI-related search terms
+# Only the first three entries are queried per state (see the slice in
+# fetch_all_state_bills); the remaining terms are currently unused.
+AI_SEARCH_TERMS = [
+    "artificial intelligence",
+    "machine learning",
+    "algorithmic",
+    "automated decision",
+    "facial recognition",
+    "deepfake",
+]
+
+# States we're most interested in (active AI legislation)
+# Two-letter abbreviations; fetch_bills() lower-cases them before querying.
+PRIORITY_STATES = [
+    "CA", "CO", "CT", "IL", "NY", "TX", "UT", "TN", "WA", "VA", "MA", "NJ"
+]
+
+
+def fetch_bills(api_key: str, state: str, search_term: str, session: str = None) -> list:
+    """Fetch bills matching a search term for one state from the OpenStates GraphQL API.
+
+    Args:
+        api_key: OpenStates API key. When empty, no ``X-API-KEY`` header is sent.
+        state: State abbreviation; lower-cased and sent as the ``jurisdiction``
+            query variable.
+        search_term: Free-text query sent as ``searchQuery``.
+        session: Not used by the current query; kept so existing callers that
+            pass it keep working.
+
+    Returns:
+        The bill ``node`` dicts from the response (the query asks for at most
+        25). An empty list is returned when the request fails, the body is not
+        valid JSON, the API reports ``errors``, or the payload has no bills.
+    """
+
+    headers = {"X-API-KEY": api_key} if api_key else {}
+
+    query = """
+    query($state: String!, $searchQuery: String!, $first: Int!) {
+        bills(
+            jurisdiction: $state
+            searchQuery: $searchQuery
+            first: $first
+            sort: "updated_desc"
+        ) {
+            edges {
+                node {
+                    id
+                    identifier
+                    title
+                    classification
+                    subject
+                    updatedAt
+                    createdAt
+                    legislativeSession {
+                        identifier
+                        name
+                    }
+                    fromOrganization {
+                        name
+                    }
+                    abstracts {
+                        abstract
+                    }
+                    actions {
+                        description
+                        date
+                        classification
+                    }
+                    sources {
+                        url
+                    }
+                }
+            }
+        }
+    }
+    """
+
+    variables = {
+        "state": state.lower(),
+        "searchQuery": search_term,
+        "first": 25
+    }
+
+    try:
+        response = requests.post(
+            OPENSTATES_API_URL,
+            json={"query": query, "variables": variables},
+            headers=headers,
+            timeout=30
+        )
+        response.raise_for_status()
+        data = response.json()
+    except requests.exceptions.RequestException as e:
+        print(f"  Request error for {state}: {e}")
+        return []
+    except ValueError as e:
+        # Older requests releases raise a plain ValueError (not a
+        # RequestException) when the body is not JSON, e.g. an HTML error page.
+        print(f"  Invalid JSON response for {state}: {e}")
+        return []
+
+    if not isinstance(data, dict):
+        print(f"  Unexpected response shape for {state}")
+        return []
+
+    if "errors" in data:
+        print(f"  API errors: {data['errors']}")
+        return []
+
+    # GraphQL may send explicit nulls for "data", "bills" or "edges"; a plain
+    # .get(key, {}) chain would then call .get on None.
+    payload = data.get("data") or {}
+    connection = payload.get("bills") or {}
+    edges = connection.get("edges") or []
+    return [edge["node"] for edge in edges if edge and edge.get("node")]
+
+
+def determine_status(actions: list) -> str:
+    """Determine bill status from actions.
+
+    Args:
+        actions: Action dicts, each optionally carrying a ``description``
+            string and a ``classification`` list. A ``None`` value for either
+            field is treated as empty.
+
+    Returns:
+        ``"enacted"``, ``"vetoed"``, ``"active"`` (a passage action exists) or
+        ``"pending"`` when nothing more specific is found or there are no
+        actions. Checks run in that order, so enactment wins over a veto.
+    """
+    import re  # local import keeps this block self-contained
+
+    if not actions:
+        return "pending"
+
+    action_text = " ".join((a.get("description") or "").lower() for a in actions)
+    classifications = set()
+    for a in actions:
+        classifications.update(a.get("classification") or [])
+
+    def mentions(*words):
+        # Whole-word match: a substring test for "signed" also fires on
+        # "assigned"/"designed", mislabelling committee referrals as enacted.
+        pattern = r"\b(?:" + "|".join(words) + r")\b"
+        return re.search(pattern, action_text) is not None
+
+    # "executive-*" are the classification names OpenStates publishes;
+    # "governor-veto" is kept so behavior for that value is unchanged.
+    enacted_classes = {"became-law", "executive-signature"}
+    veto_classes = {"governor-veto", "executive-veto", "executive-veto-line-item"}
+
+    if classifications & enacted_classes or mentions("signed", "enacted"):
+        return "enacted"
+    if classifications & veto_classes or mentions("vetoed"):
+        return "vetoed"
+    if "passage" in classifications:
+        return "active"  # Passed one chamber
+
+    return "pending"
+
+
+def transform_bill(bill: dict, state: str) -> dict:
+    """Transform OpenStates bill to our schema.
+
+    Args:
+        bill: One bill ``node`` dict as returned by ``fetch_bills``. Must
+            contain ``identifier``; ``abstracts``, ``sources`` and ``actions``
+            may be missing or ``None``.
+        state: State abbreviation stored as-is in ``state`` and lower-cased
+            for the generated ``id``.
+
+    Returns:
+        A dict in the tracker's record shape. ``effective_date`` is always
+        ``None`` and ``key_provisions`` always empty because the query does
+        not supply them.
+
+    Raises:
+        KeyError: If ``bill`` has no ``identifier``.
+    """
+    # Read once so the generated id and bill_number can never disagree.
+    identifier = bill["identifier"]
+
+    # Get abstract/summary, falling back to the title
+    abstracts = bill.get("abstracts") or []
+    summary = abstracts[0]["abstract"] if abstracts else bill.get("title", "")
+
+    # Get source URL
+    sources = bill.get("sources") or []
+    source_url = sources[0]["url"] if sources else ""
+
+    # `or []` guards against an explicit null, which .get(key, []) passes through.
+    actions = bill.get("actions") or []
+    status = determine_status(actions)
+
+    # Get dates from actions: first introduction, last became-law
+    date_introduced = None
+    date_enacted = None
+    effective_date = None
+
+    for action in actions:
+        action_date = action.get("date")
+        classifications = action.get("classification") or []
+
+        if "introduction" in classifications and not date_introduced:
+            date_introduced = action_date
+        if "became-law" in classifications:
+            date_enacted = action_date
+
+    # Generate unique ID
+    bill_id = f"openstates-{state.lower()}-{identifier.replace(' ', '-').lower()}"
+
+    return {
+        "id": bill_id,
+        "state": state,
+        "bill_number": identifier,
+        "title": bill.get("title", ""),
+        "status": status,
+        "date_introduced": date_introduced,
+        "date_enacted": date_enacted,
+        "effective_date": effective_date,
+        "summary": summary[:500] if summary else "",  # Truncate long summaries
+        "key_provisions": [],  # Would need NLP to extract
+        "source_url": source_url,
+        "tags": ["openstates", "ai"],
+        "last_verified": datetime.now().strftime("%Y-%m-%d"),
+        "data_source": "openstates"
+    }
+
+
+def fetch_all_state_bills(api_key: str) -> list:
+    """Collect de-duplicated, transformed AI bills for every priority state.
+
+    Each state in PRIORITY_STATES is queried once per search term (first three
+    terms only). A bill is kept the first time its state/identifier pair is
+    seen; later hits from other search terms are dropped.
+
+    Args:
+        api_key: OpenStates API key, forwarded to ``fetch_bills``.
+
+    Returns:
+        Transformed bill dicts, in the order they were first encountered.
+    """
+    collected = []
+    seen_keys = set()
+    search_terms = AI_SEARCH_TERMS[:3]  # Limit searches to avoid rate limits
+
+    for state in PRIORITY_STATES:
+        print(f"Fetching bills for {state}...")
+
+        for term in search_terms:
+            for raw_bill in fetch_bills(api_key, state, term):
+                dedupe_key = f"{state}-{raw_bill['identifier']}"
+                if dedupe_key in seen_keys:
+                    continue
+                seen_keys.add(dedupe_key)
+                collected.append(transform_bill(raw_bill, state))
+
+        state_total = sum(1 for record in collected if record['state'] == state)
+        print(f"  Found {state_total} unique bills")
+
+    return collected
+
+
+def main():
+    """Fetch AI-related state bills and write them to data/openstates_bills.json.
+
+    Reads the API key from the ``OPENSTATES_API_KEY`` environment variable
+    (a warning is printed when it is unset), fetches bills for all priority
+    states, saves them as JSON next to the repository's other data files and
+    prints a per-state summary.
+
+    If nothing was fetched and an output file already exists, the file is left
+    untouched: every failure path in the fetcher yields an empty list, so an
+    empty result would otherwise wipe previously collected data.
+    """
+    from collections import Counter
+
+    api_key = os.environ.get("OPENSTATES_API_KEY", "")
+
+    if not api_key:
+        print("Warning: No OPENSTATES_API_KEY set. API may be rate limited.")
+        print("Get a free key at: https://openstates.org/accounts/signup/")
+        print()
+
+    print("Fetching AI-related state legislation from OpenStates...")
+    print("=" * 50)
+
+    bills = fetch_all_state_bills(api_key)
+
+    print()
+    print(f"Total bills found: {len(bills)}")
+
+    # Save to file
+    output_path = Path(__file__).parent.parent / "data" / "openstates_bills.json"
+    if not bills and output_path.exists():
+        print(f"No bills fetched; keeping existing file: {output_path}")
+        return
+
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        json.dump(bills, f, indent=2)
+
+    print(f"Saved to: {output_path}")
+
+    # Print summary by state
+    print()
+    print("Bills by state:")
+    state_counts = Counter(b["state"] for b in bills)
+    for state, count in state_counts.most_common():
+        print(f"  {state}: {count}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/merge_api_data.py b/src/merge_api_data.py