|
| 1 | +from firebase_functions.firestore_fn import ( |
| 2 | + Event, |
| 3 | + DocumentSnapshot, |
| 4 | +) |
| 5 | +from llm_functions import get_summary_api_function, get_tags_api_function_v2 |
| 6 | +from typing import TypedDict, NewType |
| 7 | + |
# Distinct static type for category names, so type checkers can tell a
# category apart from an arbitrary string. At runtime a Category is a plain str.
Category = NewType("Category", str)
| 9 | + |
| 10 | + |
# Shape of one entry returned by `get_categories_from_topics`; a list of these
# is what `run_trigger` writes to the bill document's `topics` field.
class TopicAndCategory(TypedDict):
    # We use the name `tag` in Python, but `topic` in the database
    topic: str
    # Topic can be mapped directly to a category
    category: Category
| 17 | + |
| 18 | + |
# Pair every recognised topic with its category. A topic is kept only when
# `topic_to_category` maps it to a truthy category; unknown topics are dropped.
# The order (and any duplicates) of `topics` is preserved in the result.
def get_categories_from_topics(
    topics: list[str], topic_to_category: dict[str, Category]
) -> list[TopicAndCategory]:
    matched: list[TopicAndCategory] = []
    for name in topics:
        category = topic_to_category.get(name)
        if not category:
            # Not part of the known taxonomy: skip it.
            continue
        matched.append(TopicAndCategory(topic=name, category=category))
    return matched
| 29 | + |
| 30 | + |
# When a bill is created for a given session, we want to populate both the
# summary and the tags for that bill. This is an idempotent function.
#
# Flow:
#   1. If the document has no `summary`, generate one from the bill title and
#      text and write it back to the document.
#   2. If the document has no `topics`, generate tags from the title and the
#      summary, map them to categories and write them back as `topics`.
#
# Every failure path logs a message and returns without raising.
#
# `event.params["bill_id"]` identifies the bill; `event.data` is the snapshot
# of the created document (it may be None).
def run_trigger(event: Event[DocumentSnapshot | None]) -> None:
    bill_id = event.params["bill_id"]
    inserted_data = event.data
    if inserted_data is None:
        print(f"bill with id `{bill_id}` has no event data")
        return

    inserted_content = inserted_data.to_dict()
    if inserted_content is None:
        print(f"bill with id `{bill_id}` has no inserted content")
        return

    # `contents` can be missing or stored as an explicit null; treat both as
    # an empty mapping so the lookups below cannot fail on None.
    contents = inserted_content.get("contents") or {}
    # The title is needed by BOTH the summary step and the tagging step, so it
    # is read up front. Reading it only inside the summary branch left it
    # unbound whenever the summary already existed.
    document_title = contents.get("Title")

    # If the summary is already populated, only run the tags code
    summary = inserted_content.get("summary")
    if summary is None:
        document_text = contents.get("DocumentText")
        if document_text is None or document_title is None:
            print(f"bill with id `{bill_id}` unable to fetch document text or title")
            return

        summary_response = get_summary_api_function(
            bill_id, document_title, document_text
        )

        # NOTE(review): only -1 and -2 are treated as failures here, while the
        # tagging step below requires status == 1 -- confirm against
        # llm_functions whether other non-success codes exist.
        if summary_response["status"] in [-1, -2]:
            print(
                f"failed to generate summary for bill with id `{bill_id}`, got {summary_response['status']}"
            )
            return

        # Set and insert the summary for the categorization step
        summary = summary_response["summary"]
        inserted_data.reference.update({"summary": summary})
        print(f"Successfully updated summary for bill with id `{bill_id}`")

    # If the topics are already populated, we are done
    topics = inserted_content.get("topics")
    if topics is not None:
        print(f"bill with id `{bill_id}` has topics")
        return

    # Only reachable with a missing title when the summary was pre-populated;
    # tagging needs the title, so bail out with a log line instead of crashing.
    if document_title is None:
        print(f"bill with id `{bill_id}` unable to fetch document title")
        return

    tags = get_tags_api_function_v2(bill_id, document_title, summary)

    if tags["status"] != 1:
        print(
            f"failed to generate tags for bill with id `{bill_id}`, got {tags['status']}"
        )
        return

    # Tags that are not part of the known taxonomy are dropped by the mapping.
    topics_and_categories = get_categories_from_topics(tags["tags"], CATEGORY_BY_TOPIC)
    inserted_data.reference.update({"topics": topics_and_categories})
    print(f"Successfully updated topics for bill with id `{bill_id}`")
| 84 | + |
| 85 | + |
# Taxonomy of bill topics, grouped by category. This table is inverted into
# CATEGORY_BY_TOPIC, which `run_trigger` uses to look up the category of each
# generated tag. The lookup is an exact string match, so the topic strings
# here must be character-for-character identical to the tags being produced.
#
# NOTE(review): "Corporation law and goverance" and "Payroll and emplyoment
# tax" look like misspellings ("governance", "employment"). Because they are
# lookup keys, only correct them together with whatever produces/stores the
# tags -- confirm against the tag list used by the tagging function.
TOPICS_BY_CATEGORY: dict[Category, list[str]] = {
    Category("Commerce"): [
        "Banking and financial institutions regulation",
        "Consumer protection",
        "Corporation law and goverance",
        "Commercial insurance",
        "Marketing and advertising",
        "Non-profit law and governance",
        "Occupational licensing",
        "Partnerships and limited liability companies",
        "Retail and wholesale trades",
        "Securities",
    ],
    Category("Crime and Law Enforcement"): [
        "Assault and harassment offenses",
        "Correctional facilities",
        "Crimes against animals and natural resources",
        "Crimes against children",
        "Criminal investigation, prosecution, interrogation",
        "Criminal justice information and records",
        "Criminal justice reform",
        "Criminal sentencing",
        "Firearms and explosives",
        "Fraud offenses and financial crimes",
        "Property crimes",
    ],
    Category("Economics and Public Finance"): [
        "Budget process",
        "Debt collection",
        "Eminent domain",
        "Financial literacy",
        "Financial services and investments",
        "Government contractors",
        "Pension and retirement benefits",
    ],
    Category("Education"): [
        "Academic performance and assessments",
        "Adult education and literacy",
        "Charter and private schools",
        "Curriculum and standards",
        "Education technology",
        "Educational facilities and institutions",
        "Elementary and secondary education",
        "Higher education",
        "Special education",
        "Student aid and college costs",
        "Teachers and educators",
        "Vocational and technical education",
    ],
    Category("Emergency Management"): [
        "Disaster relief and insurance",
        "Emergency communications systems",
        "Emergency medical services and trauma care",
        "Emergency planning and evacuation",
        "Hazards and emergency operations",
    ],
    Category("Energy"): [
        "Energy costs assistance",
        "Energy efficiency and conservation",
        "Energy infrastructure and storage",
        "Energy prices and subsidies",
        "Energy research",
        "Renewable energy sources",
    ],
    Category("Environmental Protection"): [
        "Air quality",
        "Environmental assessment, monitoring, research",
        "Environmental education",
        "Environmental health",
        "Environmental regulatory procedures",
        "Hazardous wastes and toxic substances",
        "Pollution control and abatement",
        "Soil pollution",
        "Trash and recycling",
        "Water quality",
        "Wetlands",
        "Wildlife conservation",
    ],
    Category("Families"): [
        "Adoption and foster care",
        "Family planning and birth control",
        "Family relationships and status",
        "Family services",
        "Life insurance",
        "Parenting and parental rights",
    ],
    Category("Food, Drugs, and Alcohol"): [
        "Alcoholic beverages and licenses",
        "Drug, alcohol, tobacco use",
        "Drug safety, medical device, and laboratory regulation",
        "Food industry and services",
        "Food service employment",
        "Food supply, safety, and labeling",
        "Nutrition and diet",
    ],
    Category("Government Operations and Elections"): [
        "Census and government statistics",
        "Government information and archives",
        "Government studies and investigations",
        "Government trust funds",
        "Lobbying and campaign finance",
        'Municipality oversight and "home rule petitions"',
        "Political advertising",
        "Public-private partnerships",
        "Voting and elections",
    ],
    Category("Healthcare"): [
        "Alternative treatments",
        "Dental care",
        "Health care costs",
        "Health facilities and institutions",
        "Health information and medical records",
        "Health insurance and coverage",
        "Health technology, devices, supplies",
        "Healthcare workforce",
        "Medical research",
        "Mental health",
        "Prescription drugs",
        "Sex and reproductive health",
        "Substance use disorder and addiction",
        "Telehealth",
        "Veterinary services and pets",
    ],
    Category("Housing and Community Development"): [
        "Community life and organization",
        "Cooperative and condominium housing",
        "Homelessness and emergency shelter",
        "Housing discrimination",
        "Housing finance and home ownership",
        "Housing for the elderly and disabled",
        "Housing industry and standards",
        "Housing supply and affordability",
        "Landlord and tenant",
        "Low- and moderate-income housing",
        "Residential rehabilitation and home repair",
    ],
    Category("Immigrants and Foreign Nationals"): [
        "Immigrant health and welfare",
        "Refugees, asylum, displaced persons",
        "Right to shelter",
        "Translation and language services",
    ],
    Category("Labor and Employment"): [
        "Employee benefits",
        "Employment discrimination",
        "Employee leave",
        "Employee pensions",
        "Employee performance",
        "Migrant, seasonal, agricultural labor",
        "Self-employment",
        "Temporary and part-time employment",
        "Workers' compensation",
        "Workforce development and employment training",
        "Worker safety and health",
        "Youth employment and child labor",
    ],
    Category("Law and Judiciary"): [
        "Civil disturbances",
        "Evidence and witnesses",
        "Judicial and court records",
        "Judicial review and appeals",
        "Jurisdiction and venue",
        "Legal fees and court costs",
    ],
    Category("Public and Natural Resources"): [
        "Agriculture and aquaculture",
        "Coastal zones and ocean",
        "Forests, forestry, trees",
        "Monuments and memorials",
        "Watershed and water resources",
        "Wildlife",
    ],
    Category("Social Services"): [
        "Child care and development",
        "Domestic violence and child abuse",
        "Food assistance and relief",
        "Home and outpatient care",
        "Social work, volunteer service, charitable organizations",
        "Unemployment",
        "Urban and suburban affairs and development",
        "Veterans' education, employment, rehabilitation",
        "Veterans' loans, housing, homeless programs",
        "Veterans' medical care",
    ],
    Category("Sports and Recreation"): [
        "Art and culture",
        "Gambling and lottery",
        "Hunting and fishing",
        "Outdoor recreation",
        "Professional sports, stadiums and arenas",
        "Public parks",
        "Sports and recreation facilities",
    ],
    Category("Taxation"): [
        "Capital gains tax",
        "Corporate tax",
        "Estate tax",
        "Excise tax",
        "Gift tax",
        "Income tax",
        "Payroll and emplyoment tax",
        "Property tax",
        "Sales tax",
        "Tax-exempt organizations",
        "Transfer and inheritance taxes",
    ],
    Category("Technology and Communications"): [
        "Advanced technology and technological innovations",
        "Atmospheric science and weather",
        "Broadband and internet access",
        "Computers and information technology",
        "Cybersecurity and identity theft",
        "Data privacy",
        "Emerging technology (artificial intelligence, blockchain, etc.)",
        "Genetics",
        "Internet, web applications, social media",
        "Photography and imaging",
        "Telecommunication rates and fees",
        "Telephone and wireless communication",
    ],
    Category("Transportation and Public Works"): [
        "Aviation and airports",
        "Highways and roads",
        "MBTA & public transportation",
        "Public utilities and utility rates",
        "Railroads",
        "Vehicle insurance and repairs",
        "Water storage",
        "Water use and supply",
    ],
}
| 317 | + |
# Reverse index of TOPICS_BY_CATEGORY: topic name -> the category that lists it.
# If a topic were ever listed under more than one category, the category that
# appears last in TOPICS_BY_CATEGORY would win.
CATEGORY_BY_TOPIC: dict[str, Category] = {
    topic_name: owning_category
    for owning_category, member_topics in TOPICS_BY_CATEGORY.items()
    for topic_name in member_topics
}
0 commit comments