|
2 | 2 | import datetime |
3 | 3 |
|
4 | 4 | facilities_schema = { |
5 | | - "scraped_date": datetime.datetime.now(datetime.UTC), |
6 | | - "scrape_runtime": 0, |
7 | 5 | "enrich_runtime": 0, |
8 | 6 | "facilities": {}, |
| 7 | + "scrape_runtime": 0, |
| 8 | + "scraped_date": datetime.datetime.now(datetime.UTC), |
9 | 9 | } |
10 | 10 |
|
11 | 11 | field_offices_schema: dict = { |
12 | 12 | "field_offices": {}, |
13 | | - "scraped_date": datetime.datetime.now(datetime.UTC), |
14 | 13 | "scrape_runtime": 0, |
| 14 | + "scraped_date": datetime.datetime.now(datetime.UTC), |
15 | 15 | } |
16 | 16 |
|
17 | 17 | field_office_schema: dict = { |
18 | | - "name": "", |
19 | | - "field_office": "", |
20 | | - "id": "", |
21 | | - "address_str": "", |
22 | 18 | "address": { |
23 | 19 | "administrative_area": "", |
24 | 20 | "country": "", |
25 | 21 | "locality": "", |
26 | 22 | "postal_code": "", |
27 | 23 | "street": "", |
28 | 24 | }, |
| 25 | + "address_str": "", |
29 | 26 | "aor": "", |
30 | 27 | "email": "", |
| 28 | + "field_office": "", |
| 29 | + "id": "", |
31 | 30 | "source_urls": [], |
32 | 31 | } |
33 | 32 |
|
34 | 33 | # default keys to "false"-y values so we can merge easier |
35 | 34 | facility_schema: dict = { |
| 35 | + "_repaired_record": False, |
36 | 36 | "address": { |
37 | 37 | "administrative_area": "", |
38 | 38 | "country": "", |
|
41 | 41 | "street": "", |
42 | 42 | }, |
43 | 43 | "address_str": "", |
44 | | - "_repaired_record": False, |
45 | 44 | "field_office": copy.deepcopy(field_office_schema), |
46 | | - "image_url": "", |
47 | | - "name": "", |
48 | | - "phone": "", |
49 | | - "source_urls": [], |
50 | | - "wikipedia": { |
51 | | - "page_url": "", |
52 | | - "search_query": "", |
| 45 | + "facility_type": { |
| 46 | + "description": "", |
| 47 | + "expanded_name": "", |
| 48 | + "id": "", |
53 | 49 | }, |
54 | | - "wikidata": { |
55 | | - "page_url": "", |
56 | | - "search_query": "", |
| 50 | + "inspection": { |
| 51 | + "last_date": None, |
| 52 | + "last_rating": "", |
| 53 | + "last_type": "", |
57 | 54 | }, |
| 55 | + "image_url": "", |
58 | 56 | "osm": { |
59 | | - "url": "", |
60 | 57 | "latitude": 0, |
61 | 58 | "longitude": 0, |
62 | 59 | "search_query": "", |
| 60 | + "url": "", |
63 | 61 | }, |
| 62 | + "name": "", |
64 | 63 | "page_updated_date": None, |
| 64 | + "phone": "", |
65 | 65 | "population": { |
66 | | - "male": { |
| 66 | + "avg_stay_length": 0, |
| 67 | + "female": { |
67 | 68 | "allowed": False, |
68 | 69 | "criminal": 0, |
69 | 70 | "non_criminal": 0, |
70 | 71 | }, |
71 | | - "female": { |
| 72 | + "male": { |
72 | 73 | "allowed": False, |
73 | 74 | "criminal": 0, |
74 | 75 | "non_criminal": 0, |
75 | 76 | }, |
| 77 | + "housing": { |
| 78 | + "mandatory": 0, |
| 79 | + "guaranteed_min": 0, |
| 80 | + }, |
76 | 81 | "ice_threat_level": { |
77 | 82 | "level_1": 0, |
78 | 83 | "level_2": 0, |
79 | 84 | "level_3": 0, |
80 | 85 | "none": 0, |
81 | 86 | }, |
82 | | - "total": 0, |
83 | 87 | "security_threat": { |
84 | 88 | "low": 0, |
85 | 89 | "medium_low": 0, |
86 | 90 | "medium_high": 0, |
87 | 91 | "high": 0, |
88 | 92 | }, |
89 | | - "housing": { |
90 | | - "mandatory": 0, |
91 | | - "guaranteed_min": 0, |
92 | | - }, |
93 | | - "avg_stay_length": 0, |
| 93 | + "total": 0, |
94 | 94 | }, |
95 | | - "facility_type": { |
96 | | - "id": "", |
97 | | - "description": "", |
98 | | - "expanded_name": "", |
| 95 | + "source_urls": [], |
| 96 | + "wikipedia": { |
| 97 | + "page_url": "", |
| 98 | + "search_query": "", |
99 | 99 | }, |
100 | | - "inspection": { |
101 | | - "last_type": "", |
102 | | - "last_date": None, |
103 | | - "last_rating": "", |
| 100 | + "wikidata": { |
| 101 | + "page_url": "", |
| 102 | + "search_query": "", |
104 | 103 | }, |
105 | 104 | } |
106 | 105 |
|
107 | 106 |
|
108 | 107 | # enrichment response object |
109 | 108 | enrich_resp_schema = { |
110 | | - "original_name": "", |
111 | 109 | "cleaned_name": "", |
112 | | - "search_query_steps": [], |
113 | | - "url": "", |
114 | 110 | "details": {}, |
115 | | - "method": "none", |
116 | 111 | "enrichment_type": "", |
| 112 | + "method": "none", |
| 113 | + "original_name": "", |
| 114 | + "search_query_steps": [], |
| 115 | + "url": "", |
117 | 116 | } |
118 | 117 |
|
119 | 118 | # enrichment print details |
120 | 119 | enrichment_print_schema = { |
| 120 | + "osm_found": 0, |
121 | 121 | "wiki_found": 0, |
122 | 122 | "wikidata_found": 0, |
123 | | - "osm_found": 0, |
124 | 123 | } |
125 | 124 |
|
126 | | -supported_output_types = ["csv", "json", "xlsx", "parquet"] |
| 125 | +supported_output_types = ["csv", "json", "parquet", "xlsx"] |
0 commit comments