Skip to content

Commit f65ff9a

Browse files
authored
Change DB Schema (#13)
* Begin setting up tests * Tests for return format * Update for new db * Allow release ingestion to happen in parallel * Remove restriction * Add indexes * Fix tests * Schema * Add beta compose * Update container names * Remove ports * ports * Parse effective date * Update version * Readme
1 parent 3cfb4cf commit f65ff9a

File tree

24 files changed

+1932
-823
lines changed

24 files changed

+1932
-823
lines changed

.docker/docker-compose.beta.yml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
version: '3'
2+
services:
3+
congress_parser_api:
4+
container_name: beta_congress_parser_api
5+
environment:
6+
- STAGE=prod
7+
- db_host=10.0.0.248:5432
8+
- db_table=us_code_beta
9+
ports: ["9091:9090"]
10+
congress_viewer_app:
11+
container_name: beta_congress_viewer_app
12+
volumes:
13+
- /var/www/congress-beta:/usr/src/app/build
14+
entrypoint:
15+
- "yarn"
16+
command:
17+
- "build"
18+
ports: ["3001:3000"]
19+
congress_postgres:
20+
container_name: beta_congress_postgres
21+
image: tianon/true
22+
networks:
23+
parser:
24+
external:
25+
name: docker_parser

.docker/docker-compose.prod.yml

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,8 @@ services:
1111
- "yarn"
1212
command:
1313
- "build"
14-
# congress_nginx:
15-
# container_name: congress_nginx
16-
# build:
17-
# context: ../
18-
# dockerfile: .docker/nginx.dockerfile
19-
# ports:
20-
# - 80:80
21-
# - 443:443
22-
# networks:
23-
# parser:
24-
# volumes:
25-
# - /etc/letsencrypt:/etc/letsencrypt
26-
# - ../frontend/.nginx/:/etc/nginx/sites-enabled/
27-
# - /var/www/congress:/var/www/congress
2814
congress_postgres:
29-
image: tianon/true
15+
image: tianon/true
3016
networks:
3117
parser:
3218
external:

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@
33
frontend/yarn-error.log
44
.vscode
55
.docker/docker-compose.local.yml
6+
backend/venv/
67
venv/

README.md

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,13 @@ docker-compose .docker/docker-compose.yml up -d
88
```
99

1010
## Loading the database
11-
From the backend folder, you will need to tell it to parse some files before you can view them. importers.releases will load 3 release points from the US Code website. ~10MB compressed and put it into the database ~500MB in the db.
12-
run_through will load the first 100 bills from the senate and the first 100 bills from the house (limited to 100 for speed), you can change it on line 671.
13-
A semi up to date postgres dump is available for [download](https://congress.dev/congress.backup)
11+
From the backend folder, you will need to tell it to parse some files before you can view them. `importers.releases` will load 1 release point from the US Code website (~10MB compressed) and put it into the database (~500MB in the db).
12+
A semi-up-to-date Postgres dump is available for [download](https://files.congress.dev/congress_beta.backup)
1413

1514
```bash
1615
docker exec -it docker_parser_api bash
1716

1817
python3 -m billparser.importers.releases rp.json
19-
python3 -m billparser.run_through
18+
python3 -m billparser.importers.bills bills.json
2019

2120
```

backend/.docker/Dockerfile

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,4 @@ RUN pip3 install -r requirements.txt
99
COPY . /usr/src/app
1010
WORKDIR /usr/src/app
1111

12-
EXPOSE 9090
13-
EXPOSE 80
14-
1512
ENTRYPOINT ["python3", "-m", "billparser"]

backend/billparser/__main__.py

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,17 @@
3333
get_versions,
3434
get_revisions,
3535
get_revision_diff,
36+
get_latest_base
3637
)
3738
from billparser.db.models import (
38-
Chapter,
39-
Section,
40-
Content,
41-
ContentDiff,
39+
USCChapter,
40+
USCSection,
41+
USCContent,
42+
USCContentDiff,
4243
Version,
43-
Bill,
44-
BillVersion,
45-
BillContent,
44+
Legislation,
45+
LegislationVersion,
46+
LegislationContent,
4647
)
4748
from billparser.helpers import treeify
4849

@@ -112,18 +113,19 @@ def bills() -> str:
112113
start = time.time()
113114
bill_by_number = defaultdict(lambda: {})
114115
for bill in get_bills(house, senate, query, incl, decl):
115-
title = "{}-{}".format(bill.chamber[0], bill.bill_number)
116-
obj = row2dict(bill)
116+
title = "{}-{}".format(bill.chamber.value[0], bill.number)
117+
#obj = row2dict(bill)
118+
obj = bill.to_dict()
117119
obj["versions"] = [
118-
row2dict(x)
120+
x.to_dict()
119121
for x in bill.versions
120122
if (
121123
True
122124
or ( # Filter based on only the included versions?
123125
incl == "" and decl == ""
124126
)
125-
or (incl != "" and x.bill_version in incl.split(","))
126-
or (decl != "" and x.bill_version in decl.split(","))
127+
or (incl != "" and x.legislation_version in incl.split(","))
128+
or (decl != "" and x.legislation_version in decl.split(","))
127129
)
128130
]
129131
bill_by_number[title] = obj
@@ -267,9 +269,8 @@ def sections(chapter_id: int) -> str:
267269
Returns:
268270
str: Stringified array of the rows
269271
"""
270-
latest_base = (
271-
current_session.query(Version).filter(Version.base_id == None).all()[0]
272-
)
272+
latest_base = get_latest_base()
273+
273274
res = []
274275
for section in get_sections(str(chapter_id), latest_base.version_id):
275276
res.append(section.to_dict())
@@ -311,9 +312,7 @@ def contents(section_id: int) -> str:
311312
Returns:
312313
str: Stringified array of the content rows
313314
"""
314-
latest_base = (
315-
current_session.query(Version).filter(Version.base_id == None).all()[0]
316-
)
315+
latest_base = get_latest_base()
317316
res = []
318317
for section in get_content(str(section_id), latest_base.version_id):
319318
res.append(section.to_dict())

backend/billparser/actions/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,10 @@
8383
"INSERT-CHAPTER-AT-END": [
8484
r"Title (?P<title>\d\d?A?), (?P<document_title>.+), is amended by adding at the end the following new chapter:?"
8585
],
86-
"TERM-DEFINITION": [r"The term \"(?P<term>.+?)\" means (?P<term_def>.+?)."],
86+
"TERM-DEFINITION": [
87+
r"The term \"(?P<term>.+?)\" means (?P<term_def>.+?).",
88+
r"The term (?P<term>.+?) means (?P<term_def>.+?).",
89+
],
8790
}
8891

8992
SuchCodeRegex = re.compile(r"(Section|paragraph) (?P<section>\d*)\(", re.IGNORECASE)
@@ -155,6 +158,7 @@ def __init__(self, **kwargs):
155158
self.cited_content = kwargs.get("cited_content", None)
156159
self.last_title = kwargs.get("last_title", "")
157160
self.next = kwargs.get("next", None)
161+
self.legislation_content = kwargs.get("legislation_content", None)
158162
# print(kwargs)
159163

160164
def set_action(self, action):

backend/billparser/actions/insert.py

Lines changed: 49 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
from billparser.transformer import Session
1+
from billparser.db.handler import Session
22
from billparser.translater import translate_paragraph
33

4-
from billparser.db.models import ContentDiff, Section, Content
4+
from billparser.db.models import USCContentDiff, USCSection, USCContent
55

66
from billparser.actions import ActionObject
77

@@ -24,6 +24,7 @@ def recursive_content(
2424
version_id: int,
2525
last_ident: str,
2626
session: "Session",
27+
legislation_id: int,
2728
) -> None:
2829
"""
2930
This is the function that "inserts" a new block of content from a bill.
@@ -67,10 +68,10 @@ def recursive_content(
6768
" ".join(content_elem.itertext()).strip().replace("\n", " ")
6869
)
6970
if "heading" in heading.tag:
70-
content = Content(
71+
content = USCContent(
7172
content_type=search_element.tag,
7273
usc_ident=ident,
73-
section_id=section_id,
74+
usc_section_id=section_id,
7475
parent_id=content_id,
7576
order_number=order,
7677
version_id=version_id,
@@ -86,27 +87,28 @@ def recursive_content(
8687
content_str = (
8788
" ".join(content_elem.itertext()).strip().replace("\n", " ")
8889
)
89-
content = Content(
90+
content = USCContent(
9091
content_type=search_element.tag,
9192
usc_ident=ident,
92-
section_id=section_id,
93+
usc_section_id=section_id,
9394
parent_id=content_id,
9495
order_number=order,
9596
version_id=version_id,
9697
)
9798
session.add(content)
9899
session.flush()
99-
diff = ContentDiff(
100-
chapter_id=chapter_id,
100+
diff = USCContentDiff(
101+
usc_chapter_id=chapter_id,
101102
version_id=version_id,
102-
content_id=content.content_id,
103-
section_id=section_id,
103+
usc_content_id=content.usc_content_id,
104+
usc_section_id=section_id,
104105
number=enum.attrib.get("value", ""),
105106
section_display=enum.text,
106107
content_str=content_str,
107108
heading=heading.text
108109
if heading is not None and heading.text != content_str
109110
else None,
111+
legislation_content_id=legislation_id,
110112
)
111113
session.add(diff)
112114
session.commit()
@@ -116,12 +118,13 @@ def recursive_content(
116118
recursive_content(
117119
chapter_id,
118120
section_id,
119-
content.content_id,
121+
content.usc_content_id,
120122
elem,
121123
order,
122124
version_id,
123125
ident,
124126
session,
127+
legislation_id,
125128
)
126129
order = order + 1
127130

@@ -135,24 +138,30 @@ def insert_section_after(action_obj: ActionObject, session: "Session") -> None:
135138
session (Session): DB session to insert into
136139
"""
137140
cited_content = action_obj.cited_content
141+
legislation_content = action_obj.legislation_content
142+
if legislation_content is not None:
143+
legislation_id = legislation_content.legislation_content_id
144+
else:
145+
legislation_id = None
138146
new_vers_id = action_obj.version_id
139147
if action_obj.next is not None:
140148
chapter = (
141-
session.query(Section)
142-
.filter(Section.section_id == cited_content.section_id)
149+
session.query(USCSection)
150+
.filter(USCSection.usc_section_id == cited_content.usc_section_id)
143151
.all()
144152
)
145153
if len(chapter) > 0:
146-
chapter_id = chapter[0].chapter_id
154+
chapter_id = chapter[0].usc_chapter_id
147155
recursive_content(
148156
chapter_id,
149-
cited_content.section_id,
157+
cited_content.usc_section_id,
150158
cited_content.parent_id,
151159
translate_paragraph(action_obj.next)[0],
152160
cited_content.order_number + 1,
153161
new_vers_id,
154162
"/".join(cited_content.usc_ident.split("/")[:-1]),
155163
session,
164+
legislation_id,
156165
)
157166
session.commit()
158167

@@ -169,9 +178,9 @@ def insert_end(action_obj: ActionObject, session: "Session") -> None:
169178
cited_content = action_obj.cited_content
170179
if action_obj.next is not None:
171180
last_content = (
172-
session.query(Content)
173-
.filter(Content.parent_id == cited_content.content_id)
174-
.order_by(Content.order_number.desc())
181+
session.query(USCContent)
182+
.filter(USCContent.parent_id == cited_content.usc_content_id)
183+
.order_by(USCContent.order_number.desc())
175184
.limit(1)
176185
.all()
177186
)
@@ -193,43 +202,50 @@ def insert_text_end(action_obj: ActionObject, session: "Session") -> None:
193202
"""
194203
action = action_obj.action
195204
cited_content = action_obj.cited_content
196-
log.debug(cited_content.content_id)
205+
log.debug(cited_content.usc_content_id)
197206
new_vers_id = action_obj.version_id
198207
to_replace = action.get("to_replace", "")
199208
chapter = (
200-
session.query(Section)
201-
.filter(Section.section_id == cited_content.section_id)
209+
session.query(USCSection)
210+
.filter(USCSection.usc_section_id == cited_content.usc_section_id)
202211
.limit(1)
203212
.all()
204213
)
205214
diff = None
215+
legislation_content = action_obj.legislation_content
216+
if legislation_content is not None:
217+
legislation_id = legislation_content.legislation_content_id
218+
else:
219+
legislation_id = None
206220
if len(chapter) > 0:
207-
chapter_id = chapter[0].chapter_id
221+
chapter_id = chapter[0].usc_chapter_id
208222
if cited_content.heading is not None:
209223
heading_diff = cited_content.heading + " " + to_replace
210-
if heading_diff != cited_content.heading:
211-
diff = ContentDiff(
212-
content_id=cited_content.content_id,
213-
section_id=cited_content.section_id,
214-
chapter_id=chapter_id,
224+
if heading_diff != cited_content.heading and heading_diff != " ":
225+
diff = USCContentDiff(
226+
usc_content_id=cited_content.usc_content_id,
227+
usc_section_id=cited_content.usc_section_id,
228+
usc_chapter_id=chapter_id,
215229
version_id=new_vers_id,
216230
heading=heading_diff,
231+
legislation_content_id=legislation_id,
217232
)
218233
elif cited_content.content_str is not None:
219234
content_diff = cited_content.content_str + " " + to_replace
220-
if content_diff != cited_content.content_str:
221-
diff = ContentDiff(
222-
content_id=cited_content.content_id,
223-
section_id=cited_content.section_id,
224-
chapter_id=chapter_id,
235+
if content_diff != cited_content.content_str and heading_diff != " ":
236+
diff = USCContentDiff(
237+
usc_content_id=cited_content.usc_content_id,
238+
usc_section_id=cited_content.usc_section_id,
239+
usc_chapter_id=chapter_id,
225240
version_id=new_vers_id,
226241
content_str=content_diff,
242+
legislation_content_id=legislation_id,
227243
)
228244
if diff is not None:
229245
log.debug("adding")
230246
session.add(diff)
231247
session.commit()
232-
log.debug("Added diff", diff.diff_id)
248+
log.debug("Added diff", diff.usd_content_diff_id)
233249

234250

235251
def insert_text_after(action_obj: ActionObject, session: "Session") -> None:

0 commit comments

Comments
 (0)