Skip to content

Commit 4035b3e

Browse files
authored
Merge pull request #173 from mlibrary/citation-trailing-slashes
citations and compress api output
2 parents d267a18 + 1a1b5ae commit 4035b3e

File tree

4 files changed

+31
-8
lines changed

4 files changed

+31
-8
lines changed

api/catalog_api/main.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from fastapi import FastAPI, HTTPException
2+
from fastapi.middleware.gzip import GZipMiddleware
23
from prometheus_fastapi_instrumentator import Instrumentator
34
from prometheus_client import Histogram
45

@@ -9,6 +10,7 @@
910
app = FastAPI(
1011
title="Catalog Search API", description="REST API for Catalog Search Solr"
1112
)
13+
app.add_middleware(GZipMiddleware, minimum_size=1000)
1214

1315
Instrumentator().instrument(app).expose(app)
1416

api/catalog_api/marc.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
from collections.abc import Callable
66
from catalog_api.entities import SearchField, FieldElement, PairedField
77

8+
# Used with rstrip_chars: the characters that should usually be removed when
9+
# trimming the end of a string.
10+
TRIM_CHARS = "/.,: "
11+
812

913
class Linkage:
1014
def __init__(self, field: pymarc.Field):
@@ -32,13 +36,16 @@ class FieldRuleset:
3236
search: list | None = None
3337
browse_sfs: str | None = None
3438
filter: Callable[..., bool] = lambda field: True
39+
rstrip_chars: str = ""
3540

3641
def has_any_subfields(self, field: pymarc.Field) -> bool:
3742
return bool(self._get_subfields(field, self.text_sfs))
3843

3944
def value_for(self, field: pymarc.Field):
4045
result = {
41-
"text": self._get_subfields(field, self.text_sfs).strip(),
46+
"text": self._get_subfields(field, self.text_sfs)
47+
.rstrip(self.rstrip_chars)
48+
.strip(),
4249
"tag": field.tag,
4350
}
4451

api/catalog_api/record.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22
from catalog_api.solr_client import SolrClient
33
from catalog_api.solr import SolrDocProcessor
4-
from catalog_api.marc import Processor, FieldRuleset
4+
from catalog_api.marc import Processor, FieldRuleset, TRIM_CHARS
55
import re
66
import pymarc
77
import io
@@ -585,6 +585,7 @@ class TaggedCitation:
585585
"ruleset": FieldRuleset(
586586
tags=["100", "101", "110", "111", "700", "710", "711"],
587587
text_sfs="abcdefgjklnpqtu4",
588+
rstrip_chars=TRIM_CHARS,
588589
),
589590
"ris": ["AU"],
590591
"meta": ["author"],
@@ -613,6 +614,7 @@ class TaggedCitation:
613614
filter=lambda field: (
614615
field.indicator1 == "0" and re.match("ed", field.get("e", ""))
615616
),
617+
rstrip_chars=TRIM_CHARS,
616618
),
617619
"ris": ["ED", "A2"],
618620
"meta": ["editor"],
@@ -626,8 +628,7 @@ class TaggedCitation:
626628
{
627629
"kind": "marc",
628630
"ruleset": FieldRuleset(
629-
tags=["245"],
630-
text_sfs="abnp",
631+
tags=["245"], text_sfs="abnp", rstrip_chars=TRIM_CHARS
631632
),
632633
"ris": ["JF", "T1", "TI"], # JF seems sus; how do we know it's a journal?
633634
"meta": ["title", "journal_title"],
@@ -697,6 +698,7 @@ class TaggedCitation:
697698
"ruleset": FieldRuleset(
698699
tags=["264", "260"],
699700
text_sfs="b",
701+
rstrip_chars=TRIM_CHARS,
700702
),
701703
"ris": ["PB"],
702704
"meta": ["publisher"],
@@ -1078,10 +1080,7 @@ def type(self):
10781080
@property
10791081
def title(self):
10801082
rulesets = (
1081-
FieldRuleset(
1082-
tags=["245"],
1083-
text_sfs="abp",
1084-
),
1083+
FieldRuleset(tags=["245"], text_sfs="abp", rstrip_chars=TRIM_CHARS),
10851084
)
10861085
return self._get_marc_content(rulesets)
10871086

@@ -1113,11 +1112,13 @@ def publisher_place(self):
11131112
FieldRuleset(
11141113
tags=["260"],
11151114
text_sfs="a",
1115+
rstrip_chars=TRIM_CHARS,
11161116
),
11171117
FieldRuleset(
11181118
tags=["264"],
11191119
text_sfs="a",
11201120
filter=lambda field: (field.indicator2 == "1"),
1121+
rstrip_chars=TRIM_CHARS,
11211122
),
11221123
)
11231124
return self._get_marc_content(rulesets)
@@ -1128,11 +1129,13 @@ def publisher(self):
11281129
FieldRuleset(
11291130
tags=["260"],
11301131
text_sfs="b",
1132+
rstrip_chars=TRIM_CHARS,
11311133
),
11321134
FieldRuleset(
11331135
tags=["264"],
11341136
text_sfs="b",
11351137
filter=lambda field: (field.indicator2 == "1"),
1138+
rstrip_chars=TRIM_CHARS,
11361139
),
11371140
)
11381141
return self._get_marc_content(rulesets)
@@ -1163,13 +1166,15 @@ def author(self):
11631166
filter=lambda field: (
11641167
field.indicator1 == "1" and field.get("e") not in field_e_strings
11651168
),
1169+
rstrip_chars=TRIM_CHARS,
11661170
),
11671171
FieldRuleset(
11681172
tags=["100", "700"],
11691173
text_sfs="ab",
11701174
filter=lambda field: (
11711175
field.indicator1 == "0" and field.get("e") not in field_e_strings
11721176
),
1177+
rstrip_chars=TRIM_CHARS,
11731178
),
11741179
)
11751180

@@ -1180,6 +1185,7 @@ def author(self):
11801185
tags=["110", "111", "710", "711"],
11811186
text_sfs="ab",
11821187
filter=lambda field: (field.get("e") not in field_e_strings),
1188+
rstrip_chars=TRIM_CHARS,
11831189
),
11841190
)
11851191
corporate_authors = self._to_literal(
@@ -1201,13 +1207,15 @@ def editor(self):
12011207
filter=lambda field: (
12021208
field.indicator1 == "1" and field.get("e") in field_e_strings
12031209
),
1210+
rstrip_chars=TRIM_CHARS,
12041211
),
12051212
FieldRuleset(
12061213
tags=["700"],
12071214
text_sfs="ab",
12081215
filter=lambda field: (
12091216
field.indicator1 == "0" and field.get("e") in field_e_strings
12101217
),
1218+
rstrip_chars=TRIM_CHARS,
12111219
),
12121220
)
12131221
result = self._to_author(self._get_marc_contents(rulesets))

api/tests/test_record.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,6 +1519,12 @@ def test_title(self):
15191519

15201520
assert subject.title == "a b p"
15211521

1522+
def test_title_does_not_have_trailing_chars(self):
1523+
record = create_record_with_paired_field(tag="245")
1524+
record["245"]["p"] = "p . /,: / "
1525+
subject = CSL(marc_record=record)
1526+
assert subject.title == "a b p"
1527+
15221528
def test_call_number(self, solr_bib):
15231529
solr_bib["callnumber"].append("some other call number")
15241530
subject = CSL(solr_doc=solr_bib)

0 commit comments

Comments
 (0)