Skip to content

Commit 81e7a70

Browse files
authored
Merge pull request #177 from mlibrary/api-stub-in-search-results
LIBSEARCH-1164-stub-in-search-results
2 parents 2e05d9c + b4687ff commit 81e7a70

File tree

9 files changed

+3246
-0
lines changed

9 files changed

+3246
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,6 @@ overlap/*.tsv
5353

5454
#this is so github actions doesn't try to add all the gems to the repo when updating a translation map
5555
vendor/
56+
57+
api/*.md
58+
api/*.txt

api/catalog_api/main.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from catalog_api import schemas
77
from catalog_api.solr_client import NotFoundError
88
from catalog_api.record import record_for
9+
from catalog_api.results import Results
910

1011
app = FastAPI(
1112
title="Catalog Search API", description="REST API for Catalog Search Solr"
@@ -42,3 +43,12 @@ def get_record(id: str) -> schemas.Record:
4243
return result
4344
except NotFoundError:
4445
raise HTTPException(status_code=404, detail="Item not found")
46+
47+
48+
@app.get("/search", response_model_exclude_none=True)
def get_search_results(offset: int = 0) -> schemas.Results:
    """
    Does a search in catalog solr
    """
    # Build the Results wrapper for the requested offset and hand it back
    # unchanged; no intermediate variable is needed.
    return Results({"offset": offset})

api/catalog_api/results.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import json
2+
from pathlib import Path
3+
from dataclasses import dataclass
4+
from catalog_api.record import Record
5+
6+
7+
class Results:
    """
    Stubbed search results backed by canned Solr-style JSON responses.

    Three fixture files are read once, while the class body executes, and
    kept as the class attributes ``page1``, ``page2`` and ``page3``. An
    instance selects one of them from the requested offset and exposes the
    parts of that response through read-only properties.
    """

    fixture_path = Path(__file__).parents[0] / "../tests/fixtures/results/"

    # The encoding is given explicitly so that reading the fixtures does not
    # depend on the locale of the machine running the API. Using read_text
    # also avoids leaving a stray file-handle attribute on the class.
    page1 = json.loads((fixture_path / "page1.json").read_text(encoding="utf-8"))
    page2 = json.loads((fixture_path / "page2.json").read_text(encoding="utf-8"))
    page3 = json.loads((fixture_path / "page3.json").read_text(encoding="utf-8"))

    def __init__(self, data: dict):
        """
        Select the fixture page that corresponds to ``data["offset"]``.

        Offsets below 10 use page 1, offsets below 20 use page 2, and every
        other offset uses page 3.

        Raises:
            KeyError: if ``data`` has no ``"offset"`` key.
        """
        offset = data["offset"]
        if offset < 10:
            self.data = self.page1
        elif offset < 20:
            self.data = self.page2
        else:
            self.data = self.page3

    @property
    def records(self):
        """Return one ``Record`` per entry in the response's ``docs`` list."""
        return [Record(doc) for doc in self.data["response"]["docs"]]

    @property
    def filters(self):
        """
        Return a ``Filter`` for each facet field in the response.

        Only facet fields that appear in ``Filter.filter_field_map`` are
        included; any other facet field is skipped.
        """
        facet_fields = self.data["facet_counts"]["facet_fields"]
        return [
            Filter(field=name, values=values)
            for name, values in facet_fields.items()
            if name in Filter.filter_field_map
        ]

    @property
    def total(self):
        """Return the ``numFound`` value of the selected response."""
        return self.data["response"]["numFound"]

    @property
    def limit(self):
        """Return the ``rows`` parameter recorded in the response header."""
        return self.data["responseHeader"]["params"]["rows"]

    @property
    def offset(self):
        """
        Return the ``start`` value of the selected response.

        This comes from the fixture data, so it is not necessarily the
        offset that was passed to the constructor.
        """
        return self.data["response"]["start"]
51+
52+
53+
# problems with filter data.
54+
# 1. the fields aren't the correct names
55+
# 2. location filter has the wrong name (time for the other api???)
56+
# 3. some of them (just search only?) are empty
57+
# 4. availability has some special rules
58+
class Filter:
    """
    One facet field of a search response, renamed for the API.

    ``field`` holds the API-facing name looked up in ``filter_field_map``
    and ``values`` holds the facet's entries as ``FilterValue`` objects.
    """

    # Facet field name in the search response -> filter name used by the API.
    filter_field_map = {
        "availability": "availability",
        "format": "format",
        "topicStr": "subject",
        "publishDateRange": "date_of_publication",
        "language": "language",
        "collection": "collection",
        "hlb3Str": "academic_discipline",
        "authorStr": "author",
        "place_of_publication": "place_of_publication",
        # The facet field is "geographicStr"; the previous key
        # "geographicSt" could never match it.
        "geographicStr": "region",
        "building": "location",
    }  # institution and search_only are skipped for this

    def __init__(self, field: str, values: list):
        """
        Args:
            field: facet field name as it appears in the search response.
            values: flat list alternating value text and count, e.g.
                ``["Book", 12, "Journal", 3]``.

        Raises:
            KeyError: if ``field`` is not a key of ``filter_field_map``.
        """
        self.field = self.filter_field_map[field]
        self.values = self.get_values(values)

    def get_values(self, values):
        """
        Turn a flat ``[text, count, text, count, ...]`` list into a list of
        ``FilterValue`` objects, preserving order.

        Even positions are paired with the odd positions that follow them;
        a trailing element without a partner is ignored.
        """
        return [
            FilterValue(text=text, count=count)
            for text, count in zip(values[::2], values[1::2])
        ]


@dataclass(frozen=True)
class FilterValue:
    """A single facet entry: the value's text and how often it occurs."""

    # Text of the facet value.
    text: str
    # Count reported alongside that value.
    count: int

api/catalog_api/schemas.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,24 @@ class Record(BaseModel):
264264
model_config = ConfigDict(populate_by_name=True)
265265

266266

267+
class FilterValue(BaseModel):
    # Response shape for one entry of a filter: the value's text and the
    # count that goes with it.
    text: str
    count: int
270+
271+
272+
class Filter(BaseModel):
    # Response shape for one filter: its field name and the list of values
    # available under it.
    field: str
    values: list[FilterValue]
275+
276+
277+
class Results(BaseModel):
    # Response shape for a page of search results.
    # The matching records and the filters that apply to the result set.
    records: list[Record]
    filters: list[Filter]
    # Paging information: page size, starting position, and overall count.
    limit: int
    offset: int
    total: int
283+
284+
267285
class Response(BaseModel):
268286
detail: str
269287

api/notes.md

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Notes for Search Results
2+
3+
## Example solr results query
4+
5+
```
6+
The query below is close to what the current search sends; the only difference is that the facet limit is reduced to 50 instead of 500.
7+
http://bulleit-1.umdl.umich.edu:8026/solr/biblio/select?f.authorStr.facet.mincount=1&df=allfields&f.authorStr.facet.sort=count&f.hlb3Str.facet.offset=0&f.publishDateRange.facet.limit=50&qq1=%22jazz+singer+book%22&tie=0.1&f.institution.facet.sort=count&f.building.facet.mincount=1&f.hlb3Str.facet.sort=count&f.topicStr.facet.sort=count&f.authorStr.facet.limit=50&f.language.facet.sort=count&f.format.facet.limit=50&f.topicStr.facet.limit=50&f.building.facet.sort=count&qq=%22_query_\:\{\!edismax+mm%3D$default_mm+mm.autoRelax%3D$mm.autoRelax+tie%3D$tie+qf%3D$all_fields_qf+pf%3D$all_fields_pf+pf2%3D$all_fields_pf2+ps2%3D$all_fields_ps2+boost%3D$all_fields_boost+v%3D$q1\}%22&per_page=10&f.search_only.facet.offset=0&f.search_only.facet.mincount=1&qt=standard&f.building.facet.limit=50&f.institution.facet.limit=50&sort=score+desc&f.availability.facet.limit=50&f.search_only.facet.sort=count&default_mm=2%3C-1+5%3C67%25&f.geographicStr.facet.limit=50&mm.autoRelax=true&f.institution.facet.offset=0&f.hlb3Str.facet.mincount=1&f.geographicStr.facet.mincount=1&f.format.facet.offset=0&f.place_of_publication.facet.offset=0&f.topicStr.facet.mincount=1&page=1&t1=%22jazz+singer%22+book&f.place_of_publication.facet.mincount=1&f.language.facet.mincount=1&f.availability.facet.mincount=1&f.language.facet.offset=0&all_fields_pf2=title_author^500+title_equiv^80+title_l^50&fl=*,score&f.location.facet.mincount=1&f.publishDateRange.facet.offset=0&f.language.facet.limit=50&fq=institution:(UM\+Ann\+Arbor\+Libraries)&fq=%2B(new_availability:physical+OR+new_availability:hathi_trust_full_text_or_electronic_holding)&f.location.facet.sort=count&f.publishDateRange.facet.sort=count&all_fields_pf=title_equiv^40+title_top^20+title_rest^10+author^80+author_top^30+author_rest^20&f.format.facet.mincount=1&f.hlb3Str.facet.limit=50&f.geographicStr.facet.sort=count&f.institution.facet.mincount=1&facet.threads=10&clean_string=(%22jazz+singer%22+book)&wt=json&f.search_only.facet.limit=50&f.topicStr.facet.offset=0&f.lo
cation.facet.offset=0&q1=(%22jazz+singer%22+book)&facet.field=search_only&facet.field=availability&facet.field=format&facet.field=topicStr&facet.field=publishDateRange&facet.field=language&facet.field=location&facet.field=hlb3Str&facet.field=authorStr&facet.field=place_of_publication&facet.field=geographicStr&facet.field=institution&facet.field=building&f.place_of_publication.facet.sort=count&f.geographicStr.facet.offset=0&start=0&rows=10&all_fields_boost=product(+if(termfreq(%27format%27,+%27Journal%27),+1.4,+1),+max(+map(+query({!field+f%3Dtitle_common_exact+v%3D$q1},+1),+0,+1,+1,+180+),+map(+query({!field+f%3Dtitle_equiv_exact+v%3D$q1},+1),+0,+1,+1,+50+),+map(+query({!field+f%3Dtitle_a_exact+v%3D$q1},+1),+0,+1,+1,+10+)),+map(+query({!dismax+f%3Dtitle_author+v%3D$q1+mm%3D%22100%25%22},+1),+0,+1,+1,+50+),+)&f.availability.facet.sort=count&q=_query_:{!edismax+mm%3D$default_mm+mm.autoRelax%3D$mm.autoRelax+tie%3D$tie+qf%3D$all_fields_qf+pf%3D$all_fields_pf+pf2%3D$all_fields_pf2+ps2%3D$all_fields_ps2+boost%3D$all_fields_boost+v%3D$q1}&f.location.facet.limit=50&f.place_of_publication.facet.limit=50&f.authorStr.facet.offset=0&f.building.facet.offset=0&all_fields_ps2=2&all_fields_qf=allfieldsProper^2+allfields^1+title_common^50+title_equiv^10+mainauthor^80+author^50+isbn+issn+oclc+lccn+barcode+htid+callnosearch+bookplate&f.format.facet.sort=count&f.publishDateRange.facet.mincount=1&facet=true&f.availability.facet.offset=0
8+
9+
10+
This is the query we want: the `location` facet is replaced by `collection`, and the facet limit is 50.
11+
http://bulleit-1.umdl.umich.edu:8026/solr/biblio/select?f.authorStr.facet.mincount=1&df=allfields&f.authorStr.facet.sort=count&f.hlb3Str.facet.offset=0&f.publishDateRange.facet.limit=50&qq1=%22jazz+singer+book%22&tie=0.1&f.institution.facet.sort=count&f.building.facet.mincount=1&f.hlb3Str.facet.sort=count&f.topicStr.facet.sort=count&f.authorStr.facet.limit=50&f.language.facet.sort=count&f.format.facet.limit=50&f.topicStr.facet.limit=50&f.building.facet.sort=count&qq=%22_query_\:\{\!edismax+mm%3D$default_mm+mm.autoRelax%3D$mm.autoRelax+tie%3D$tie+qf%3D$all_fields_qf+pf%3D$all_fields_pf+pf2%3D$all_fields_pf2+ps2%3D$all_fields_ps2+boost%3D$all_fields_boost+v%3D$q1\}%22&per_page=10&f.search_only.facet.offset=0&f.search_only.facet.mincount=1&qt=standard&f.building.facet.limit=50&f.institution.facet.limit=50&sort=score+desc&f.availability.facet.limit=50&f.search_only.facet.sort=count&default_mm=2%3C-1+5%3C67%25&f.geographicStr.facet.limit=50&mm.autoRelax=true&f.institution.facet.offset=0&f.hlb3Str.facet.mincount=1&f.geographicStr.facet.mincount=1&f.format.facet.offset=0&f.place_of_publication.facet.offset=0&f.topicStr.facet.mincount=1&page=1&t1=%22jazz+singer%22+book&f.place_of_publication.facet.mincount=1&f.language.facet.mincount=1&f.availability.facet.mincount=1&f.language.facet.offset=0&all_fields_pf2=title_author^500+title_equiv^80+title_l^50&fl=*,score&f.collection.facet.mincount=1&f.publishDateRange.facet.offset=0&f.language.facet.limit=50&fq=institution:(UM\+Ann\+Arbor\+Libraries)&fq=%2B(new_availability:physical+OR+new_availability:hathi_trust_full_text_or_electronic_holding)&f.collection.facet.sort=count&f.publishDateRange.facet.sort=count&all_fields_pf=title_equiv^40+title_top^20+title_rest^10+author^80+author_top^30+author_rest^20&f.format.facet.mincount=1&f.hlb3Str.facet.limit=50&f.geographicStr.facet.sort=count&f.institution.facet.mincount=1&facet.threads=10&clean_string=(%22jazz+singer%22+book)&wt=json&f.search_only.facet.limit=50&f.topicStr.facet.offset=0&
f.collection.facet.offset=0&q1=(%22jazz+singer%22+book)&facet.field=search_only&facet.field=availability&facet.field=format&facet.field=topicStr&facet.field=publishDateRange&facet.field=language&facet.field=collection&facet.field=hlb3Str&facet.field=authorStr&facet.field=place_of_publication&facet.field=geographicStr&facet.field=institution&facet.field=building&f.place_of_publication.facet.sort=count&f.geographicStr.facet.offset=0&start=0&rows=10&all_fields_boost=product(+if(termfreq(%27format%27,+%27Journal%27),+1.4,+1),+max(+map(+query({!field+f%3Dtitle_common_exact+v%3D$q1},+1),+0,+1,+1,+180+),+map(+query({!field+f%3Dtitle_equiv_exact+v%3D$q1},+1),+0,+1,+1,+50+),+map(+query({!field+f%3Dtitle_a_exact+v%3D$q1},+1),+0,+1,+1,+10+)),+map(+query({!dismax+f%3Dtitle_author+v%3D$q1+mm%3D%22100%25%22},+1),+0,+1,+1,+50+),+)&f.availability.facet.sort=count&q=_query_:{!edismax+mm%3D$default_mm+mm.autoRelax%3D$mm.autoRelax+tie%3D$tie+qf%3D$all_fields_qf+pf%3D$all_fields_pf+pf2%3D$all_fields_pf2+ps2%3D$all_fields_ps2+boost%3D$all_fields_boost+v%3D$q1}&f.collection.facet.limit=50&f.place_of_publication.facet.limit=50&f.authorStr.facet.offset=0&f.building.facet.offset=0&all_fields_ps2=2&all_fields_qf=allfieldsProper^2+allfields^1+title_common^50+title_equiv^10+mainauthor^80+author^50+isbn+issn+oclc+lccn+barcode+htid+callnosearch+bookplate&f.format.facet.sort=count&f.publishDateRange.facet.mincount=1&facet=true&f.availability.facet.offset=0
12+
```
13+
14+
## Paths for query_parser_api
15+
16+
```
17+
/catalog/search (this is the url for searching. takes in a bunch of parameters)
18+
/onlinejournals/search (this is the url for searching online journals)
19+
```
20+
21+
```
22+
# Potential path; It's a different solr, but I don't think that matters. It uses the query parser so it ought to go here.
23+
/website/search
24+
25+
# Other catalog paths?
26+
/catalog/record # could make the catalog_api not know how to query solr.
27+
/catalog/debug # show some output for debugging a query
28+
/catalog/solr # send solr queries straight through. Might be useful for super advanced search.
29+
```
30+
31+
This handles changing the complicated availability filter into something solr/query parser can read.
32+
33+
## Results response
34+
35+
{
36+
records: []
37+
filters: []
38+
total: int
39+
limit: int
40+
offset: int
41+
}

api/tests/fixtures/results/page1.json

Lines changed: 1307 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)