Skip to content

Commit 0c7a096

Browse files
authored
Merge pull request internetarchive#11156 from cdrini/feature/monitor-empty-homepage
Improve solr + nginx performance monitoring
2 parents c7e2cc0 + 0ec6ce8 commit 0c7a096

File tree

13 files changed

+569
-113
lines changed

13 files changed

+569
-113
lines changed

compose.production.yaml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ services:
4646
max-file: "4"
4747

4848
solr_restarter:
49-
profiles: ["ol-solr0"]
49+
# Disabled for now; it seemed to be making things worse and no longer seems to be needed
50+
profiles: ["ol-never"]
5051
build:
5152
context: scripts/solr_restarter
5253
args:
@@ -316,7 +317,7 @@ services:
316317
# This job runs various monitoring/grafana checks across the entire cluster.
317318
# It has access to the other nodes via the docker socket.
318319
monitoring:
319-
profiles: ["ol-web0", "ol-web1", "ol-web2", "ol-covers0", "ol-www0"]
320+
profiles: ["ol-web0", "ol-web1", "ol-web2", "ol-covers0", "ol-www0", "ol-solr0", "ol-solr1"]
320321
build:
321322
context: .
322323
dockerfile: scripts/monitoring/Dockerfile

openlibrary/macros/RawQueryCarousel.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
if has_fulltext_only:
5757
params['has_fulltext'] = 'true'
5858

59-
results = work_search(params, fields = ",".join(fields), sort=sort, limit=limit, facet=False)
59+
results = work_search(params, fields = ",".join(fields), sort=sort, limit=limit, facet=False, query_label='BOOK_CAROUSEL')
6060
books = [storage(b) for b in (results.get('docs', []))]
6161
load_more = {
6262
"queryType": "SEARCH",

openlibrary/plugins/openlibrary/partials.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,14 @@ def generate(self) -> dict:
232232
param = data.get('param', {})
233233

234234
sort = None
235-
search_response = do_search(param, sort, rows=0, spellcheck_count=3, facet=True)
235+
search_response = do_search(
236+
param,
237+
sort,
238+
rows=0,
239+
spellcheck_count=3,
240+
facet=True,
241+
query_label='BOOK_SEARCH_FACETS',
242+
)
236243

237244
sidebar = render_template(
238245
'search/work_search_facets',

openlibrary/plugins/worksearch/code.py

Lines changed: 76 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import copy
2+
import functools
23
import itertools
34
import json
45
import logging
@@ -188,6 +189,24 @@ def read_cookie():
188189
return 'details'
189190

190191

192+
QueryLabel = Literal[
193+
'UNLABELLED',
194+
'BOOK_SEARCH',
195+
'BOOK_SEARCH_API',
196+
'BOOK_SEARCH_FACETS',
197+
'BOOK_CAROUSEL',
198+
# Used for the internal request made by solr to choose the best edition
199+
# during a normal book search
200+
'EDITION_MATCH',
201+
'LIST_SEARCH',
202+
'LIST_SEARCH_API',
203+
'SUBJECT_SEARCH',
204+
'SUBJECT_SEARCH_API',
205+
'AUTHOR_SEARCH',
206+
'AUTHOR_SEARCH_API',
207+
]
208+
209+
191210
def run_solr_query( # noqa: PLR0912
192211
scheme: SearchScheme,
193212
param: dict | None = None,
@@ -200,6 +219,7 @@ def run_solr_query( # noqa: PLR0912
200219
facet: bool | Iterable[str] = True,
201220
allowed_filter_params: set[str] | None = None,
202221
extra_params: list[tuple[str, Any]] | None = None,
222+
query_label: QueryLabel = 'UNLABELLED',
203223
):
204224
"""
205225
:param param: dict of query parameters
@@ -219,6 +239,7 @@ def run_solr_query( # noqa: PLR0912
219239
*(('fq', subquery) for subquery in scheme.universe),
220240
('start', offset),
221241
('rows', rows),
242+
('ol.label', query_label),
222243
('wt', param.get('wt', 'json')),
223244
] + (extra_params or [])
224245

@@ -361,6 +382,7 @@ def do_search(
361382
rows=100,
362383
facet=False,
363384
spellcheck_count=None,
385+
query_label: QueryLabel = 'UNLABELLED',
364386
):
365387
"""
366388
:param param: dict of search url parameters
@@ -391,6 +413,7 @@ def do_search(
391413
spellcheck_count,
392414
fields=list(fields),
393415
facet=facet,
416+
query_label=query_label,
394417
)
395418

396419

@@ -585,7 +608,12 @@ def GET(self):
585608
rows = 20
586609
if param:
587610
search_response = do_search(
588-
param, sort, page, rows=rows, spellcheck_count=3
611+
param,
612+
sort,
613+
page,
614+
rows=rows,
615+
spellcheck_count=3,
616+
query_label='BOOK_SEARCH',
589617
)
590618
else:
591619
search_response = SearchResponse(
@@ -709,18 +737,23 @@ def GET(self): # referenced subject_search
709737
req = ListSearchRequest.from_web_input(web.input(api='new'))
710738
# Can't set fields when rendering html
711739
req.fields = 'key'
712-
resp = self.get_results(req)
740+
resp = self.get_results(req, 'LIST_SEARCH')
713741
lists = list(web.ctx.site.get_many([doc['key'] for doc in resp.docs]))
714742
return render_template('search/lists.html', req, resp, lists)
715743

716-
def get_results(self, req: ListSearchRequest):
744+
def get_results(
745+
self,
746+
req: ListSearchRequest,
747+
query_label: Literal['LIST_SEARCH', 'LIST_SEARCH_API'],
748+
):
717749
return run_solr_query(
718750
ListSearchScheme(),
719751
{'q': req.q},
720752
offset=req.offset,
721753
rows=req.limit,
722754
fields=req.fields,
723755
sort=req.sort,
756+
query_label=query_label,
724757
)
725758

726759

@@ -732,7 +765,7 @@ class list_search_json(list_search):
732765

733766
def GET(self):
734767
req = ListSearchRequest.from_web_input(web.input())
735-
resp = self.get_results(req)
768+
resp = self.get_results(req, 'LIST_SEARCH_API')
736769

737770
web.header('Content-Type', 'application/json')
738771
if req.api == 'next':
@@ -769,15 +802,23 @@ class subject_search(delegate.page):
769802
path = '/search/subjects'
770803

771804
def GET(self):
772-
return render_template('search/subjects', self.get_results)
773-
774-
def get_results(self, q, offset=0, limit=100):
805+
get_results = functools.partial(self.get_results, query_label='SUBJECT_SEARCH')
806+
return render_template('search/subjects', get_results)
807+
808+
def get_results(
809+
self,
810+
q,
811+
query_label: Literal['SUBJECT_SEARCH', 'SUBJECT_SEARCH_API'],
812+
offset=0,
813+
limit=100,
814+
):
775815
response = run_solr_query(
776816
SubjectSearchScheme(),
777817
{'q': q},
778818
offset=offset,
779819
rows=limit,
780820
sort='work_count desc',
821+
query_label=query_label,
781822
)
782823

783824
return response
@@ -793,7 +834,12 @@ def GET(self):
793834
limit = safeint(i.limit, 100)
794835
limit = min(1000, limit) # limit limit to 1000.
795836

796-
response = self.get_results(i.q, offset=offset, limit=limit)
837+
response = self.get_results(
838+
i.q,
839+
query_label='SUBJECT_SEARCH_API',
840+
offset=offset,
841+
limit=limit,
842+
)
797843

798844
# Backward compatibility :/
799845
raw_resp = response.raw_resp['response']
@@ -809,16 +855,26 @@ class author_search(delegate.page):
809855
path = '/search/authors'
810856

811857
def GET(self):
812-
return render_template('search/authors', self.get_results)
813-
814-
def get_results(self, q, offset=0, limit=100, fields='*', sort=''):
858+
get_results = functools.partial(self.get_results, query_label='AUTHOR_SEARCH')
859+
return render_template('search/authors', get_results)
860+
861+
def get_results(
862+
self,
863+
q,
864+
query_label: Literal['AUTHOR_SEARCH', 'AUTHOR_SEARCH_API'],
865+
offset=0,
866+
limit=100,
867+
fields='*',
868+
sort='',
869+
):
815870
resp = run_solr_query(
816871
AuthorSearchScheme(),
817872
{'q': q},
818873
offset=offset,
819874
rows=limit,
820875
fields=fields,
821876
sort=sort,
877+
query_label=query_label,
822878
)
823879

824880
return resp
@@ -835,7 +891,12 @@ def GET(self):
835891
limit = min(1000, limit) # limit limit to 1000.
836892

837893
response = self.get_results(
838-
i.q, offset=offset, limit=limit, fields=i.fields, sort=i.sort
894+
i.q,
895+
query_label='AUTHOR_SEARCH_API',
896+
offset=offset,
897+
limit=limit,
898+
fields=i.fields,
899+
sort=i.sort,
839900
)
840901
raw_resp = response.raw_resp['response']
841902
for doc in raw_resp['docs']:
@@ -901,6 +962,7 @@ def work_search(
901962
fields: str = '*',
902963
facet: bool = True,
903964
spellcheck_count: int | None = None,
965+
query_label: QueryLabel = 'UNLABELLED',
904966
) -> dict:
905967
"""
906968
:param sort: key of SORTS dict at the top of this file
@@ -923,6 +985,7 @@ def work_search(
923985
fields=fields,
924986
facet=facet,
925987
spellcheck_count=spellcheck_count,
988+
query_label=query_label,
926989
)
927990
response = resp.raw_resp['response']
928991

@@ -996,6 +1059,7 @@ def GET(self):
9961059
# so disable it. This makes it much faster.
9971060
facet=False,
9981061
spellcheck_count=spellcheck_count,
1062+
query_label='BOOK_SEARCH_API',
9991063
)
10001064
response['documentation_url'] = "https://openlibrary.org/dev/docs/api/search"
10011065
response['q'] = q

openlibrary/plugins/worksearch/schemes/works.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,7 @@ def convert_work_query_to_edition_query(work_query: str) -> str:
508508
# for `userEdQuery` to be available to `editions.q`, we will
509509
# need to specify it twice.
510510
new_params.append(('editions.userEdQuery', ed_q or '*:*'))
511+
new_params.append(('editions.ol.label', 'EDITION_MATCH'))
511512

512513
full_ed_query = '({{!edismax bq="{bq}" v={v} qf="{qf}"}})'.format(
513514
# See qf in work_query

scripts/deployment/deploy.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ DEPLOY_DIR="/tmp/openlibrary_deploy"
2828
mkdir -p $DEPLOY_DIR
2929

3030
WEB_HOSTNAMES="ol-web0 ol-web1 ol-web2"
31-
ALL_HOSTNAMES="ol-home0 ol-covers0 ol-www0 $WEB_HOSTNAMES"
31+
ALL_HOSTNAMES="ol-home0 ol-covers0 ol-www0 ol-solr0 ol-solr1 $WEB_HOSTNAMES"
3232
SERVER_SUFFIX=${SERVER_SUFFIX:-".us.archive.org"}
3333

3434
KILL_CRON=${KILL_CRON:-""}

scripts/monitoring/haproxy_monitor.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import time
1111
from collections.abc import Callable, Iterable
1212
from dataclasses import dataclass
13-
from typing import Literal
13+
from typing import Literal, cast
1414

1515
import requests
1616

@@ -32,6 +32,32 @@ class GraphiteEvent:
3232
def serialize(self):
3333
return (self.path, (self.timestamp, self.value))
3434

35+
def serialize_str(self) -> str:
36+
return f"{self.path} {self.value} {self.timestamp}"
37+
38+
def submit(self, graphite_address: str | tuple[str, int]):
39+
GraphiteEvent.submit_many([self], graphite_address)
40+
41+
@staticmethod
42+
def submit_many(
43+
events: 'list[GraphiteEvent]', graphite_address: str | tuple[str, int]
44+
):
45+
if isinstance(graphite_address, str):
46+
graphite_host, graphite_port = cast(
47+
tuple[str, str], tuple(graphite_address.split(':', 1))
48+
)
49+
graphite_address_tuple = (graphite_host, int(graphite_port))
50+
else:
51+
graphite_address_tuple = graphite_address
52+
53+
payload = pickle.dumps([e.serialize() for e in events], protocol=2)
54+
header = struct.pack("!L", len(payload))
55+
message = header + payload
56+
57+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
58+
sock.connect(graphite_address_tuple)
59+
sock.sendall(message)
60+
3561

3662
@dataclass
3763
class HaproxyCapture:
@@ -83,8 +109,8 @@ async def main(
83109
commit_freq=30,
84110
agg: Literal['max', 'min', 'sum', None] = None,
85111
):
86-
graphite_address = tuple(graphite_address.split(':', 1))
87-
graphite_address = (graphite_address[0], int(graphite_address[1]))
112+
graphite_address_tuple = tuple(graphite_address.split(':', 1))
113+
graphite_address_tuple = (graphite_address_tuple[0], int(graphite_address_tuple[1]))
88114

89115
agg_options: dict[str, Callable[[Iterable[float]], float]] = {
90116
'max': max,
@@ -127,15 +153,7 @@ async def main(
127153
print(e.serialize())
128154

129155
if not dry_run:
130-
payload = pickle.dumps(
131-
[e.serialize() for e in events_buffer], protocol=2
132-
)
133-
header = struct.pack("!L", len(payload))
134-
message = header + payload
135-
136-
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
137-
sock.connect(graphite_address)
138-
sock.sendall(message)
156+
GraphiteEvent.submit_many(events_buffer, graphite_address_tuple)
139157

140158
events_buffer = []
141159
last_commit_ts = ts

0 commit comments

Comments
 (0)