Skip to content

Commit c125cb7

Browse files
authored
Merge pull request #1972 from RogerHaase/1885-search
Search does not support namespaces fixes #1907
2 parents 4014a52 + 5be7398 commit c125cb7

File tree

2 files changed

+73
-100
lines changed

2 files changed

+73
-100
lines changed

src/moin/apps/frontend/views.py

Lines changed: 72 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
import pytz
4949

5050
from whoosh import sorting
51-
from whoosh.query import Term, Prefix, And, Or, Not, DateRange, Every
51+
from whoosh.query import Term, Prefix, And, Or, Not, DateRange
5252
from whoosh.query.qcore import QueryError, TermNotFound
5353
from whoosh.analysis import StandardAnalyzer
5454

@@ -343,18 +343,14 @@ def _compute_item_transclusions(item_name):
343343
return transcluded_names
344344

345345

346-
def add_file_filters(_filter, filetypes):
346+
def expand_content_types(filetypes):
347347
"""
348-
Add various terms to the filter for the search query for the selected file types
349-
in the search options.
350-
351-
:param _filter: the current filter
352-
:param filetypes: list of selected filetypes
353-
:returns: the required _filter for the search query
348+
:param filetypes: list of Content Types Selected by user on the More Search Options form
349+
:Returns: a partial query derived from user selection of wanted contenttypes.
354350
"""
355-
if filetypes and "all" not in filetypes:
351+
all_contenttypes = []
352+
if filetypes:
356353
contenttypes = []
357-
files_filter = []
358354
if "markup" in filetypes:
359355
contenttypes.append(CONTENTTYPE_MARKUP)
360356
if "text" in filetypes:
@@ -371,22 +367,20 @@ def add_file_filters(_filter, filetypes):
371367
contenttypes.append(CONTENTTYPE_OTHER)
372368
for ctype in contenttypes:
373369
for itemtype in ctype:
374-
files_filter.append(Term("contenttype", itemtype))
370+
all_contenttypes.append(Term("contenttype", itemtype))
375371
if "unknown" in filetypes:
376372
known_types = []
377373
for known in CONTENTTYPES_MAP.keys():
378374
known_types.append(Term("contenttype", known))
379375
unknown_types = Not(Or(known_types))
380-
if not files_filter:
381-
_filter.append(unknown_types)
382-
_filter = And(_filter)
383-
return _filter
376+
if not all_contenttypes:
377+
all_contenttypes.append(unknown_types)
378+
all_contenttypes = And(all_contenttypes)
379+
return all_contenttypes
384380
else:
385-
files_filter.append(unknown_types)
386-
files_filter = Or(files_filter)
387-
_filter.append(files_filter)
388-
_filter = And(_filter)
389-
return _filter
381+
all_contenttypes.append(unknown_types)
382+
all_contenttypes = Or(all_contenttypes)
383+
return all_contenttypes
390384

391385

392386
def add_facets(facets, time_sorting):
@@ -427,134 +421,116 @@ def parse_scoped_query(query):
427421
return None, query
428422

429423

430-
@frontend.route("/+search/<itemname:item_name>", methods=["GET", "POST"])
431-
@frontend.route("/+search", defaults=dict(item_name=""), methods=["GET", "POST"])
432-
def search(item_name):
424+
@frontend.route("/+search", methods=["GET", "POST"])
425+
def search():
433426
"""
434427
Perform a whoosh search of the index and display the matching items.
435428
429+
This procedure supports both the one line simple query string and the ajax
430+
character by character changes to the "More search options" form.
436431
The default search is across all namespaces in the index and excludes trash.
437432
438433
The Jinja template formatting the output may also display data related to the
439-
search such as the whoosh query, filter (if any), hit counts, and additional
440-
suggested search terms.
434+
search such as the whoosh query, hit counts, and score.
435+
436+
Two prefixes to the query string are supported. A leading \ causes a browser
437+
redirect to the highest scoring search result. A leading > limits the search to
438+
an item's subitems.
441439
442-
"Currently" there is no theme generating the '/+search/<itemname:item_name>' link
443-
within Item Views. To access, users must key the query link into the browsers URL. The
444-
query result is filtered limiting the output to the target item, target subitems
445-
and sub-subitems..., and transclusions within those items.
446-
Example URL: http://127.0.0.1:8080/+search/OtherTextItems?q=moin
440+
namespaces can be entered in 3 different ways:
441+
* a leading namespace in a subitem query: >users/joe red
442+
* explicit reference in the query string: namespace:users
443+
* clicking a checkbox in the More Search Options form
444+
The user should choose one way, and avoid conflicting choices.
447445
"""
448446
search_form = SearchForm.from_flat(request.values)
449447
ajax = True if request.args.get("boolajax") else False
450448
valid = search_form.validate()
451449
time_sorting = False
452450
filetypes = []
451+
namespaces = []
452+
terms = []
453+
trash = request.args.get("trash", "false")
454+
leading_ns = ""
453455
if ajax:
454456
query = request.args.get("q")
455457
history = request.args.get("history") == "true"
456458
time_sorting = request.args.get("time_sorting")
457459
if time_sorting == "default":
458460
time_sorting = False
459461
filetypes = request.args.get("filetypes")
462+
namespaces = request.args.get("namespaces")
460463
is_ticket = bool(request.args.get("is_ticket"))
461464
if filetypes:
462465
filetypes = filetypes.split(",")[:-1] # To remove the extra '' at the end of the list
466+
if namespaces:
467+
namespaces = namespaces.split(",")[:-1]
468+
namespaces = ["" if ns == NAMESPACE_UI_DEFAULT else ns for ns in namespaces]
463469
else:
464470
query = search_form["q"].value
465471
history = bool(request.values.get("history"))
466-
467472
best_match = False
468473
# we test for query in case this is a test run
469474
if query and query.startswith("\\"):
470475
best_match = True
471476
query = query[1:]
472477

473-
# detect prefix and extract target item
474-
subitem_target, query = parse_scoped_query(query)
478+
# if query starts with > extract target item name and query: ">joe red, pink" becomes ("joe", "red pink")
479+
item_name, query = parse_scoped_query(query)
480+
481+
# is there a leading namespace in query string
482+
if item_name:
483+
in_parts = item_name.split("/", 1)
484+
if len(in_parts) > 1:
485+
is_ns = [x[0] for x in app.cfg.namespace_mapping if x[0] == in_parts[0]]
486+
if is_ns:
487+
if is_ns[0] not in [NAMESPACE_USERPROFILES, NAMESPACE_DEFAULT]:
488+
leading_ns = is_ns[0]
489+
item_name = in_parts[1]
475490

476491
if valid or ajax:
477492
# most fields in the schema use a StandardAnalyzer, it omits fairly frequently used words
478493
# this finds such words and reports to the user
479494
analyzer = StandardAnalyzer()
480-
omitted_words = [token.text for token in analyzer(query, removestops=False) if token.stopped]
481-
495+
omitted_words = [token.text for token in analyzer(query) if token.stopped]
482496
idx_name = ALL_REVS if history else LATEST_REVS
483-
484497
if best_match:
485498
qp = flaskg.storage.query_parser([NAMES, NAMENGRAM], idx_name=idx_name)
486499
else:
487500
qp = flaskg.storage.query_parser(
488501
[NAMES, NAMENGRAM, TAGS, SUMMARY, SUMMARYNGRAM, CONTENT, CONTENTNGRAM, COMMENT], idx_name=idx_name
489502
)
490503
q = qp.parse(query)
491-
_filter = []
492-
_filter = add_file_filters(_filter, filetypes)
493-
494-
# if the user specified a subitem target
495-
if subitem_target:
496-
# if they also specified an item name from the URL
497-
if item_name:
498-
# display a note that the subitem will override the item
499-
flash(_("Note: Subitem target in query overrides the item in the URL."), "info")
500-
# update the item_name to be the subitem_target
501-
item_name = subitem_target
502-
503-
if item_name: # Only search this item and subitems
504-
full_name = None
505-
506-
# search for the full item name (i.e. "Home/Readings" for "Readings")
507-
with flaskg.storage.indexer.ix[LATEST_REVS].searcher() as searcher:
508-
all_items = searcher.search(Every(), limit=None)
509-
for hit in all_items:
510-
try:
511-
hit_name = hit[NAME][0]
512-
except IndexError:
513-
# deleted items have no names, e.g. []
514-
continue
515-
if hit_name.endswith("/" + item_name) or hit_name == item_name:
516-
full_name = hit_name
517-
break
518-
519-
if full_name:
520-
prefix_name = full_name + "/"
521-
terms = [Term(NAME_EXACT, full_name), Prefix(NAME_EXACT, prefix_name)]
522-
523-
show_transclusions = True
524-
if show_transclusions:
525-
# XXX Search subitems and all transcluded items (even recursively),
526-
# still looks like a hack. Imaging you have "foo" on main page and
527-
# "bar" on transcluded one. Then you search for "foo AND bar".
528-
# Such stuff would only work if we expand transcluded items
529-
# at indexing time (and we currently don't).
530-
with flaskg.storage.indexer.ix[LATEST_REVS].searcher() as searcher:
531-
subq = Or([Term(NAME_EXACT, full_name), Prefix(NAME_EXACT, prefix_name)])
532-
subq = And([subq, Every(ITEMTRANSCLUSIONS)])
533-
flaskg.clock.start("search subitems with transclusions")
534-
results = searcher.search(subq, limit=None)
535-
flaskg.clock.stop("search subitems with transclusions")
536-
transcluded_names = set()
537-
for hit in results:
538-
name = hit[NAME]
539-
transclusions = _compute_item_transclusions(name)
540-
transcluded_names.update(transclusions)
541-
# XXX Will whoosh cope with such a large filter query?
542-
terms.extend([Term(NAME_EXACT, tname) for tname in transcluded_names])
543-
_filter = Or(terms)
504+
if trash == "false":
505+
q = And([q, Not(Term(TRASH, True))])
506+
if namespaces:
507+
ns_terms = [Term(NAMESPACE, ns) for ns in namespaces]
508+
q = And([q, Or(ns_terms)])
509+
elif leading_ns:
510+
ns_terms = [Term(NAMESPACE, leading_ns)]
511+
q = And([q, Or(ns_terms)])
512+
all_contenttypes = expand_content_types(filetypes)
513+
514+
if item_name:
515+
prefix_name = item_name + "/"
516+
terms = [Term(NAME_EXACT, item_name), Prefix(NAME_EXACT, prefix_name)]
517+
if all_contenttypes:
518+
q = And([q, Or(all_contenttypes)])
519+
if terms:
520+
q = And([q, Or(terms)])
544521

545522
with flaskg.storage.indexer.ix[idx_name].searcher() as searcher:
546523
# terms is set to retrieve list of terms which matched, in the searchtemplate, for highlight.
547524
facets = []
548525
facets = add_facets(facets, time_sorting)
549526
flaskg.clock.start("search")
550527
try:
551-
results = searcher.search(q, filter=_filter, limit=100, terms=True, sortedby=facets)
528+
results = searcher.search(q, limit=100, terms=True, sortedby=facets)
552529
# this may be an ajax transaction, search.js will handle a full page response
553530
except QueryError:
554531
flash(_("""QueryError: invalid search term: {search_term}""").format(search_term=q), "error")
555532
return render_template("search.html", query=query, medium_search_form=search_form, item_name=item_name)
556533
except TermNotFound:
557-
# name:'moin has bugs'
558534
flash(_("""TermNotFound: field is not indexed: {search_term}""").format(search_term=q), "error")
559535
return render_template("search.html", query=query, medium_search_form=search_form, item_name=item_name)
560536
flaskg.clock.stop("search")
@@ -567,28 +543,25 @@ def search(item_name):
567543
html = render_template(
568544
"ajaxsearch.html",
569545
results=results,
546+
query=query,
570547
omitted_words=", ".join(omitted_words),
571548
history=history,
572-
is_ticket=is_ticket,
573549
whoosh_query=q,
574-
whoosh_filter=_filter,
575550
flaskg=flaskg,
576-
subitem_target=subitem_target,
577-
query=query,
551+
item_name=item_name,
552+
is_ticket=is_ticket,
578553
)
579554
else:
580555
html = render_template(
581556
"search.html",
582557
results=results,
583558
query=query,
584-
medium_search_form=search_form,
585-
item_name=item_name,
586559
omitted_words=", ".join(omitted_words),
587560
history=history,
588561
whoosh_query=q,
589-
whoosh_filter=_filter,
590562
flaskg=flaskg,
591-
subitem_target=subitem_target,
563+
item_name=item_name,
564+
medium_search_form=search_form,
592565
)
593566
flaskg.clock.stop("search render")
594567
else:

src/moin/templates/ajaxsearch.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ <h4>{{ _("Please check these tickets if they cover your issue:") }}</h4>
4545
</p>
4646
{%- endif %}
4747
{%- if omitted_words %}
48-
<p class="moin-suggestions"> {{ _("Common words omitted from query:") }}
48+
<p class="moin-suggestions"> {{ _("Common words omitted from content field query:") }}
4949
<span class="moin-suggestion-terms">{{ omitted_words }} </span>
5050
</p>
5151
{%- endif %}

0 commit comments

Comments
 (0)