4848import pytz
4949
5050from whoosh import sorting
51- from whoosh .query import Term , Prefix , And , Or , Not , DateRange , Every
51+ from whoosh .query import Term , Prefix , And , Or , Not , DateRange
5252from whoosh .query .qcore import QueryError , TermNotFound
5353from whoosh .analysis import StandardAnalyzer
5454
@@ -343,18 +343,14 @@ def _compute_item_transclusions(item_name):
343343 return transcluded_names
344344
345345
346- def add_file_filters ( _filter , filetypes ):
346+ def expand_content_types ( filetypes ):
347347 """
348- Add various terms to the filter for the search query for the selected file types
349- in the search options.
350-
351- :param _filter: the current filter
352- :param filetypes: list of selected filetypes
353- :returns: the required _filter for the search query
348+ :param filetypes: list of content types selected by the user on the More Search Options form
349+ :returns: a partial query derived from the user's selection of wanted content types.
354350 """
355- if filetypes and "all" not in filetypes :
351+ all_contenttypes = []
352+ if filetypes :
356353 contenttypes = []
357- files_filter = []
358354 if "markup" in filetypes :
359355 contenttypes .append (CONTENTTYPE_MARKUP )
360356 if "text" in filetypes :
@@ -371,22 +367,20 @@ def add_file_filters(_filter, filetypes):
371367 contenttypes .append (CONTENTTYPE_OTHER )
372368 for ctype in contenttypes :
373369 for itemtype in ctype :
374- files_filter .append (Term ("contenttype" , itemtype ))
370+ all_contenttypes .append (Term ("contenttype" , itemtype ))
375371 if "unknown" in filetypes :
376372 known_types = []
377373 for known in CONTENTTYPES_MAP .keys ():
378374 known_types .append (Term ("contenttype" , known ))
379375 unknown_types = Not (Or (known_types ))
380- if not files_filter :
381- _filter .append (unknown_types )
382- _filter = And (_filter )
383- return _filter
376+ if not all_contenttypes :
377+ all_contenttypes .append (unknown_types )
378+ all_contenttypes = And (all_contenttypes )
379+ return all_contenttypes
384380 else :
385- files_filter .append (unknown_types )
386- files_filter = Or (files_filter )
387- _filter .append (files_filter )
388- _filter = And (_filter )
389- return _filter
381+ all_contenttypes .append (unknown_types )
382+ all_contenttypes = Or (all_contenttypes )
383+ return all_contenttypes
390384
391385
392386def add_facets (facets , time_sorting ):
@@ -427,134 +421,116 @@ def parse_scoped_query(query):
427421 return None , query
428422
429423
430- @frontend .route ("/+search/<itemname:item_name>" , methods = ["GET" , "POST" ])
431- @frontend .route ("/+search" , defaults = dict (item_name = "" ), methods = ["GET" , "POST" ])
432- def search (item_name ):
424+ @frontend .route ("/+search" , methods = ["GET" , "POST" ])
425+ def search ():
433426 """
434427 Perform a whoosh search of the index and display the matching items.
435428
429+ This procedure supports both the one line simple query string and the ajax
430+ character by character changes to the "More search options" form.
436431 The default search is across all namespaces in the index and excludes trash.
437432
438433 The Jinja template formatting the output may also display data related to the
439- search such as the whoosh query, filter (if any), hit counts, and additional
440- suggested search terms.
434+ search such as the whoosh query, hit counts, and score.
435+
436+ Two prefixes to the query string are supported. A leading \ causes a browser
437+ redirect to the highest scoring search result. A leading > limits the search to
438+ an item's subitems.
441439
442- "Currently" there is no theme generating the '/+search/<itemname:item_name>' link
443- within Item Views. To access, users must key the query link into the browsers URL. The
444- query result is filtered limiting the output to the target item, target subitems
445- and sub-subitems..., and transclusions within those items.
446- Example URL: http://127.0.0.1:8080/+search/OtherTextItems?q=moin
440+ Namespaces can be entered in 3 different ways:
441+ * a leading namespace in a subitem query: >users/joe red
442+ * explicit reference in the query string: namespace:users
443+ * clicking a checkbox in the More Search Options form
444+ The user should choose one way, and avoid conflicting choices.
447445 """
448446 search_form = SearchForm .from_flat (request .values )
449447 ajax = True if request .args .get ("boolajax" ) else False
450448 valid = search_form .validate ()
451449 time_sorting = False
452450 filetypes = []
451+ namespaces = []
452+ terms = []
453+ trash = request .args .get ("trash" , "false" )
454+ leading_ns = ""
453455 if ajax :
454456 query = request .args .get ("q" )
455457 history = request .args .get ("history" ) == "true"
456458 time_sorting = request .args .get ("time_sorting" )
457459 if time_sorting == "default" :
458460 time_sorting = False
459461 filetypes = request .args .get ("filetypes" )
462+ namespaces = request .args .get ("namespaces" )
460463 is_ticket = bool (request .args .get ("is_ticket" ))
461464 if filetypes :
462465 filetypes = filetypes .split ("," )[:- 1 ] # To remove the extra '' at the end of the list
466+ if namespaces :
467+ namespaces = namespaces .split ("," )[:- 1 ]
468+ namespaces = ["" if ns == NAMESPACE_UI_DEFAULT else ns for ns in namespaces ]
463469 else :
464470 query = search_form ["q" ].value
465471 history = bool (request .values .get ("history" ))
466-
467472 best_match = False
468473 # we test for query in case this is a test run
469474 if query and query .startswith ("\\ " ):
470475 best_match = True
471476 query = query [1 :]
472477
473- # detect prefix and extract target item
474- subitem_target , query = parse_scoped_query (query )
478+ # if query starts with > extract target item name and query: ">joe red, pink" becomes ("joe", "red pink")
479+ item_name , query = parse_scoped_query (query )
480+
481+ # is there a leading namespace in query string
482+ if item_name :
483+ in_parts = item_name .split ("/" , 1 )
484+ if len (in_parts ) > 1 :
485+ is_ns = [x [0 ] for x in app .cfg .namespace_mapping if x [0 ] == in_parts [0 ]]
486+ if is_ns :
487+ if is_ns [0 ] not in [NAMESPACE_USERPROFILES , NAMESPACE_DEFAULT ]:
488+ leading_ns = is_ns [0 ]
489+ item_name = in_parts [1 ]
475490
476491 if valid or ajax :
477492 # most fields in the schema use a StandardAnalyzer, it omits fairly frequently used words
478493 # this finds such words and reports to the user
479494 analyzer = StandardAnalyzer ()
480- omitted_words = [token .text for token in analyzer (query , removestops = False ) if token .stopped ]
481-
495+ omitted_words = [token .text for token in analyzer (query ) if token .stopped ]
482496 idx_name = ALL_REVS if history else LATEST_REVS
483-
484497 if best_match :
485498 qp = flaskg .storage .query_parser ([NAMES , NAMENGRAM ], idx_name = idx_name )
486499 else :
487500 qp = flaskg .storage .query_parser (
488501 [NAMES , NAMENGRAM , TAGS , SUMMARY , SUMMARYNGRAM , CONTENT , CONTENTNGRAM , COMMENT ], idx_name = idx_name
489502 )
490503 q = qp .parse (query )
491- _filter = []
492- _filter = add_file_filters (_filter , filetypes )
493-
494- # if the user specified a subitem target
495- if subitem_target :
496- # if they also specified an item name from the URL
497- if item_name :
498- # display a note that the subitem will override the item
499- flash (_ ("Note: Subitem target in query overrides the item in the URL." ), "info" )
500- # update the item_name to be the subitem_target
501- item_name = subitem_target
502-
503- if item_name : # Only search this item and subitems
504- full_name = None
505-
506- # search for the full item name (i.e. "Home/Readings" for "Readings")
507- with flaskg .storage .indexer .ix [LATEST_REVS ].searcher () as searcher :
508- all_items = searcher .search (Every (), limit = None )
509- for hit in all_items :
510- try :
511- hit_name = hit [NAME ][0 ]
512- except IndexError :
513- # deleted items have no names, e.g. []
514- continue
515- if hit_name .endswith ("/" + item_name ) or hit_name == item_name :
516- full_name = hit_name
517- break
518-
519- if full_name :
520- prefix_name = full_name + "/"
521- terms = [Term (NAME_EXACT , full_name ), Prefix (NAME_EXACT , prefix_name )]
522-
523- show_transclusions = True
524- if show_transclusions :
525- # XXX Search subitems and all transcluded items (even recursively),
526- # still looks like a hack. Imaging you have "foo" on main page and
527- # "bar" on transcluded one. Then you search for "foo AND bar".
528- # Such stuff would only work if we expand transcluded items
529- # at indexing time (and we currently don't).
530- with flaskg .storage .indexer .ix [LATEST_REVS ].searcher () as searcher :
531- subq = Or ([Term (NAME_EXACT , full_name ), Prefix (NAME_EXACT , prefix_name )])
532- subq = And ([subq , Every (ITEMTRANSCLUSIONS )])
533- flaskg .clock .start ("search subitems with transclusions" )
534- results = searcher .search (subq , limit = None )
535- flaskg .clock .stop ("search subitems with transclusions" )
536- transcluded_names = set ()
537- for hit in results :
538- name = hit [NAME ]
539- transclusions = _compute_item_transclusions (name )
540- transcluded_names .update (transclusions )
541- # XXX Will whoosh cope with such a large filter query?
542- terms .extend ([Term (NAME_EXACT , tname ) for tname in transcluded_names ])
543- _filter = Or (terms )
504+ if trash == "false" :
505+ q = And ([q , Not (Term (TRASH , True ))])
506+ if namespaces :
507+ ns_terms = [Term (NAMESPACE , ns ) for ns in namespaces ]
508+ q = And ([q , Or (ns_terms )])
509+ elif leading_ns :
510+ ns_terms = [Term (NAMESPACE , leading_ns )]
511+ q = And ([q , Or (ns_terms )])
512+ all_contenttypes = expand_content_types (filetypes )
513+
514+ if item_name :
515+ prefix_name = item_name + "/"
516+ terms = [Term (NAME_EXACT , item_name ), Prefix (NAME_EXACT , prefix_name )]
517+ if all_contenttypes :
518+ q = And ([q , Or (all_contenttypes )])
519+ if terms :
520+ q = And ([q , Or (terms )])
544521
545522 with flaskg .storage .indexer .ix [idx_name ].searcher () as searcher :
546523 # terms is set to retrieve list of terms which matched, in the searchtemplate, for highlight.
547524 facets = []
548525 facets = add_facets (facets , time_sorting )
549526 flaskg .clock .start ("search" )
550527 try :
551- results = searcher .search (q , filter = _filter , limit = 100 , terms = True , sortedby = facets )
528+ results = searcher .search (q , limit = 100 , terms = True , sortedby = facets )
552529 # this may be an ajax transaction, search.js will handle a full page response
553530 except QueryError :
554531 flash (_ ("""QueryError: invalid search term: {search_term}""" ).format (search_term = q ), "error" )
555532 return render_template ("search.html" , query = query , medium_search_form = search_form , item_name = item_name )
556533 except TermNotFound :
557- # name:'moin has bugs'
558534 flash (_ ("""TermNotFound: field is not indexed: {search_term}""" ).format (search_term = q ), "error" )
559535 return render_template ("search.html" , query = query , medium_search_form = search_form , item_name = item_name )
560536 flaskg .clock .stop ("search" )
@@ -567,28 +543,25 @@ def search(item_name):
567543 html = render_template (
568544 "ajaxsearch.html" ,
569545 results = results ,
546+ query = query ,
570547 omitted_words = ", " .join (omitted_words ),
571548 history = history ,
572- is_ticket = is_ticket ,
573549 whoosh_query = q ,
574- whoosh_filter = _filter ,
575550 flaskg = flaskg ,
576- subitem_target = subitem_target ,
577- query = query ,
551+ item_name = item_name ,
552+ is_ticket = is_ticket ,
578553 )
579554 else :
580555 html = render_template (
581556 "search.html" ,
582557 results = results ,
583558 query = query ,
584- medium_search_form = search_form ,
585- item_name = item_name ,
586559 omitted_words = ", " .join (omitted_words ),
587560 history = history ,
588561 whoosh_query = q ,
589- whoosh_filter = _filter ,
590562 flaskg = flaskg ,
591- subitem_target = subitem_target ,
563+ item_name = item_name ,
564+ medium_search_form = search_form ,
592565 )
593566 flaskg .clock .stop ("search render" )
594567 else :
0 commit comments