Skip to content
This repository was archived by the owner on Aug 30, 2024. It is now read-only.

Commit 850f53b

Browse files
committed
Added support for search query parameter processing and execution
- Unit tests for search query in CloudantDatabaseTests class - Added dictionary of search query parameters in common_util
1 parent 1545bcd commit 850f53b

File tree

5 files changed

+320
-5
lines changed

5 files changed

+320
-5
lines changed

CHANGES.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
2.1.0 (Unreleased)
22
==================
3+
- [NEW] Added support for Cloudant Search execution.
34

45

56
2.0.3 (2016-06-03)

src/cloudant/_common_util.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,27 @@
114114

115115
TEXT_INDEX_ARGS = {'fields': list, 'default_field': dict, 'selector': dict}
116116

117+
# Accepted Cloudant Search query parameters, each mapped to the type (or tuple
# of types) that the supplied value is checked against with ``isinstance``.
SEARCH_INDEX_ARGS = {
    # String-valued parameters
    'bookmark': STRTYPE,
    'group_field': STRTYPE,
    'highlight_post_tag': STRTYPE,
    'highlight_pre_tag': STRTYPE,
    'stale': STRTYPE,
    # List-valued parameters
    'counts': list,
    'drilldown': list,
    'group_sort': list,
    'highlight_fields': list,
    'include_fields': list,
    # Integer parameters that may also be passed as None
    'group_limit': (int, NONETYPE),
    'highlight_number': (int, NONETYPE),
    'highlight_size': (int, NONETYPE),
    'limit': (int, NONETYPE),
    # Parameters accepting more than one representation
    'query': (STRTYPE, int),
    'sort': (STRTYPE, list),
    # Remaining parameters
    'include_docs': bool,
    'ranges': dict,
}
137+
117138
# Functions
118139

119140
def feed_arg_types(feed_type):

src/cloudant/database.py

Lines changed: 133 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,18 @@
2121

2222
from requests.exceptions import HTTPError
2323

24-
from ._2to3 import url_quote_plus
24+
from ._2to3 import url_quote_plus, iteritems_
25+
from ._common_util import (
26+
JSON_INDEX_TYPE,
27+
SEARCH_INDEX_ARGS,
28+
SPECIAL_INDEX_TYPE,
29+
TEXT_INDEX_TYPE,
30+
python_to_couch
31+
)
2532
from .document import Document
2633
from .design_document import DesignDocument
2734
from .view import View
2835
from .index import Index, TextIndex, SpecialIndex
29-
from ._common_util import JSON_INDEX_TYPE
30-
from ._common_util import TEXT_INDEX_TYPE
31-
from ._common_util import SPECIAL_INDEX_TYPE
32-
from ._common_util import python_to_couch
3336
from .query import Query
3437
from .error import CloudantException, CloudantArgumentError
3538
from .result import Result, QueryResult
@@ -1098,3 +1101,128 @@ def get_query_result(self, selector, fields=None, raw_result=False,
10981101
return QueryResult(query, **kwargs)
10991102
else:
11001103
return query.result
1104+
1105+
def get_search_result(self, ddoc_id, index_name, **query_params):
    """
    Retrieves the raw JSON content from the remote database based on the
    search index on the server, using the query_params provided as query
    parameters. A ``query`` parameter containing the Lucene query
    syntax is mandatory.

    Example for search queries:

    .. code-block:: python

        # Assuming that 'searchindex001' exists as part of the
        # 'ddoc001' design document in the remote database...
        # Retrieve documents where the Lucene field name is 'name' and
        # the value is 'julia*'
        resp = db.get_search_result('ddoc001', 'searchindex001',
                                    query='name:julia*',
                                    include_docs=True)
        for row in resp['rows']:
            # Process search index data (in JSON format).

    Example if the search query requires grouping by using
    the ``group_field`` parameter:

    .. code-block:: python

        # Assuming that 'searchindex001' exists as part of the
        # 'ddoc001' design document in the remote database...
        # Retrieve JSON response content, limiting response to 10 documents
        resp = db.get_search_result('ddoc001', 'searchindex001',
                                    query='name:julia*',
                                    group_field='name',
                                    limit=10)
        for group in resp['groups']:
            for row in group['rows']:
                # Process search index data (in JSON format).

    :param str ddoc_id: Design document id used to get the search result.
    :param str index_name: Name used in part to identify the index.
    :param str bookmark: Optional string that enables you to specify which
        page of results you require. Only valid for queries that do not
        specify the ``group_field`` query parameter.
    :param list counts: Optional JSON array of field names for which
        counts should be produced. The response will contain counts for each
        unique value of this field name among the documents matching the
        search query.
        Requires the index to have faceting enabled.
    :param list drilldown: Optional list of fields that each define a
        pair of a field name and a value. This field can be used several
        times. The search will only match documents that have the given
        value in the field name. It differs from using
        ``query=fieldname:value`` only in that the values are not analyzed.
    :param str group_field: Optional string field by which to group
        search matches. Fields containing other data
        (numbers, objects, arrays) can not be used.
    :param int group_limit: Optional number with the maximum group count.
        This field can only be used if ``group_field`` query parameter
        is specified.
    :param group_sort: Optional JSON field that defines the order of the
        groups in a search using ``group_field``. The default sort order
        is relevance. This field can have the same values as the sort field,
        so single fields as well as arrays of fields are supported.
    :param bool include_docs: Optional boolean indicating whether the
        content of each matching document should be included in the
        response.
    :param int limit: Optional number to limit the maximum count of the
        returned documents. In case of a grouped search, this parameter
        limits the number of documents per group.
    :param query: A Lucene query in the form of ``name:value``.
        If name is omitted, the special value ``default`` is used.
    :param ranges: Optional JSON facet syntax that reuses the standard
        Lucene syntax to return counts of results which fit into each
        specified category. Inclusive range queries are denoted by brackets.
        Exclusive range queries are denoted by curly brackets.
        For example ``ranges={"price":{"cheap":"[0 TO 100]"}}`` has an
        inclusive range of 0 to 100.
        Requires the index to have faceting enabled.
    :param sort: Optional JSON string of the form ``fieldname<type>`` for
        ascending or ``-fieldname<type>`` for descending sort order.
        Fieldname is the name of a string or number field and type is either
        number or string or a JSON array of such strings. The type part is
        optional and defaults to number.
    :param str stale: Optional string to allow the results from a stale
        index to be used. This makes the request return immediately, even
        if the index has not been completely built yet.
    :param list highlight_fields: Optional list of fields which should be
        highlighted.
    :param str highlight_pre_tag: Optional string inserted before the
        highlighted word in the highlights output. Defaults to ``<em>``.
    :param str highlight_post_tag: Optional string inserted after the
        highlighted word in the highlights output. Defaults to ``</em>``.
    :param int highlight_number: Optional number of fragments returned in
        highlights. If the search term occurs less often than the number of
        fragments specified, longer fragments are returned. Default is 1.
    :param int highlight_size: Optional number of characters in each
        fragment for highlights. Defaults to 100 characters.
    :param list include_fields: Optional list of field names to include in
        search results. Any fields included must have been indexed with the
        ``store:true`` option.

    :raises CloudantArgumentError: If the ``query`` parameter is missing or
        empty, if an unknown parameter name is supplied, or if a parameter
        value is not an instance of the type expected for that parameter.
    :raises requests.exceptions.HTTPError: Propagated from
        ``raise_for_status()`` when the search request fails.

    :returns: Search query result data in JSON format
    """
    if not query_params.get('query'):
        raise CloudantArgumentError(
            'No query parameter found. Please add a query parameter '
            'containing Lucene syntax and retry.')

    # Validate query arguments and values before contacting the server
    for key, val in iteritems_(query_params):
        # Membership is tested on the dict itself; no temporary key list
        if key not in SEARCH_INDEX_ARGS:
            msg = 'Invalid argument: {0}'.format(key)
            raise CloudantArgumentError(msg)
        expected_type = SEARCH_INDEX_ARGS[key]
        if not isinstance(val, expected_type):
            msg = (
                'Argument {0} is not an instance of expected type: {1}'
            ).format(key, expected_type)
            raise CloudantArgumentError(msg)

    # Execute query search: the parameters are sent as the JSON request body
    headers = {'Content-Type': 'application/json'}
    ddoc = DesignDocument(self, ddoc_id)
    resp = self.r_session.post(
        '/'.join([ddoc.document_url, '_search', index_name]),
        headers=headers,
        data=json.dumps(query_params, cls=self.client.encoder)
    )
    resp.raise_for_status()
    return resp.json()

tests/unit/database_tests.py

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,5 +1065,156 @@ def test_get_query_indexes(self):
10651065
self.assertEqual(indexes[2].design_document_id, '_design/ddoc001')
10661066
self.assertEqual(indexes[2].name, 'text-idx-001')
10671067

1068+
def test_get_search_result_with_invalid_argument(self):
    """
    Verify that get_search_result rejects a query parameter name it does
    not recognise
    """
    with self.assertRaises(CloudantArgumentError) as ctx:
        self.db.get_search_result(
            'searchddoc001',
            'searchindex001',
            query='julia*',
            foo={'bar': 'baz'}
        )
    self.assertEqual(str(ctx.exception), 'Invalid argument: foo')
1077+
1078+
def test_get_search_result_with_invalid_value_types(self):
    """
    Verify that get_search_result rejects every query parameter whose
    value is of the wrong type
    """
    # (parameter name, value of a type that must be rejected)
    bad_values = (
        ('bookmark', 1),                   # Should be a STRTYPE
        ('counts', 'blah'),                # Should be a list
        ('drilldown', 'blah'),             # Should be a list
        ('group_field', ['blah']),         # Should be a STRTYPE
        ('group_limit', 'int'),            # Should be an int
        ('group_sort', 'blah'),            # Should be a list
        ('include_docs', 'blah'),          # Should be a boolean
        ('limit', 'blah'),                 # Should be an int
        ('ranges', 1),                     # Should be a dict
        ('sort', 10),                      # Should be a STRTYPE or list
        ('stale', ['blah']),               # Should be a STRTYPE
        ('highlight_fields', 'blah'),      # Should be a list
        ('highlight_pre_tag', ['blah']),   # Should be a STRTYPE
        ('highlight_post_tag', 1),         # Should be a STRTYPE
        ('highlight_number', ['int']),     # Should be an int
        ('highlight_size', 'blah'),        # Should be an int
        ('include_fields', 'list'),        # Should be a list
    )

    for name, value in bad_values:
        with self.assertRaises(CloudantArgumentError) as ctx:
            self.db.get_search_result(
                'searchddoc001',
                'searchindex001',
                query='julia*',
                **{name: value}
            )
        expected_prefix = (
            'Argument {0} is not an instance of expected type:'.format(name)
        )
        self.assertTrue(str(ctx.exception).startswith(expected_prefix))
1112+
1113+
def test_get_search_result_without_query(self):
    """
    Verify that get_search_result refuses to run when no search query
    is supplied
    """
    expected_message = (
        'No query parameter found. Please add a query parameter '
        'containing Lucene syntax and retry.'
    )
    with self.assertRaises(CloudantArgumentError) as ctx:
        self.db.get_search_result(
            'searchddoc001',
            'searchindex001',
            limit=10,
            include_docs=True
        )
    self.assertEqual(str(ctx.exception), expected_message)
1126+
1127+
def test_get_search_result_with_invalid_query_type(self):
    """
    Verify that get_search_result rejects a query value of the wrong type
    """
    with self.assertRaises(CloudantArgumentError) as ctx:
        self.db.get_search_result('searchddoc001', 'searchindex001',
                                  query=['blah'])
    message = str(ctx.exception)
    self.assertTrue(message.startswith(
        'Argument query is not an instance of expected type:'
    ))
1139+
1140+
def test_get_search_result_executes_search_query(self):
    """
    Verify that get_search_result runs a search query against the remote
    index and returns the expected JSON content
    """
    self.create_search_index()
    self.populate_db_with_documents(100)
    resp = self.db.get_search_result(
        'searchddoc001',
        'searchindex001',
        query='julia*',
        limit=5,
        include_docs=True
    )
    self.assertEqual(len(resp['rows']), 5)

    # The bookmark value is not predictable: check it is present, then
    # drop it so that the remaining content can be compared exactly.
    self.assertTrue(resp['bookmark'])
    del resp['bookmark']

    # Likewise only the generation prefix of each revision is predictable.
    for row in resp['rows']:
        doc = row['doc']
        self.assertTrue(doc['_rev'].startswith('1-'))
        del doc['_rev']

    # (age, second element of the row's 'order' value) per expected row
    expected_rows = []
    for age, order_index in [(0, 0), (1, 0), (2, 0), (4, 1), (5, 1)]:
        doc_id = 'julia{0:03d}'.format(age)
        expected_rows.append({
            'fields': {'name': 'julia'},
            'doc': {'_id': doc_id, 'age': age, 'name': 'julia'},
            'id': doc_id,
            'order': [1.0, order_index],
        })

    self.assertEqual(resp, {'rows': expected_rows, 'total_rows': 100})
1182+
1183+
def test_get_search_result_executes_search_query_with_group_option(self):
    """
    Verify that get_search_result runs a search query that uses the
    grouping parameters.
    """
    self.create_search_index()
    self.populate_db_with_documents(100)
    resp = self.db.get_search_result(
        'searchddoc001',
        'searchindex001',
        query='name:julia*',
        group_field='_id',
        group_limit=5
    )
    # for group parameter options, 'rows' results are within 'groups' key
    self.assertEqual(len(resp['groups']), 5)

    # One single-row group per document id; the second element of each
    # row's 'order' value matches the group's position in the list.
    grouped_ids = ['julia000', 'julia004', 'julia008', 'julia010',
                   'julia014']
    expected_groups = [
        {
            'rows': [{'fields': {'name': 'julia'},
                      'id': doc_id,
                      'order': [1.0, position]}],
            'total_rows': 1,
            'by': doc_id,
        }
        for position, doc_id in enumerate(grouped_ids)
    ]
    self.assertEqual(
        resp,
        {'total_rows': 100, 'groups': expected_groups}
    )
1218+
10681219
if __name__ == '__main__':
10691220
unittest.main()

tests/unit/unit_t_db_base.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,3 +228,17 @@ def create_views(self):
228228
self.view004 = self.ddoc.get_view('view004')
229229
self.view005 = self.ddoc.get_view('view005')
230230
self.view006 = self.ddoc.get_view('view006')
231+
232+
def create_search_index(self):
    """
    Save design document 'searchddoc001' containing the search index
    'searchindex001', for use by the search query tests. The design
    document is kept on ``self.search_ddoc``.
    """
    # JavaScript index function: indexes the document id under "default",
    # stores "name" and enables faceting on "age" when those fields exist.
    index_function = (
        'function (doc) {\n index("default", doc._id); \n '
        'if (doc.name) {\n index("name", doc.name, {"store": true}); \n} '
        'if (doc.age) {\n index("age", doc.age, {"facet": true}); \n} \n} '
    )
    ddoc = self.search_ddoc = DesignDocument(self.db, 'searchddoc001')
    ddoc['indexes'] = {'searchindex001': {'index': index_function}}
    ddoc.save()

0 commit comments

Comments
 (0)