Skip to content

Commit 886b587

Browse files
Add chunk getting with integer values (#55)
* Add chunk getting with integer values * Add field getting for chunk json * Update test_doc_utils.py * Update test_doc_utils.py * Update test_doc_utils.py
1 parent 7a63258 commit 886b587

File tree

3 files changed

+42
-2
lines changed

3 files changed

+42
-2
lines changed

tests/test_doc_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,25 @@
11
"""Testing for document utilities.
22
"""
3+
import pytest
4+
from vectorai.errors import MissingFieldError
35

46
def test_set_field(test_client):
57
sample = {}
68
test_client.set_field("simple", doc=sample, value=[0, 2])
79
assert test_client.get_field("simple", sample) == [0, 2]
810

11+
def test_get_field_chunk(test_client):
12+
sample = {
13+
'kfc': [{'food': 'chicken'}, {'food': 'prawns'}]}
14+
assert test_client.get_field('kfc.0.food', sample) == 'chicken'
15+
assert test_client.get_field('kfc.1.food', sample) == 'prawns'
16+
17+
def test_get_field_chunk_error(test_client):
18+
sample = {
19+
'kfc': [{'food': 'chicken'}, {'food': 'prawns'}]}
20+
with pytest.raises(MissingFieldError):
21+
test_client.get_field('kfc.food', sample)
22+
923
def test_get_fields(test_client):
1024
doc = test_client.create_sample_documents(1)[0]
1125
assert len(test_client.get_fields(['size.cm', 'size.feet'], doc)) == 2

vectorai/doc_utils.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,29 @@ def get_field(self, field: str, doc: Dict):
3333
for f in field.split("."):
3434
try:
3535
d = d[f]
36-
except:
36+
except KeyError:
3737
try:
3838
return doc[field]
39-
except:
39+
except KeyError:
4040
raise MissingFieldError("Document is missing " + field)
41+
except TypeError:
42+
if self._is_string_integer(f):
43+
# Get the chunk document out by its integer position.
44+
d = d[int(f)]
45+
else:
46+
raise MissingFieldError("Document is missing " + f + ' of ' + field)
4147
return d
48+
49+
@classmethod
50+
def _is_string_integer(cls, x):
51+
"""Test if a string can be converted to an integer
52+
"""
53+
try:
54+
int(x)
55+
return True
56+
except:
57+
return False
58+
4259

4360
@classmethod
4461
def get_fields(self, fields: List[str], doc: Dict) -> List[Any]:

vectorai/utils.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,15 @@ def show_json(self, json: dict, selected_fields: List[str]=None, image_fields: L
348348
image_fields=image_fields, audio_fields=audio_fields, image_width=image_width, include_vector=include_vector)
349349
return self.show_df(self.results_to_df(json).head(nrows)[image_fields + audio_fields + selected_fields],
350350
image_fields=image_fields, audio_fields=audio_fields, image_width=image_width, include_vector=include_vector)
351+
352+
def show_chunk_json(self, json: dict, selected_fields: List[str]=None, image_fields: List[str]=[],
353+
audio_fields: List[str]=[], nrows: int=5, image_width: int=60, include_vector=False):
354+
"""Show results if the documents are chunked.
355+
For images, concatenates the chunk images into the same numpy array
356+
For text, puts them one after the other with smaller index.
357+
No Audio chunking for now.
358+
"""
359+
raise NotImplementedError
351360

352361
def get_random_int(low=0, high=9999):
353362
"""

0 commit comments

Comments
 (0)