Skip to content

Commit 4b0894f

Browse files
authored
fix: support long texts for labels in ElasticsearchDocumentStore (#3346)
1 parent b93bbb1 commit 4b0894f

File tree

2 files changed: 26 additions and 2 deletions.

haystack/document_stores/elasticsearch.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -521,8 +521,8 @@ def _create_label_index(self, index_name: str, headers: Optional[Dict[str, str]]
             "mappings": {
                 "properties": {
                     "query": {"type": "text"},
-                    "answer": {"type": "flattened"}, # light-weight but less search options than full object
-                    "document": {"type": "flattened"},
+                    "answer": {"type": "nested"},
+                    "document": {"type": "nested"},
                     "is_correct_answer": {"type": "boolean"},
                     "is_correct_document": {"type": "boolean"},
                     "origin": {"type": "keyword"}, # e.g. user-feedback or gold-label

test/document_stores/test_document_store.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -799,6 +799,30 @@ def test_labels(document_store: BaseDocumentStore):
     assert len(labels) == 0
 
 
+@pytest.mark.parametrize("document_store", ["elasticsearch", "opensearch"], indirect=True)
+def test_labels_with_long_texts(document_store: BaseDocumentStore):
+    document_store.delete_index("label")
+    label = Label(
+        query="question1",
+        answer=Answer(
+            answer="answer",
+            type="extractive",
+            score=0.0,
+            context="something " * 10_000,
+            offsets_in_document=[Span(start=12, end=14)],
+            offsets_in_context=[Span(start=12, end=14)],
+        ),
+        is_correct_answer=True,
+        is_correct_document=True,
+        document=Document(content="something " * 10_000, id="123"),
+        origin="gold-label",
+    )
+    document_store.write_labels(labels=[label], index="label")
+    labels = document_store.get_all_labels(index="label")
+    assert len(labels) == 1
+    assert label == labels[0]
+
+
 # exclude weaviate because it does not support storing labels
 @pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "memory", "milvus1", "pinecone"], indirect=True)
 def test_multilabel(document_store: BaseDocumentStore):

0 commit comments

Comments
 (0)