Skip to content

Commit 37a618d

Browse files
committed
✅ Fix mlt test
1 parent ec268be commit 37a618d

File tree

1 file changed

+37
-5
lines changed

1 file changed

+37
-5
lines changed

tests/test_more_like_this.py

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,19 @@ def test_more_like_this_auth(
401401
"""Test that MoreLikeThisQuery respects authentication filters"""
402402
monkeypatch.setenv("OPENALEPH_SEARCH_AUTH", "true")
403403

404-
# Create test documents for different datasets
404+
from openaleph_search.core import get_es
405+
from openaleph_search.index.admin import delete_index, upgrade_search
406+
from openaleph_search.index.indexes import entities_read_index
407+
408+
# Recreate indexes from scratch to eliminate stale segment metadata from
409+
# previous tests that can corrupt MLT term statistics
410+
delete_index()
411+
upgrade_search()
412+
es = get_es()
413+
414+
# Create test documents for different datasets.
415+
# Documents need enough overlapping content (words >= 5 chars) for MLT
416+
# to find matches with min_word_length=5 on a small test dataset.
405417
public_docs = [
406418
make_entity(
407419
{
@@ -410,7 +422,12 @@ def test_more_like_this_auth(
410422
"properties": {
411423
"title": ["Public Machine Learning Paper"],
412424
"bodyText": [
413-
"This public document discusses machine learning algorithms, neural networks, and deep learning architectures for computer vision applications."
425+
"This paper discusses various machine learning algorithms including "
426+
"neural networks, decision trees, and support vector machines. We "
427+
"present novel approaches to deep learning architectures and their "
428+
"applications in computer vision and natural language processing "
429+
"tasks. Modern optimization techniques enable training of larger "
430+
"models with improved performance across multiple benchmarks."
414431
],
415432
},
416433
}
@@ -422,7 +439,12 @@ def test_more_like_this_auth(
422439
"properties": {
423440
"title": ["Public AI Research"],
424441
"bodyText": [
425-
"Public research on artificial intelligence, machine learning models, and neural network optimization techniques."
442+
"Artificial intelligence and machine learning have revolutionized "
443+
"many fields. This survey covers neural networks, deep learning "
444+
"models, and their applications in computer vision research. We "
445+
"also discuss natural language processing and automated decision "
446+
"making systems. Training optimization techniques continue to "
447+
"advance with novel architectures and improved benchmarks."
426448
],
427449
},
428450
}
@@ -437,7 +459,11 @@ def test_more_like_this_auth(
437459
"properties": {
438460
"title": ["Private ML Study"],
439461
"bodyText": [
440-
"Private study on machine learning applications, deep neural networks, and computer vision algorithms."
462+
"Private study on machine learning applications using deep neural "
463+
"networks and computer vision algorithms. This research explores "
464+
"novel architectures for natural language processing and presents "
465+
"optimization techniques for training large models with improved "
466+
"performance across standard benchmarks and evaluation metrics."
441467
],
442468
},
443469
}
@@ -449,7 +475,11 @@ def test_more_like_this_auth(
449475
"properties": {
450476
"title": ["Private Data Science"],
451477
"bodyText": [
452-
"Confidential data science research involving machine learning techniques and neural network architectures."
478+
"Confidential research involving machine learning techniques and "
479+
"neural network architectures for computer vision and natural "
480+
"language processing applications. Advanced optimization methods "
481+
"enable training of deep learning models with novel approaches "
482+
"to improve performance on established benchmarks."
453483
],
454484
},
455485
}
@@ -460,6 +490,8 @@ def test_more_like_this_auth(
460490
index_bulk("test_public", public_docs, sync=True)
461491
index_bulk("test_private", private_docs, sync=True)
462492

493+
es.indices.refresh(index=entities_read_index())
494+
463495
# Use first public doc as source for more-like-this
464496
source_entity = public_docs[0]
465497

0 commit comments

Comments (0)