@@ -401,7 +401,19 @@ def test_more_like_this_auth(
401401 """Test that MoreLikeThisQuery respects authentication filters"""
402402 monkeypatch .setenv ("OPENALEPH_SEARCH_AUTH" , "true" )
403403
404- # Create test documents for different datasets
404+ from openaleph_search .core import get_es
405+ from openaleph_search .index .admin import delete_index , upgrade_search
406+ from openaleph_search .index .indexes import entities_read_index
407+
408+ # Recreate indexes from scratch to eliminate stale segment metadata from
409+ # previous tests that can corrupt MLT term statistics
410+ delete_index ()
411+ upgrade_search ()
412+ es = get_es ()
413+
414+ # Create test documents for different datasets.
415+ # Documents need enough overlapping content (words >= 5 chars) for MLT
416+ # to find matches with min_word_length=5 on a small test dataset.
405417 public_docs = [
406418 make_entity (
407419 {
@@ -410,7 +422,12 @@ def test_more_like_this_auth(
410422 "properties" : {
411423 "title" : ["Public Machine Learning Paper" ],
412424 "bodyText" : [
413- "This public document discusses machine learning algorithms, neural networks, and deep learning architectures for computer vision applications."
425+ "This paper discusses various machine learning algorithms including "
426+ "neural networks, decision trees, and support vector machines. We "
427+ "present novel approaches to deep learning architectures and their "
428+ "applications in computer vision and natural language processing "
429+ "tasks. Modern optimization techniques enable training of larger "
430+ "models with improved performance across multiple benchmarks."
414431 ],
415432 },
416433 }
@@ -422,7 +439,12 @@ def test_more_like_this_auth(
422439 "properties" : {
423440 "title" : ["Public AI Research" ],
424441 "bodyText" : [
425- "Public research on artificial intelligence, machine learning models, and neural network optimization techniques."
442+ "Artificial intelligence and machine learning have revolutionized "
443+ "many fields. This survey covers neural networks, deep learning "
444+ "models, and their applications in computer vision research. We "
445+ "also discuss natural language processing and automated decision "
446+ "making systems. Training optimization techniques continue to "
447+ "advance with novel architectures and improved benchmarks."
426448 ],
427449 },
428450 }
@@ -437,7 +459,11 @@ def test_more_like_this_auth(
437459 "properties" : {
438460 "title" : ["Private ML Study" ],
439461 "bodyText" : [
440- "Private study on machine learning applications, deep neural networks, and computer vision algorithms."
462+ "Private study on machine learning applications using deep neural "
463+ "networks and computer vision algorithms. This research explores "
464+ "novel architectures for natural language processing and presents "
465+ "optimization techniques for training large models with improved "
466+ "performance across standard benchmarks and evaluation metrics."
441467 ],
442468 },
443469 }
@@ -449,7 +475,11 @@ def test_more_like_this_auth(
449475 "properties" : {
450476 "title" : ["Private Data Science" ],
451477 "bodyText" : [
452- "Confidential data science research involving machine learning techniques and neural network architectures."
478+ "Confidential research involving machine learning techniques and "
479+ "neural network architectures for computer vision and natural "
480+ "language processing applications. Advanced optimization methods "
481+ "enable training of deep learning models with novel approaches "
482+ "to improve performance on established benchmarks."
453483 ],
454484 },
455485 }
@@ -460,6 +490,8 @@ def test_more_like_this_auth(
460490 index_bulk ("test_public" , public_docs , sync = True )
461491 index_bulk ("test_private" , private_docs , sync = True )
462492
493+ es .indices .refresh (index = entities_read_index ())
494+
463495 # Use first public doc as source for more-like-this
464496 source_entity = public_docs [0 ]
465497
0 commit comments