Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.language.Language;
import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detection;


import java.time.Instant;
import java.util.HashMap;
Expand Down Expand Up @@ -113,13 +113,22 @@ public Language resolveLanguage(Linguistics linguistics) {
if (assignedLanguage != Language.UNKNOWN) return assignedLanguage;
if (detectedLanguage != Language.UNKNOWN) return detectedLanguage;
if (linguistics == null) return Language.ENGLISH;
detectedLanguage = detectLanguage(linguistics);
return detectedLanguage;
}

/**
 * Detects the language of the current value, reusing a previously cached result when one exists.
 * Results are cached because language detection is expensive.
 *
 * <p>The cache key wraps the text obtained from {@code String.valueOf(currentValue)} in a
 * method-local record type, so it can only match entries stored by this method.
 * NOTE(review): {@code cache} and {@code currentValue} are declared outside this block;
 * the cache is assumed to be a map accepting arbitrary keys and values - confirm.
 *
 * @param linguistics the linguistics instance supplying the detector; must not be null
 * @return the cached or newly detected language; {@link Language#ENGLISH} when the detector
 *         returns no detection or reports {@link Language#UNKNOWN}
 */
private Language detectLanguage(Linguistics linguistics) {
    record DetectedLanguageCacheKey(String text) {}
    var text = String.valueOf(currentValue);
    var cacheKey = new DetectedLanguageCacheKey(text);
    if (cache.get(cacheKey) instanceof Language cached) return cached;
    var detection = linguistics.getDetector().detect(text, null);
    // A missing detection falls back to English and is deliberately not cached
    if (detection == null) return Language.ENGLISH;
    var language = detection.getLanguage();
    if (language == Language.UNKNOWN) language = Language.ENGLISH;
    cache.put(cacheKey, language);
    return language;
}

/** Returns the current value of this context's {@code isReindexingOperation} flag. */
public boolean isReindexingOperation() {
    return isReindexingOperation;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,22 @@
import com.yahoo.document.datatypes.FieldValue;
import com.yahoo.document.datatypes.StringFieldValue;
import com.yahoo.language.Language;
import com.yahoo.language.Linguistics;
import com.yahoo.language.detect.Detection;
import com.yahoo.language.detect.Detector;
import com.yahoo.language.simple.SimpleLinguistics;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.ArgumentMatchers.isNull;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

/**
* @author Simon Thoresen Hult
Expand Down Expand Up @@ -117,6 +126,24 @@ public void requireThatClearResetsDetectedLanguage() {
assertEquals(Language.UNKNOWN, ctx.getLanguage());
}

@Test
public void requireThatDetectedLanguageIsCachedAcrossClear() {
    // A mocked detector that reports Japanese for any text, exposed through mocked linguistics
    Linguistics mockedLinguistics = mock(Linguistics.class);
    Detector mockedDetector = mock(Detector.class);
    Detection japanese = new Detection(Language.JAPANESE, "UTF-8", false);
    when(mockedDetector.detect(anyString(), isNull())).thenReturn(japanese);
    when(mockedLinguistics.getDetector()).thenReturn(mockedDetector);

    String input = "text";
    ExecutionContext context = new ExecutionContext();

    // First resolution has to consult the detector
    context.setCurrentValue(new StringFieldValue(input));
    Language beforeClear = context.resolveLanguage(mockedLinguistics);
    assertEquals(Language.JAPANESE, beforeClear);

    // After clear(), resolving the same text must still yield the same language
    context.clear();
    context.setCurrentValue(new StringFieldValue(input));
    Language afterClear = context.resolveLanguage(mockedLinguistics);
    assertEquals(Language.JAPANESE, afterClear);

    // Exactly one detector invocation in total: the second resolution did not call it again
    verify(mockedDetector, times(1)).detect(anyString(), isNull());
}

@Test
public void requireThatExplicitLanguagePreventsDetection() {
ExecutionContext ctx = new ExecutionContext();
Expand Down