|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the Elastic License |
| 4 | + * 2.0; you may not use this file except in compliance with the Elastic License |
| 5 | + * 2.0. |
| 6 | + */ |
| 7 | + |
| 8 | +package org.elasticsearch.xpack.esql.plugin; |
| 9 | + |
| 10 | +import org.elasticsearch.action.index.IndexRequest; |
| 11 | +import org.elasticsearch.action.support.WriteRequest; |
| 12 | +import org.elasticsearch.client.internal.IndicesAdminClient; |
| 13 | +import org.elasticsearch.common.settings.Settings; |
| 14 | +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; |
| 15 | +import org.junit.Before; |
| 16 | + |
| 17 | +import java.util.Collections; |
| 18 | +import java.util.List; |
| 19 | +import java.util.function.Consumer; |
| 20 | + |
| 21 | +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; |
| 22 | + |
| 23 | +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") |
| 24 | +public class ExtractSnippetsFunctionIT extends AbstractEsqlIntegTestCase { |
| 25 | + |
| 26 | + private static final List<Object> EMPTY_RESULT = Collections.singletonList(null); |
| 27 | + |
| 28 | + @Before |
| 29 | + public void setupIndex() { |
| 30 | + createAndPopulateIndex(this::ensureYellow); |
| 31 | + } |
| 32 | + |
| 33 | + public void testExtractSnippets() { |
| 34 | + var query = """ |
| 35 | + FROM test |
| 36 | + | EVAL my_snippet = extract_snippets(content, "fox", 1, 15) |
| 37 | + | SORT my_snippet |
| 38 | + | KEEP my_snippet |
| 39 | + """; |
| 40 | + |
| 41 | + try (var resp = run(query)) { |
| 42 | + assertColumnNames(resp.columns(), List.of("my_snippet")); |
| 43 | + assertColumnTypes(resp.columns(), List.of("keyword")); |
| 44 | + assertValues( |
| 45 | + resp.values(), |
| 46 | + List.of(List.of("The quick brown"), List.of("This is a brown"), EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT) |
| 47 | + ); |
| 48 | + } |
| 49 | + } |
| 50 | + |
| 51 | + public void testExtractMultipleSnippets() { |
| 52 | + var query = """ |
| 53 | + FROM test |
| 54 | + | EVAL my_snippet = extract_snippets(content, "fox", 3, 15) |
| 55 | + | SORT my_snippet |
| 56 | + | KEEP my_snippet |
| 57 | + """; |
| 58 | + |
| 59 | + try (var resp = run(query)) { |
| 60 | + assertColumnNames(resp.columns(), List.of("my_snippet")); |
| 61 | + assertColumnTypes(resp.columns(), List.of("keyword")); |
| 62 | + assertValues( |
| 63 | + resp.values(), |
| 64 | + List.of( |
| 65 | + List.of(List.of("The quick brown", "Afterward, the")), |
| 66 | + List.of(List.of("This is a brown", "Sometimes the b")), |
| 67 | + EMPTY_RESULT, |
| 68 | + EMPTY_RESULT, |
| 69 | + EMPTY_RESULT, |
| 70 | + EMPTY_RESULT |
| 71 | + ) |
| 72 | + ); |
| 73 | + } |
| 74 | + } |
| 75 | + |
| 76 | + public void testExtractSnippetsWithMatch() { |
| 77 | + var query = """ |
| 78 | + FROM test METADATA _score |
| 79 | + | WHERE MATCH(content, "fox") |
| 80 | + | EVAL my_snippet = extract_snippets(content, "fox", 1, 15) |
| 81 | + | SORT my_snippet |
| 82 | + | KEEP my_snippet |
| 83 | + """; |
| 84 | + |
| 85 | + try (var resp = run(query)) { |
| 86 | + assertColumnNames(resp.columns(), List.of("my_snippet")); |
| 87 | + assertColumnTypes(resp.columns(), List.of("keyword")); |
| 88 | + assertValues(resp.values(), List.of(List.of("The quick brown"), List.of("This is a brown"))); |
| 89 | + } |
| 90 | + } |
| 91 | + |
| 92 | + public void testExtractMultipleSnippetsWithMatch() { |
| 93 | + var query = """ |
| 94 | + FROM test METADATA _score |
| 95 | + | WHERE MATCH(content, "fox") |
| 96 | + | EVAL my_snippet = extract_snippets(content, "fox", 3, 15) |
| 97 | + | SORT my_snippet |
| 98 | + | KEEP my_snippet |
| 99 | + """; |
| 100 | + |
| 101 | + try (var resp = run(query)) { |
| 102 | + assertColumnNames(resp.columns(), List.of("my_snippet")); |
| 103 | + assertColumnTypes(resp.columns(), List.of("keyword")); |
| 104 | + assertValues( |
| 105 | + resp.values(), |
| 106 | + List.of(List.of(List.of("The quick brown", "Afterward, the")), List.of(List.of("This is a brown", "Sometimes the b"))) |
| 107 | + ); |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + public void testExtractSnippetDefaults() { |
| 112 | + var query = """ |
| 113 | + FROM test |
| 114 | + | EVAL my_snippet = extract_snippets(content, "fox") |
| 115 | + | SORT my_snippet |
| 116 | + | KEEP my_snippet |
| 117 | + """; |
| 118 | + |
| 119 | + try (var resp = run(query)) { |
| 120 | + assertColumnNames(resp.columns(), List.of("my_snippet")); |
| 121 | + assertColumnTypes(resp.columns(), List.of("keyword")); |
| 122 | + assertValues( |
| 123 | + resp.values(), |
| 124 | + List.of(List.of("is a brown"), List.of("quick brow"), EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT, EMPTY_RESULT) |
| 125 | + ); |
| 126 | + } |
| 127 | + } |
| 128 | + |
| 129 | + public void testExtractSnippetDefaultLength() { |
| 130 | + var query = """ |
| 131 | + FROM test |
| 132 | + | EVAL my_snippet = extract_snippets(content, "fox", 3) |
| 133 | + | SORT my_snippet |
| 134 | + | KEEP my_snippet |
| 135 | + """; |
| 136 | + |
| 137 | + try (var resp = run(query)) { |
| 138 | + assertColumnNames(resp.columns(), List.of("my_snippet")); |
| 139 | + assertColumnTypes(resp.columns(), List.of("keyword")); |
| 140 | + assertValues( |
| 141 | + resp.values(), |
| 142 | + List.of( |
| 143 | + List.of(List.of("is a brown", "the brown")), |
| 144 | + List.of(List.of("quick brow", "the brown")), |
| 145 | + EMPTY_RESULT, |
| 146 | + EMPTY_RESULT, |
| 147 | + EMPTY_RESULT, |
| 148 | + EMPTY_RESULT |
| 149 | + ) |
| 150 | + ); |
| 151 | + } |
| 152 | + } |
| 153 | + |
| 154 | + static void createAndPopulateIndex(Consumer<String[]> ensureYellow) { |
| 155 | + var indexName = "test"; |
| 156 | + var client = client().admin().indices(); |
| 157 | + var createRequest = client.prepareCreate(indexName) |
| 158 | + .setSettings(Settings.builder().put("index.number_of_shards", 1)) |
| 159 | + .setMapping("id", "type=integer", "content", "type=text"); |
| 160 | + assertAcked(createRequest); |
| 161 | + client().prepareBulk().add(new IndexRequest(indexName).id("1").source("id", 1, "content", """ |
| 162 | + This is a brown fox that likes to run through the meadow. |
| 163 | + Sometimes the brown fox pauses to look around before continuing. |
| 164 | + """)).add(new IndexRequest(indexName).id("2").source("id", 2, "content", """ |
| 165 | + This is a brown dog that spends most of the day sleeping in the yard. |
| 166 | + The brown dog occasionally wakes up to bark at the mailman. |
| 167 | + """)).add(new IndexRequest(indexName).id("3").source("id", 3, "content", """ |
| 168 | + This dog is really brown and enjoys chasing sticks near the river. |
| 169 | + People often comment on how brown the dog looks in the sunlight. |
| 170 | + """)).add(new IndexRequest(indexName).id("4").source("id", 4, "content", """ |
| 171 | + The quick brown fox jumps over the lazy dog whenever it feels playful. |
| 172 | + Afterward, the brown fox runs off into the forest. |
| 173 | + """)).add(new IndexRequest(indexName).id("5").source("id", 5, "content", """ |
| 174 | + There is also a white cat that prefers to sit quietly by the window. |
| 175 | + Unlike the other animals, the white cat ignores everything around it. |
| 176 | + """)).add(new IndexRequest(indexName).id("6").source("id", 6, "content", """ |
| 177 | + The dog is brown but this document is very very long, filled with many words describing the scene. |
| 178 | + Even so, the brown dog is still the main focus of the story. |
| 179 | + """)).setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE).get(); |
| 180 | + |
| 181 | + var lookupIndexName = "test_lookup"; |
| 182 | + createAndPopulateLookupIndex(client, lookupIndexName); |
| 183 | + |
| 184 | + ensureYellow.accept(new String[] { indexName, lookupIndexName }); |
| 185 | + } |
| 186 | + |
| 187 | + static void createAndPopulateLookupIndex(IndicesAdminClient client, String lookupIndexName) { |
| 188 | + var createRequest = client.prepareCreate(lookupIndexName) |
| 189 | + .setSettings(Settings.builder().put("index.number_of_shards", 1).put("index.mode", "lookup")) |
| 190 | + .setMapping("id", "type=integer", "lookup_content", "type=text"); |
| 191 | + assertAcked(createRequest); |
| 192 | + } |
| 193 | +} |
0 commit comments