Skip to content

Commit 9488594

Browse files
RyanHolstien and esteban committed
feat(search): implement multi-client search engine shim for ES8 support (#14904)
Co-authored-by: Esteban Gutierrez <[email protected]>
1 parent 2e731de commit 9488594

File tree

153 files changed

+7421
-1485
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

153 files changed

+7421
-1485
lines changed

.github/workflows/build-and-test.yml

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,15 @@ jobs:
6464
- name: Free up disk space
6565
run: |
6666
sudo apt-get remove 'dotnet-*' azure-cli || true
67+
sudo rm -rf /usr/local/.ghcup || true
68+
sudo rm -rf /usr/share/dotnet || true
69+
sudo rm -rf /usr/share/swift || true
70+
sudo rm -rf /usr/local/julia* || true
71+
sudo rm -rf /usr/local/share/powershell || true
72+
sudo rm -rf /usr/share/miniconda || true
6773
sudo rm -rf /usr/local/lib/android/ || true
68-
sudo docker image prune -a -f || true
74+
sudo docker system prune -a -f || true
75+
df -h
6976
- uses: szenius/[email protected]
7077
with:
7178
timezoneLinux: ${{ matrix.timezone }}
@@ -89,6 +96,13 @@ jobs:
8996
distribution: "zulu"
9097
java-version: 17
9198
- uses: gradle/actions/setup-gradle@v4
99+
- name: Disk Space Analysis
100+
run: |
101+
echo "=== Disk Usage Overview ==="
102+
df -h
103+
104+
echo -e "\n=== Docker Disk Usage ==="
105+
docker system df -v
92106
- name: Gradle build (and test) for NOT metadata ingestion
93107
if: ${{ matrix.command == 'except_metadata_ingestion' && needs.setup.outputs.backend_change == 'true' }}
94108
# datahub-schematron:cli excluded due to dependency on metadata-ingestion
@@ -112,6 +126,13 @@ jobs:
112126
-x :metadata-integration:java:datahub-schematron:cli:test
113127
env:
114128
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
129+
- name: Disk Space Analysis
130+
run: |
131+
echo "=== Disk Usage Overview ==="
132+
df -h
133+
134+
echo -e "\n=== Docker Disk Usage ==="
135+
docker system df -v
115136
- name: Gradle build (and test) for frontend
116137
if: ${{ matrix.command == 'frontend' && needs.setup.outputs.frontend_change == 'true' }}
117138
run: |

.github/workflows/docker-unified.yml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -422,11 +422,17 @@ jobs:
422422
MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
423423
steps:
424424
- name: Free up disk space
425-
if: ${{ !contains(needs.setup.outputs.test_runner_type, 'depot') }}
426425
run: |
427426
sudo apt-get remove 'dotnet-*' azure-cli || true
427+
sudo rm -rf /usr/local/.ghcup || true
428+
sudo rm -rf /usr/share/dotnet || true
429+
sudo rm -rf /usr/share/swift || true
430+
sudo rm -rf /usr/local/julia* || true
431+
sudo rm -rf /usr/local/share/powershell || true
432+
sudo rm -rf /usr/share/miniconda || true
428433
sudo rm -rf /usr/local/lib/android/ || true
429-
sudo docker image prune -a -f || true
434+
sudo docker system prune -a -f || true
435+
df -h
430436
431437
- uses: actions/cache/restore@v4
432438
with:
@@ -468,6 +474,14 @@ jobs:
468474
username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
469475
password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
470476

477+
- name: Disk Space Analysis
478+
run: |
479+
echo "=== Disk Usage Overview ==="
480+
df -h
481+
482+
echo -e "\n=== Docker Disk Usage ==="
483+
docker system df -v
484+
471485
- name: build images
472486
if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
473487
run: |
@@ -482,6 +496,13 @@ jobs:
482496
depot pull --project ${{ env.DEPOT_PROJECT_ID }} ${{ needs.base_build.outputs.build_id }}
483497
docker images
484498
499+
- name: Disk Space Analysis
500+
run: |
501+
echo "=== Disk Usage Overview ==="
502+
df -h
503+
504+
echo -e "\n=== Docker Disk Usage ==="
505+
docker system df -v
485506
- name: run quickstart
486507
env:
487508
DATAHUB_TELEMETRY_ENABLED: false

build.gradle

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ buildscript {
4646
ext.neo4jApocVersion = '5.20.0'
4747
ext.testContainersVersion = '1.21.1'
4848
ext.elasticsearchVersion = '2.11.1' // ES 7.10, Opensearch 1.x, 2.x
49+
ext.elasticsearch8Version = '8.17.4' // ES 8.x Java client
4950
ext.jacksonVersion = '2.18.4'
5051
ext.jettyVersion = '12.0.21'
5152
// see also datahub-frontend/play.gradle
@@ -151,6 +152,8 @@ project.ext.externalDependency = [
151152
'ebeanDdl': 'io.ebean:ebean-ddl-generator:' + ebeanVersion,
152153
'ebeanQueryBean': 'io.ebean:querybean-generator:' + ebeanVersion,
153154
'elasticSearchRest': 'org.opensearch.client:opensearch-rest-high-level-client:' + elasticsearchVersion,
155+
// Multi-client shim dependencies
156+
'elasticsearch8Client': 'co.elastic.clients:elasticsearch-java:' + elasticsearch8Version,
154157
'findbugsAnnotations': 'com.google.code.findbugs:annotations:3.0.1',
155158
'graphqlJava': 'com.graphql-java:graphql-java:22.3',
156159
'graphqlJavaScalars': 'com.graphql-java:graphql-java-extended-scalars:22.0',

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/service/AnalyticsService.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper;
1414
import com.linkedin.metadata.datahubusage.DataHubUsageEventConstants;
1515
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
16+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
1617
import java.util.List;
1718
import java.util.Map;
1819
import java.util.Optional;
@@ -24,7 +25,6 @@
2425
import org.opensearch.action.search.SearchRequest;
2526
import org.opensearch.action.search.SearchResponse;
2627
import org.opensearch.client.RequestOptions;
27-
import org.opensearch.client.RestHighLevelClient;
2828
import org.opensearch.index.query.BoolQueryBuilder;
2929
import org.opensearch.index.query.QueryBuilder;
3030
import org.opensearch.index.query.QueryBuilders;
@@ -45,7 +45,7 @@
4545
@RequiredArgsConstructor
4646
public class AnalyticsService {
4747

48-
private final RestHighLevelClient _elasticClient;
48+
private final SearchClientShim<?> _elasticClient;
4949
private final IndexConvention _indexConvention;
5050

5151
private static final String FILTERED = "filtered";

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/NonBlockingConfigs.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
import com.linkedin.metadata.search.SearchService;
1919
import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
2020
import com.linkedin.metadata.search.elasticsearch.update.ESWriteDAO;
21+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
2122
import io.datahubproject.metadata.context.OperationContext;
22-
import org.opensearch.client.RestHighLevelClient;
2323
import org.springframework.beans.factory.annotation.Autowired;
2424
import org.springframework.beans.factory.annotation.Qualifier;
2525
import org.springframework.beans.factory.annotation.Value;
@@ -75,7 +75,7 @@ public NonBlockingSystemUpgrade backfillProcessInstancesHasRunEvents(
7575
final OperationContext opContext,
7676
EntityService<?> entityService,
7777
ElasticSearchService elasticSearchService,
78-
RestHighLevelClient restHighLevelClient,
78+
SearchClientShim<?> restHighLevelClient,
7979
@Value("${systemUpdate.processInstanceHasRunEvents.enabled}") final boolean enabled,
8080
@Value("${systemUpdate.processInstanceHasRunEvents.reprocess.enabled}")
8181
boolean reprocessEnabled,

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstances.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,9 @@
55
import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade;
66
import com.linkedin.metadata.entity.EntityService;
77
import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
8+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
89
import io.datahubproject.metadata.context.OperationContext;
910
import java.util.List;
10-
import org.opensearch.client.RestHighLevelClient;
1111

1212
public class BackfillDataProcessInstances implements NonBlockingSystemUpgrade {
1313

@@ -17,7 +17,7 @@ public BackfillDataProcessInstances(
1717
OperationContext opContext,
1818
EntityService<?> entityService,
1919
ElasticSearchService elasticSearchService,
20-
RestHighLevelClient restHighLevelClient,
20+
SearchClientShim<?> restHighLevelClient,
2121
boolean enabled,
2222
boolean reprocessEnabled,
2323
Integer batchSize,

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/dataprocessinstances/BackfillDataProcessInstancesHasRunEventsStep.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import com.linkedin.metadata.entity.EntityService;
1515
import com.linkedin.metadata.search.elasticsearch.ElasticSearchService;
1616
import com.linkedin.metadata.utils.elasticsearch.IndexConvention;
17+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
1718
import com.linkedin.upgrade.DataHubUpgradeState;
1819
import io.datahubproject.metadata.context.OperationContext;
1920
import java.io.IOException;
@@ -28,7 +29,6 @@
2829
import org.opensearch.action.search.SearchRequest;
2930
import org.opensearch.action.search.SearchResponse;
3031
import org.opensearch.client.RequestOptions;
31-
import org.opensearch.client.RestHighLevelClient;
3232
import org.opensearch.index.query.QueryBuilder;
3333
import org.opensearch.index.query.QueryBuilders;
3434
import org.opensearch.search.aggregations.Aggregation;
@@ -47,7 +47,7 @@ public class BackfillDataProcessInstancesHasRunEventsStep implements UpgradeStep
4747
private final OperationContext opContext;
4848
private final EntityService<?> entityService;
4949
private final ElasticSearchService elasticSearchService;
50-
private final RestHighLevelClient restHighLevelClient;
50+
private final SearchClientShim<?> restHighLevelClient;
5151

5252
private final boolean reprocessEnabled;
5353
private final Integer batchSize;
@@ -60,7 +60,7 @@ public BackfillDataProcessInstancesHasRunEventsStep(
6060
OperationContext opContext,
6161
EntityService<?> entityService,
6262
ElasticSearchService elasticSearchService,
63-
RestHighLevelClient restHighLevelClient,
63+
SearchClientShim<?> restHighLevelClient,
6464
boolean reprocessEnabled,
6565
Integer batchSize,
6666
Integer batchDelayMs,

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPostStep.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
6363
boolean ack =
6464
esComponents
6565
.getSearchClient()
66-
.indices()
67-
.putSettings(request, RequestOptions.DEFAULT)
66+
.updateIndexSettings(request, RequestOptions.DEFAULT)
6867
.isAcknowledged();
6968
log.info(
7069
"Updated index {} with new settings. Settings: {}, Acknowledged: {}",

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/BuildIndicesPreStep.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,7 @@ public Function<UpgradeContext, UpgradeStepResult> executable() {
8080
boolean cloneAck =
8181
esComponents
8282
.getSearchClient()
83-
.indices()
84-
.clone(resizeRequest, RequestOptions.DEFAULT)
83+
.cloneIndex(resizeRequest, RequestOptions.DEFAULT)
8584
.isAcknowledged();
8685
log.info("Cloned index {} into {}, Acknowledged: {}", indexName, clonedName, cloneAck);
8786
if (!cloneAck) {
@@ -110,8 +109,7 @@ private boolean blockWrites(String indexName) throws InterruptedException, IOExc
110109
ack =
111110
esComponents
112111
.getSearchClient()
113-
.indices()
114-
.putSettings(request, RequestOptions.DEFAULT)
112+
.updateIndexSettings(request, RequestOptions.DEFAULT)
115113
.isAcknowledged();
116114
log.info(
117115
"Updated index {} with new settings. Settings: {}, Acknowledged: {}",

datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,24 +9,24 @@
99
import com.linkedin.metadata.config.search.ElasticSearchConfiguration;
1010
import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder;
1111
import com.linkedin.metadata.shared.ElasticSearchIndexed;
12+
import com.linkedin.metadata.utils.elasticsearch.SearchClientShim;
1213
import com.linkedin.structured.StructuredPropertyDefinition;
1314
import com.linkedin.upgrade.DataHubUpgradeState;
1415
import com.linkedin.util.Pair;
1516
import java.util.List;
1617
import java.util.Set;
1718
import java.util.function.Function;
1819
import lombok.extern.slf4j.Slf4j;
19-
import org.opensearch.client.RestHighLevelClient;
2020

2121
@Slf4j
2222
public class CleanIndicesStep implements UpgradeStep {
23-
private final RestHighLevelClient searchClient;
23+
private final SearchClientShim<?> searchClient;
2424
private final ElasticSearchConfiguration esConfig;
2525
private final List<ElasticSearchIndexed> indexedServices;
2626
private final Set<Pair<Urn, StructuredPropertyDefinition>> structuredProperties;
2727

2828
public CleanIndicesStep(
29-
final RestHighLevelClient searchClient,
29+
final SearchClientShim<?> searchClient,
3030
final ElasticSearchConfiguration esConfig,
3131
final List<ElasticSearchIndexed> indexedServices,
3232
final Set<Pair<Urn, StructuredPropertyDefinition>> structuredProperties) {

0 commit comments

Comments
 (0)