Skip to content

Commit 0111498

Browse files
Merge branch 'main' into fix-persona-plugin
2 parents 36f6190 + 9a49169 commit 0111498

File tree

69 files changed

+2421
-186
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

69 files changed

+2421
-186
lines changed

bootstrap/sql/migrations/native/1.12.0/mysql/schemaChanges.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,11 @@ CREATE TABLE IF NOT EXISTS search_index_server_stats (
214214
serverId VARCHAR(256) NOT NULL,
215215
readerSuccess BIGINT DEFAULT 0,
216216
readerFailed BIGINT DEFAULT 0,
217+
readerWarnings BIGINT DEFAULT 0,
217218
sinkTotal BIGINT DEFAULT 0,
218219
sinkSuccess BIGINT DEFAULT 0,
219220
sinkFailed BIGINT DEFAULT 0,
221+
sinkWarnings BIGINT DEFAULT 0,
220222
entityBuildFailures BIGINT DEFAULT 0,
221223
partitionsCompleted INT DEFAULT 0,
222224
partitionsFailed INT DEFAULT 0,

bootstrap/sql/migrations/native/1.12.0/postgres/schemaChanges.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,11 @@ CREATE TABLE IF NOT EXISTS search_index_server_stats (
234234
serverId VARCHAR(256) NOT NULL,
235235
readerSuccess BIGINT DEFAULT 0,
236236
readerFailed BIGINT DEFAULT 0,
237+
readerWarnings BIGINT DEFAULT 0,
237238
sinkTotal BIGINT DEFAULT 0,
238239
sinkSuccess BIGINT DEFAULT 0,
239240
sinkFailed BIGINT DEFAULT 0,
241+
sinkWarnings BIGINT DEFAULT 0,
240242
entityBuildFailures BIGINT DEFAULT 0,
241243
partitionsCompleted INT DEFAULT 0,
242244
partitionsFailed INT DEFAULT 0,

docker/development/distributed-test/local/server1.yaml

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,7 @@ elasticsearch:
370370
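# IAM auth is controlled by the `enabled` flag below (SEARCH_AWS_IAM_AUTH_ENABLED, default false). NOTE(review): confirm that setting AWS_DEFAULT_REGION alone no longer enables it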
371371
# Credentials: Use AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY or rely on AWS SDK default credential provider chain
372372
aws:
373+
enabled: ${SEARCH_AWS_IAM_AUTH_ENABLED:-false}
373374
region: ${AWS_DEFAULT_REGION:-""}
374375
accessKeyId: ${AWS_ACCESS_KEY_ID:-""}
375376
secretAccessKey: ${AWS_SECRET_ACCESS_KEY:-""}
@@ -381,6 +382,7 @@ elasticsearch:
381382
providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
382383
bedrock:
383384
awsConfig:
385+
enabled: false
384386
region: ${AWS_BEDROCK_REGION:-""}
385387
accessKeyId: ${AWS_BEDROCK_ACCESS_KEY:-""}
386388
secretAccessKey: ${AWS_BEDROCK_SECRET_KEY:-""}
@@ -431,6 +433,7 @@ pipelineServiceClientConfiguration:
431433
sseAlgorithm: ${PIPELINE_SERVICE_CLIENT_LOG_SSE_ALGORITHM:-"AES256"} # Allowed values: "AES256" or "aws:kms"
432434
kmsKeyId: ${PIPELINE_SERVICE_CLIENT_LOG_KMS_KEY_ID:-""} # Required only if sseAlgorithm is "aws:kms"
433435
awsConfig:
436+
enabled: ${PIPELINE_SERVICE_CLIENT_AWS_IAM_AUTH_ENABLED:-false}
434437
awsAccessKeyId: ${PIPELINE_SERVICE_CLIENT_LOG_AWS_ACCESS_KEY_ID:-""}
435438
awsSecretAccessKey: ${PIPELINE_SERVICE_CLIENT_LOG_AWS_SECRET_ACCESS_KEY:-""}
436439
awsRegion: ${PIPELINE_SERVICE_CLIENT_LOG_REGION:-""}
@@ -610,10 +613,11 @@ cache:
610613
connectTimeoutMs: ${CACHE_REDIS_CONNECT_TIMEOUT:-2000}
611614

612615
# AWS ElastiCache IAM Authentication (only if using ElastiCache)
613-
useIamAuth: ${CACHE_REDIS_USE_IAM:-false}
614-
awsRegion: ${CACHE_REDIS_AWS_REGION:-}
615-
awsUseInstanceProfile: ${CACHE_REDIS_AWS_INSTANCE_PROFILE:-true}
616-
# If not using instance profile, provide credentials:
617-
awsAccessKey: ${AWS_ACCESS_KEY_ID:-}
618-
awsSecretKey: ${AWS_SECRET_ACCESS_KEY:-}
619-
tokenRefreshIntervalSeconds: ${CACHE_REDIS_TOKEN_REFRESH:-900} # 15 minutes
616+
aws:
617+
enabled: ${CACHE_REDIS_AWS_IAM_AUTH_ENABLED:-false}
618+
region: ${CACHE_REDIS_AWS_REGION:-""}
619+
useInstanceProfile: ${CACHE_REDIS_AWS_INSTANCE_PROFILE:-true}
620+
# If not using instance profile, provide credentials:
621+
accessKeyId: ${AWS_ACCESS_KEY_ID:-""}
622+
secretAccessKey: ${AWS_SECRET_ACCESS_KEY:-""}
623+
tokenRefreshIntervalSeconds: ${CACHE_REDIS_TOKEN_REFRESH:-900} # 15 minutes

docker/development/distributed-test/local/server2.yaml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -370,17 +370,19 @@ elasticsearch:
370370
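# IAM auth is controlled by the `enabled` flag below (SEARCH_AWS_IAM_AUTH_ENABLED, default false). NOTE(review): confirm that setting AWS_DEFAULT_REGION alone no longer enables it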
371371
# Credentials: Use AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY or rely on AWS SDK default credential provider chain
372372
aws:
373+
enabled: ${SEARCH_AWS_IAM_AUTH_ENABLED:-false}
373374
region: ${AWS_DEFAULT_REGION:-""}
374375
accessKeyId: ${AWS_ACCESS_KEY_ID:-""}
375376
secretAccessKey: ${AWS_SECRET_ACCESS_KEY:-""}
376377
sessionToken: ${AWS_SESSION_TOKEN:-""}
377378
serviceName: ${SEARCH_AWS_SERVICE_NAME:-"es"} # Use "es" for OpenSearch, "aoss" for OpenSearch Serverless
378379
naturalLanguageSearch:
379-
enabled: false
380+
enabled: ${NATURAL_LANGUAGE_SEARCH_ENABLED:-false}
380381
embeddingProvider: ${EMBEDDING_PROVIDER:-bedrock}
381382
providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
382383
bedrock:
383384
awsConfig:
385+
enabled: ${BEDROCK_AWS_IAM_AUTH_ENABLED:-false}
384386
region: ${AWS_BEDROCK_REGION:-""}
385387
accessKeyId: ${AWS_BEDROCK_ACCESS_KEY:-""}
386388
secretAccessKey: ${AWS_BEDROCK_SECRET_KEY:-""}
@@ -431,6 +433,7 @@ pipelineServiceClientConfiguration:
431433
sseAlgorithm: ${PIPELINE_SERVICE_CLIENT_LOG_SSE_ALGORITHM:-"AES256"} # Allowed values: "AES256" or "aws:kms"
432434
kmsKeyId: ${PIPELINE_SERVICE_CLIENT_LOG_KMS_KEY_ID:-""} # Required only if sseAlgorithm is "aws:kms"
433435
awsConfig:
436+
enabled: ${PIPELINE_SERVICE_CLIENT_AWS_IAM_AUTH_ENABLED:-false}
434437
awsAccessKeyId: ${PIPELINE_SERVICE_CLIENT_LOG_AWS_ACCESS_KEY_ID:-""}
435438
awsSecretAccessKey: ${PIPELINE_SERVICE_CLIENT_LOG_AWS_SECRET_ACCESS_KEY:-""}
436439
awsRegion: ${PIPELINE_SERVICE_CLIENT_LOG_REGION:-""}
@@ -610,10 +613,11 @@ cache:
610613
connectTimeoutMs: ${CACHE_REDIS_CONNECT_TIMEOUT:-2000}
611614

612615
# AWS ElastiCache IAM Authentication (only if using ElastiCache)
613-
useIamAuth: ${CACHE_REDIS_USE_IAM:-false}
614-
awsRegion: ${CACHE_REDIS_AWS_REGION:-}
615-
awsUseInstanceProfile: ${CACHE_REDIS_AWS_INSTANCE_PROFILE:-true}
616-
# If not using instance profile, provide credentials:
617-
awsAccessKey: ${AWS_ACCESS_KEY_ID:-}
618-
awsSecretKey: ${AWS_SECRET_ACCESS_KEY:-}
619-
tokenRefreshIntervalSeconds: ${CACHE_REDIS_TOKEN_REFRESH:-900} # 15 minutes
616+
aws:
617+
enabled: ${CACHE_REDIS_AWS_IAM_AUTH_ENABLED:-false}
618+
region: ${CACHE_REDIS_AWS_REGION:-""}
619+
useInstanceProfile: ${CACHE_REDIS_AWS_INSTANCE_PROFILE:-true}
620+
# If not using instance profile, provide credentials:
621+
accessKeyId: ${AWS_ACCESS_KEY_ID:-""}
622+
secretAccessKey: ${AWS_SECRET_ACCESS_KEY:-""}
623+
tokenRefreshIntervalSeconds: ${CACHE_REDIS_TOKEN_REFRESH:-900} # 15 minutes

docker/development/distributed-test/local/server3.yaml

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -371,17 +371,19 @@ elasticsearch:
371371
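# IAM auth is controlled by the `enabled` flag below (SEARCH_AWS_IAM_AUTH_ENABLED, default false). NOTE(review): confirm that setting AWS_DEFAULT_REGION alone no longer enables it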
372372
# Credentials: Use AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY or rely on AWS SDK default credential provider chain
373373
aws:
374+
enabled: ${SEARCH_AWS_IAM_AUTH_ENABLED:-false}
374375
region: ${AWS_DEFAULT_REGION:-""}
375376
accessKeyId: ${AWS_ACCESS_KEY_ID:-""}
376377
secretAccessKey: ${AWS_SECRET_ACCESS_KEY:-""}
377378
sessionToken: ${AWS_SESSION_TOKEN:-""}
378379
serviceName: ${SEARCH_AWS_SERVICE_NAME:-"es"} # Use "es" for OpenSearch, "aoss" for OpenSearch Serverless
379380
naturalLanguageSearch:
380-
enabled: false
381+
enabled: ${NATURAL_LANGUAGE_SEARCH_ENABLED:-false}
381382
embeddingProvider: ${EMBEDDING_PROVIDER:-bedrock}
382383
providerClass: ${NATURAL_LANGUAGE_SEARCH_PROVIDER_CLASS:-org.openmetadata.service.search.nlq.NoOpNLQService}
383384
bedrock:
384385
awsConfig:
386+
enabled: ${BEDROCK_AWS_IAM_AUTH_ENABLED:-false}
385387
region: ${AWS_BEDROCK_REGION:-""}
386388
accessKeyId: ${AWS_BEDROCK_ACCESS_KEY:-""}
387389
secretAccessKey: ${AWS_BEDROCK_SECRET_KEY:-""}
@@ -432,6 +434,7 @@ pipelineServiceClientConfiguration:
432434
sseAlgorithm: ${PIPELINE_SERVICE_CLIENT_LOG_SSE_ALGORITHM:-"AES256"} # Allowed values: "AES256" or "aws:kms"
433435
kmsKeyId: ${PIPELINE_SERVICE_CLIENT_LOG_KMS_KEY_ID:-""} # Required only if sseAlgorithm is "aws:kms"
434436
awsConfig:
437+
enabled: ${PIPELINE_SERVICE_CLIENT_AWS_IAM_AUTH_ENABLED:-false}
435438
awsAccessKeyId: ${PIPELINE_SERVICE_CLIENT_LOG_AWS_ACCESS_KEY_ID:-""}
436439
awsSecretAccessKey: ${PIPELINE_SERVICE_CLIENT_LOG_AWS_SECRET_ACCESS_KEY:-""}
437440
awsRegion: ${PIPELINE_SERVICE_CLIENT_LOG_REGION:-""}
@@ -611,10 +614,11 @@ cache:
611614
connectTimeoutMs: ${CACHE_REDIS_CONNECT_TIMEOUT:-2000}
612615

613616
# AWS ElastiCache IAM Authentication (only if using ElastiCache)
614-
useIamAuth: ${CACHE_REDIS_USE_IAM:-false}
615-
awsRegion: ${CACHE_REDIS_AWS_REGION:-}
616-
awsUseInstanceProfile: ${CACHE_REDIS_AWS_INSTANCE_PROFILE:-true}
617-
# If not using instance profile, provide credentials:
618-
awsAccessKey: ${AWS_ACCESS_KEY_ID:-}
619-
awsSecretKey: ${AWS_SECRET_ACCESS_KEY:-}
620-
tokenRefreshIntervalSeconds: ${CACHE_REDIS_TOKEN_REFRESH:-900} # 15 minutes
617+
aws:
618+
enabled: ${CACHE_REDIS_AWS_IAM_AUTH_ENABLED:-false}
619+
region: ${CACHE_REDIS_AWS_REGION:-""}
620+
useInstanceProfile: ${CACHE_REDIS_AWS_INSTANCE_PROFILE:-true}
621+
# If not using instance profile, provide credentials:
622+
accessKeyId: ${AWS_ACCESS_KEY_ID:-""}
623+
secretAccessKey: ${AWS_SECRET_ACCESS_KEY:-""}
624+
tokenRefreshIntervalSeconds: ${CACHE_REDIS_TOKEN_REFRESH:-900} # 15 minutes

ingestion/tests/integration/auto_classification/test_tag_processor.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -180,13 +180,7 @@ def test_it_returns_the_expected_classifications(
180180
),
181181
),
182182
]
183-
assert timestamp_column.tags == [
184-
IsInstance(TagLabel)
185-
& HasAttributes(
186-
tagFQN=HasAttributes(root="PII.NonSensitive"),
187-
reason=Contains("Detected by `SpacyRecognizer`"),
188-
),
189-
]
183+
assert timestamp_column.tags == []
190184
assert version_column.tags == []
191185
assert order_date_column.tags == [
192186
IsInstance(TagLabel)

0 commit comments

Comments
 (0)