Skip to content

Commit 59e0357

Browse files
authored
Merge branch 'main' into kinesis-source-metrics-config
Signed-off-by: Rashmi <ras.xena@gmail.com>
2 parents 2a54f6b + 90d76bb commit 59e0357

File tree

697 files changed

+8346
-969
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

697 files changed

+8346
-969
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#!/usr/bin/env python3
2+
3+
#
4+
# Copyright OpenSearch Contributors
5+
# SPDX-License-Identifier: Apache-2.0
6+
#
7+
# The OpenSearch Contributors require contributions made to
8+
# this file be licensed under the Apache-2.0 license or a
9+
# compatible open source license.
10+
#
11+
12+
"""
13+
License Header Compliance Checker for OpenSearch Data Prepper
14+
15+
This script checks that files contain the required license headers
16+
as specified in CONTRIBUTING.md.
17+
18+
Usage:
19+
python check-license-headers.py file1.java file2.py ...
20+
printf 'file1.java\nfile2.py\n' | python check-license-headers.py
21+
"""
22+
23+
import os
24+
import sys
25+
from pathlib import Path
26+
from typing import List
27+
28+
# Suffixes (lower-case, leading dot included) of files that must carry a license header
SUPPORTED_EXTENSIONS = {
    # Java ecosystem
    '.java',
    '.groovy',
    '.gradle',
    # Python
    '.py',
    # Shell scripts
    '.sh',
    '.bash',
    '.zsh',
    # YAML files
    '.yaml',
    '.yml',
    # Properties files
    '.properties',
}


def needs_license_header(file_path: str) -> bool:
    """Tell whether ``file_path`` is subject to the license header requirement.

    Only the final suffix of the path is considered; it is compared
    case-insensitively against SUPPORTED_EXTENSIONS.
    """
    suffix = Path(file_path).suffix
    return suffix.lower() in SUPPORTED_EXTENSIONS
41+
42+
def check_file_header(file_path: str, max_lines: int = 15) -> bool:
    """Check if a file has the required complete license header.

    Only the first ``max_lines`` lines of the file are inspected, and every
    required header component must appear somewhere within them.

    Args:
        file_path: Path of the file to inspect.
        max_lines: Number of leading lines to search for the header.

    Returns:
        True when all header components are present, or when the file is
        missing or cannot be read (such files are skipped rather than
        reported as violations). False when any component is absent.
    """
    # All 5 components must be present for the header to count as complete
    required_components = (
        'Copyright OpenSearch Contributors',
        'SPDX-License-Identifier: Apache-2.0',
        'The OpenSearch Contributors require contributions made to',
        'this file be licensed under the Apache-2.0 license or a',
        'compatible open source license.',
    )

    try:
        # errors='ignore' keeps binary or mis-encoded files from raising on decode
        with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
            # zip() stops once range() is exhausted, so the rest of the file is not read
            content = ''.join(line for _, line in zip(range(max_lines), f))
    except FileNotFoundError:
        # Opening directly (instead of exists() then open()) avoids a race;
        # a missing file is silently treated as compliant.
        return True
    except (OSError, ValueError) as e:
        # Only I/O-level failures are skipped; programming errors propagate
        print(f"Error reading file {file_path}: {e}", file=sys.stderr)
        return True  # Skip files we can't read

    return all(component in content for component in required_components)
77+
78+
def get_files_to_check() -> List[str]:
    """Collect the file paths to check.

    Command line arguments take precedence. When none are given, paths are
    read from stdin, one per line, with surrounding whitespace stripped and
    blank lines dropped.
    """
    cli_paths = sys.argv[1:]
    if cli_paths:
        return cli_paths

    stripped = (raw.strip() for raw in sys.stdin)
    return [entry for entry in stripped if entry]
91+
92+
def main():
    """Check each requested file and report license header violations.

    Prints a per-file status line, then a summary. When GITHUB_OUTPUT is set,
    a ``violations`` output is appended to that file (a bullet list on
    failure, an empty value on success). Exits with status 1 if any file is
    missing the header.
    """
    candidates = get_files_to_check()
    if not candidates:
        print("No files to check", file=sys.stderr)
        return

    print(f"Checking {len(candidates)} files for license headers.")

    violations = []
    for candidate in candidates:
        print(f"Checking: {candidate}")

        if not Path(candidate).exists():
            print(f" File not found: {candidate}")
        elif not needs_license_header(candidate):
            print(f" Skipped (no header needed): {candidate}")
        elif check_file_header(candidate):
            print(f" ✅ Header OK: {candidate}")
        else:
            violations.append(f"- `{candidate}`")
            print(f" ❌ Missing license header: {candidate}")

    # File GitHub Actions reads step outputs from; unset outside of Actions
    output_file = os.environ.get('GITHUB_OUTPUT')

    if not violations:
        print("\n✅ All files have proper license headers!")
        if output_file:
            # Empty value so later workflow steps see no violations
            with open(output_file, 'a') as out:
                out.write("violations=\n")
        return

    print(f"\n❌ Found {len(violations)} license header violations:")

    if output_file:
        joined = '\n'.join(violations)
        # Multi-line output values use the heredoc-style delimiter syntax
        with open(output_file, 'a') as out:
            out.write(f"violations<<EOF\n{joined}\nEOF\n")

    print("\nViolations:")
    for entry in violations:
        print(f" {entry}")

    sys.exit(1)


if __name__ == "__main__":
    main()

.github/scripts/get-new-files.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/usr/bin/env python3
2+
3+
#
4+
# Copyright OpenSearch Contributors
5+
# SPDX-License-Identifier: Apache-2.0
6+
#
7+
# The OpenSearch Contributors require contributions made to
8+
# this file be licensed under the Apache-2.0 license or a
9+
# compatible open source license.
10+
#
11+
12+
"""
13+
Get newly added files from Git.
14+
15+
This script identifies files added in the current PR and outputs them
16+
one per line to stdout.
17+
"""
18+
19+
import os
20+
import subprocess
21+
import sys
22+
23+
def get_newly_added_files():
    """Get list of files added in this PR.

    Runs ``git diff`` between ``origin/<base>`` and ``HEAD``, restricted to
    added files. The base branch comes from the GITHUB_BASE_REF environment
    variable, falling back to ``main`` when it is unset or empty.

    Returns:
        A list of added file paths exactly as git reports them. An empty
        list is returned when git fails or cannot be started; the error is
        written to stderr.
    """
    # `or` (not a .get default) so an empty GITHUB_BASE_REF also falls back;
    # an empty value would otherwise produce the invalid ref "origin/...HEAD".
    base_ref = os.environ.get('GITHUB_BASE_REF') or 'main'

    command = [
        'git', 'diff', '--name-only', '--diff-filter=A',
        # -z emits NUL-terminated, unquoted paths; without it git C-quotes
        # names containing non-ASCII or special characters.
        '-z',
        f'origin/{base_ref}...HEAD',
    ]

    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error getting changed files: {e}", file=sys.stderr)
        if e.stderr:
            # Output is captured, so git's own message must be surfaced explicitly
            print(e.stderr.strip(), file=sys.stderr)
        return []
    except OSError as e:
        # e.g. the git executable is not installed or not on PATH
        print(f"Error getting changed files: {e}", file=sys.stderr)
        return []

    return [path for path in result.stdout.split('\0') if path]
41+
42+
def main():
    """Print the newly added files to stdout, one path per line.

    When there are none, a notice is written to stderr and the process
    exits with status 0.
    """
    added = get_newly_added_files()

    if added:
        # One path per line so the output can be piped into another script
        for path in added:
            print(path)
        return

    print("No newly added files found", file=sys.stderr)
    sys.exit(0)


if __name__ == "__main__":
    main()
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

# This workflow runs the Kafka buffer backward compatibility end-to-end tests with Gradle.
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-gradle

name: Data Prepper Kafka Backward Compatibility End-to-end test with Gradle

on:
  push:
    branches: [ main ]
  pull_request:
  workflow_dispatch:

jobs:
  build:
    strategy:
      matrix:
        # Passed to Gradle as -PendToEndJavaVersion; one job runs per entry
        java: [11, 17, 21, docker]
      # Let the remaining matrix entries finish even if one fails
      fail-fast: false

    runs-on: ubuntu-latest

    steps:
      - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
          java-version: 11
      - name: Checkout Data Prepper
        uses: actions/checkout@v2
      - name: Run Kafka backward compatibility end-to-end tests with Gradle
        run: ./gradlew -PendToEndJavaVersion=${{ matrix.java }} :e2e-test:kafka-buffer-backward-compatibility:kafkaBufferBackwardCompatibilityTest
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#
# Copyright OpenSearch Contributors
# SPDX-License-Identifier: Apache-2.0
#
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
#

# Performs a license header check on new files.
# It will comment on PRs if it finds violations.

name: License Header Check

on:
  pull_request:
    types: [opened, synchronize, reopened]

# Explicit token scopes: read the repository, create/update the PR comment.
# NOTE(review): pull requests from forks presumably still get a read-only
# token under the pull_request trigger, so the comment steps may fail
# there - confirm.
permissions:
  contents: read
  pull-requests: write

jobs:
  license-header-check:
    runs-on: ubuntu-latest
    name: Check License Headers on New Files

    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          # Full history so origin/<base>...HEAD can be diffed
          fetch-depth: 0

      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.14'

      - name: Run License Header Check
        id: license-check
        run: |
          python .github/scripts/get-new-files.py | python .github/scripts/check-license-headers.py

      - name: Comment on PR
        if: failure() && steps.license-check.outputs.violations != ''
        uses: actions/github-script@v8
        with:
          script: |
            const violations = process.env.VIOLATIONS;

            const body = [
              '## ⚠️ License Header Violations Found',
              '',
              'The following newly added files are missing required license headers:',
              '',
              violations,
              '',
              'Please add the appropriate license header to each file and push your changes.',
              '',
              '**See the license header requirements:** https://github.com/opensearch-project/data-prepper/blob/main/CONTRIBUTING.md#license-headers'
            ].join('\n');

            // Paginate: a single listComments call returns only the first page,
            // so an existing bot comment could be missed on busy PRs.
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              comment.body.includes('License Header Violations Found')
            );

            if (botComment) {
              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: body
              });
            } else {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: body
              });
            }
        env:
          VIOLATIONS: ${{ steps.license-check.outputs.violations }}

      - name: Update PR comment (all violations resolved)
        if: success()
        uses: actions/github-script@v8
        with:
          script: |
            // Paginate so an earlier bot comment is found regardless of PR size
            const comments = await github.paginate(github.rest.issues.listComments, {
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: context.issue.number,
            });

            const botComment = comments.find(comment =>
              comment.user.type === 'Bot' &&
              (comment.body.includes('License Header Violations Found') || comment.body.includes('License Header Check Passed'))
            );

            // Only rewrite an existing comment; PRs that never failed get no comment
            if (botComment) {
              const successBody = [
                '## ✅ License Header Check Passed',
                '',
                'All newly added files have proper license headers. Great work! 🎉'
              ].join('\n');

              await github.rest.issues.updateComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                comment_id: botComment.id,
                body: successBody
              });
            }

.github/workflows/maven-publish-snapshot.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,14 @@ jobs:
3030
export-env: true
3131
env:
3232
OP_SERVICE_ACCOUNT_TOKEN: ${{ secrets.OP_SERVICE_ACCOUNT_TOKEN }}
33-
SONATYPE_USERNAME: op://opensearch-infra-secrets/maven-central-portal-credentials/username
34-
SONATYPE_PASSWORD: op://opensearch-infra-secrets/maven-central-portal-credentials/password
33+
MAVEN_SNAPSHOTS_S3_REPO: op://opensearch-infra-secrets/maven-snapshots-s3/repo
34+
MAVEN_SNAPSHOTS_S3_ROLE: op://opensearch-infra-secrets/maven-snapshots-s3/role
35+
36+
- name: Configure AWS credentials
37+
uses: aws-actions/configure-aws-credentials@v5
38+
with:
39+
role-to-assume: ${{ env.MAVEN_SNAPSHOTS_S3_ROLE }}
40+
aws-region: us-east-1
3541

3642
- name: Publish snapshots to Maven
3743
run: |

.github/workflows/release.yml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,6 @@ jobs:
4747
- name: Build Maven Artifacts
4848
run: ./gradlew publishAllPublicationsToMavenRepository
4949

50-
- name: Build Docker Image
51-
run: ./gradlew :release:docker:docker
52-
5350
- name: Upload Archives to Archives Bucket
5451
run: ./gradlew :release:archives:uploadArchives -Pregion=us-east-1 -Pbucket=${{ secrets.ARCHIVES_BUCKET_NAME }} -Pprofile=default -PbuildNumber=${{ github.run_number }}
5552

@@ -63,10 +60,8 @@ jobs:
6360
registry: public.ecr.aws
6461
env:
6562
AWS_REGION: us-east-1
66-
- name: Push Image to Staging ECR
67-
run: |
68-
docker tag opensearch-data-prepper:${{ env.version }} ${{ secrets.ECR_REPOSITORY_URL }}:${{ env.version }}-${{ github.run_number }}
69-
docker push ${{ secrets.ECR_REPOSITORY_URL }}:${{ env.version }}-${{ github.run_number }}
63+
- name: Build and Push Multi-Architecture Docker Image to Staging ECR
64+
run: ./gradlew :release:docker:dockerMultiArchitecture -PdockerRepository=${{ secrets.ECR_REPOSITORY_URL }}:${{ env.version }}-${{ github.run_number }}
7065

7166
validate-docker:
7267
runs-on: ubuntu-latest

0 commit comments

Comments
 (0)