Skip to content

Commit 5f34543

Browse files
authored
New: [AEA-5671] - Trigger Document Ingestion on S3 Upload (#20)
## Summary 🎫 [AEA-5671](https://nhsd-jira.digital.nhs.uk/browse/AEA-5671) Trigger Document Ingestion on S3 Upload - ✨ New Feature ### Details This pull request implements a mechanism that listens for new document uploads to an S3 bucket and triggers a Lambda function in response. The Lambda function is responsible for initiating the knowledge base ingestion process. Key changes include: - Setup of S3 event notifications for ObjectCreated events. - Creation of a Lambda function to handle the trigger and call the ingestion service. - IAM role and permission updates to enable S3 → Lambda → ingestion flow. - Basic error handling and CloudWatch logging for observability. - Initial test validation to ensure end-to-end triggering and ingestion.
1 parent aeb5e30 commit 5f34543

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

44 files changed

+1740
-1404
lines changed

.github/workflows/cdk_package_code.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ jobs:
6363
run: |
6464
pip3 install -r packages/slackBotFunction/requirements.txt -t packages/slackBotFunction
6565
pip3 install -r packages/createIndexFunction/requirements.txt -t packages/createIndexFunction
66+
pip3 install -r packages/syncKnowledgeBaseFunction/requirements.txt -t packages/syncKnowledgeBaseFunction
6667
6768
- name: 'Tar files'
6869
run: |

.vscode/eps-assist-me.code-workspace

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@
1515
{
1616
"name": "packages/slackBotFunction",
1717
"path": "../packages/slackBotFunction"
18+
},
19+
{
20+
"name": "packages/syncKnowledgeBaseFunction",
21+
"path": "../packages/syncKnowledgeBaseFunction"
1822
}
1923
],
2024
"settings": {

Makefile

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ install: install-python install-hooks install-node
1010

1111
install-python:
1212
poetry install
13-
cd packages/slackBotFunction && pip install -r requirements.txt && pip install -r requirements-test.txt
1413
cd packages/createIndexFunction && pip install -r requirements.txt && pip install -r requirements-test.txt
14+
cd packages/slackBotFunction && pip install -r requirements.txt && pip install -r requirements-test.txt
15+
cd packages/syncKnowledgeBaseFunction && pip install -r requirements.txt && pip install -r requirements-test.txt
1516

1617
install-hooks: install-python
1718
poetry run pre-commit install --install-hooks --overwrite
@@ -45,16 +46,19 @@ lint-flake8:
4546
poetry run flake8 .
4647

4748
test:
48-
cd packages/slackBotFunction && PYTHONPATH=. COVERAGE_FILE=coverage/.coverage python -m pytest
4949
cd packages/createIndexFunction && PYTHONPATH=. COVERAGE_FILE=coverage/.coverage python -m pytest
50+
cd packages/slackBotFunction && PYTHONPATH=. COVERAGE_FILE=coverage/.coverage python -m pytest
51+
cd packages/syncKnowledgeBaseFunction && PYTHONPATH=. COVERAGE_FILE=coverage/.coverage python -m pytest
5052

5153
clean:
5254
rm -rf packages/cdk/coverage
5355
rm -rf packages/cdk/lib
54-
rm -rf packages/slackBotFunction/coverage
55-
rm -rf packages/slackBotFunction/.coverage
5656
rm -rf packages/createIndexFunction/coverage
5757
rm -rf packages/createIndexFunction/.coverage
58+
rm -rf packages/slackBotFunction/coverage
59+
rm -rf packages/slackBotFunction/.coverage
60+
rm -rf packages/syncKnowledgeBaseFunction/coverage
61+
rm -rf packages/syncKnowledgeBaseFunction/.coverage
5862
rm -rf cdk.out
5963
rm -rf .build
6064

@@ -107,6 +111,7 @@ cdk-synth:
107111
--context logRetentionInDays=30 \
108112
--context slackBotToken=dummy \
109113
--context slackSigningSecret=dummy
114+
./scripts/fix_cfn_guard.sh
110115

111116
cdk-diff:
112117
npx cdk diff \

README.md

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ The solution consists of:
99

1010
- **Slack Bot Function**: AWS Lambda function that handles Slack slash commands and integrates with Amazon Bedrock Knowledge Base
1111
- **Create Index Function**: AWS Lambda function that creates and manages OpenSearch vector indices for the knowledge base
12+
- **Sync Knowledge Base Function**: AWS Lambda function that automatically triggers knowledge base ingestion when documents are uploaded to S3
1213
- **OpenSearch Serverless**: Vector database for storing and searching document embeddings
1314
- **Amazon Bedrock Knowledge Base**: RAG (Retrieval-Augmented Generation) service with guardrails
14-
- **S3 Storage**: Document storage for the knowledge base
15+
- **S3 Storage**: Document storage for the knowledge base with automatic sync triggers
1516
- **AWS CDK**: Infrastructure as Code for deployment
1617

1718
## Project Structure
@@ -20,13 +21,29 @@ This is a monorepo with the following structure:
2021

2122
```
2223
packages/
23-
├── cdk/ # AWS CDK infrastructure code
24-
│ ├── bin/ # CDK app entry point
25-
│ ├── constructs/ # Reusable CDK constructs
26-
│ ├── resources/ # AWS resource definitions
27-
│ └── stacks/ # CDK stack definitions
28-
├── createIndexFunction/ # Lambda function for OpenSearch index management
29-
└── slackBotFunction/ # Lambda function for Slack bot integration
24+
├── cdk/ # AWS CDK infrastructure code
25+
│ ├── bin/ # CDK app entry point
26+
│ │ └── utils/ # CDK utility functions
27+
│ ├── constructs/ # Reusable CDK constructs
28+
│ │ └── RestApiGateway/ # API Gateway specific constructs
29+
│ ├── resources/ # AWS resource definitions
30+
│ └── stacks/ # CDK stack definitions
31+
├── createIndexFunction/ # Lambda function for OpenSearch index management
32+
│ ├── app/ # Application code
33+
│ │ ├── config/ # Configuration and environment variables
34+
│ │ └── handler.py # Lambda handler
35+
│ └── tests/ # Unit tests
36+
├── slackBotFunction/ # Lambda function for Slack bot integration
37+
│ ├── app/ # Application code
38+
│ │ ├── config/ # Configuration and environment variables
39+
│ │ ├── slack/ # Slack-specific logic
40+
│ │ └── handler.py # Lambda handler
41+
│ └── tests/ # Unit tests
42+
└── syncKnowledgeBaseFunction/ # Lambda function for automatic knowledge base sync
43+
├── app/ # Application code
44+
│ ├── config/ # Configuration and environment variables
45+
│ └── handler.py # Lambda handler
46+
└── tests/ # Unit tests
3047
```
3148

3249
## Contributing
@@ -149,14 +166,15 @@ These are used to do common commands related to cdk
149166

150167
#### Linting and testing
151168

152-
- `lint` Runs lint for GitHub Actions and scripts.
169+
- `lint` Runs all linting checks.
153170
- `lint-black` Runs black formatter on Python code.
154171
- `lint-flake8` Runs flake8 linter on Python code.
155172
- `lint-githubactions` Lints the repository's GitHub Actions workflows.
156173
- `lint-githubaction-scripts` Lints all shell scripts in `.github/scripts` using ShellCheck.
157-
- `test` Runs unit tests for Lambda functions.
158174
- `cfn-guard` Runs cfn-guard against CDK resources.
175+
- `git-secrets-docker-setup` Sets up git-secrets Docker container.
159176
- `pre-commit` Runs pre-commit hooks on all files.
177+
- `test` Runs unit tests for Lambda functions.
160178

161179
#### Compiling
162180

package.json

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,7 @@
1414
"author": "NHS Digital",
1515
"license": "MIT",
1616
"workspaces": [
17-
"packages/cdk",
18-
"packages/querytool",
19-
"packages/slackbot"
17+
"packages/cdk"
2018
],
2119
"devDependencies": {
2220
"@semantic-release/changelog": "^6.0.3",

packages/cdk/constructs/LambdaFunction.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ export class LambdaFunction extends Construct {
132132
memorySize: 256,
133133
timeout: Duration.seconds(50),
134134
architecture: Architecture.X86_64,
135-
handler: "app.handler",
135+
handler: "app.handler.handler",
136136
code: Code.fromAsset(props.packageBasePath),
137137
role,
138138
environment: {

packages/cdk/constructs/OpenSearchCollection.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,26 @@ import {CfnCollection, CfnSecurityPolicy, CfnAccessPolicy} from "aws-cdk-lib/aws
44
export interface OpenSearchCollectionProps {
55
readonly collectionName: string
66
readonly principals: Array<string>
7+
readonly region: string
8+
readonly account: string
79
}
810

911
export class OpenSearchCollection extends Construct {
1012
public readonly collection: CfnCollection
1113
public readonly endpoint: string
14+
private readonly region: string
15+
private readonly account: string
16+
17+
public get collectionArn(): string {
18+
return `arn:aws:aoss:${this.region}:${this.account}:collection/${this.collection.attrId}`
19+
}
1220

1321
constructor(scope: Construct, id: string, props: OpenSearchCollectionProps) {
1422
super(scope, id)
1523

24+
this.region = props.region
25+
this.account = props.account
26+
1627
// Encryption policy using AWS-managed keys
1728
const encryptionPolicy = new CfnSecurityPolicy(this, "EncryptionPolicy", {
1829
name: `${props.collectionName}-encryption`,
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import {Construct} from "constructs"
2+
import {Bucket, EventType} from "aws-cdk-lib/aws-s3"
3+
import {LambdaDestination} from "aws-cdk-lib/aws-s3-notifications"
4+
import {Function as LambdaFunction} from "aws-cdk-lib/aws-lambda"
5+
6+
export interface S3LambdaNotificationProps {
7+
bucket: Bucket
8+
lambdaFunction: LambdaFunction
9+
}
10+
11+
export class S3LambdaNotification extends Construct {
12+
constructor(scope: Construct, id: string, props: S3LambdaNotificationProps) {
13+
super(scope, id)
14+
15+
const lambdaDestination = new LambdaDestination(props.lambdaFunction)
16+
17+
// Trigger knowledge base sync only for supported document types
18+
const supportedExtensions = [".pdf", ".txt", ".md", ".csv", ".doc", ".docx", ".xls", ".xlsx", ".html", ".json"]
19+
20+
supportedExtensions.forEach(ext => {
21+
// Handle all file creation/modification events
22+
props.bucket.addEventNotification(
23+
EventType.OBJECT_CREATED,
24+
lambdaDestination,
25+
{suffix: ext}
26+
)
27+
28+
// Handle all file deletion events
29+
props.bucket.addEventNotification(
30+
EventType.OBJECT_REMOVED,
31+
lambdaDestination,
32+
{suffix: ext}
33+
)
34+
})
35+
}
36+
}

packages/cdk/nagSuppressions.ts

Lines changed: 58 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,21 @@ export const nagSuppressions = (stack: Stack) => {
3535
]
3636
)
3737

38+
// Suppress wildcard log permissions for SyncKnowledgeBase Lambda
39+
safeAddNagSuppression(
40+
stack,
41+
"/EpsAssistMeStack/Functions/SyncKnowledgeBaseFunction/LambdaPutLogsManagedPolicy/Resource",
42+
[
43+
{
44+
id: "AwsSolutions-IAM5",
45+
reason: "Wildcard permissions are required for log stream access under known paths.",
46+
appliesTo: [
47+
"Resource::<FunctionsSyncKnowledgeBaseFunctionLambdaLogGroupB19BE2BE.Arn>:log-stream:*"
48+
]
49+
}
50+
]
51+
)
52+
3853
// Suppress API Gateway validation warning for Apis construct
3954
safeAddNagSuppression(
4055
stack,
@@ -87,18 +102,18 @@ export const nagSuppressions = (stack: Stack) => {
87102
]
88103
)
89104

90-
// Suppress IAM wildcard permissions for Bedrock execution managed policy
105+
// Suppress IAM wildcard permissions for Bedrock execution role policy
91106
safeAddNagSuppression(
92107
stack,
93-
"/EpsAssistMeStack/IamResources/BedrockExecutionManagedPolicy/Resource",
108+
"/EpsAssistMeStack/BedrockExecutionRole/Policy/Resource",
94109
[
95110
{
96111
id: "AwsSolutions-IAM5",
97112
reason: "Bedrock Knowledge Base requires these permissions to access S3 documents and OpenSearch collection.",
98113
appliesTo: [
99-
"Action::bedrock:Delete*",
100114
"Resource::<StorageDocsBucketepsamDocsF25F63F1.Arn>/*",
101-
"Resource::<StorageDocsBucketepsampr16Docs240CC945.Arn>/*",
115+
"Resource::<StorageDocsBucketepsampr20Docs075F648F.Arn>/*",
116+
"Action::bedrock:Delete*",
102117
`Resource::arn:aws:bedrock:eu-west-2:${account}:knowledge-base/*`,
103118
`Resource::arn:aws:aoss:eu-west-2:${account}:collection/*`,
104119
"Resource::*"
@@ -107,10 +122,10 @@ export const nagSuppressions = (stack: Stack) => {
107122
]
108123
)
109124

110-
// Suppress wildcard permissions for CreateIndex managed policy
125+
// Suppress wildcard permissions for CreateIndex policy
111126
safeAddNagSuppression(
112127
stack,
113-
"/EpsAssistMeStack/IamResources/CreateIndexManagedPolicy/Resource",
128+
"/EpsAssistMeStack/RuntimePolicies/CreateIndexPolicy/Resource",
114129
[
115130
{
116131
id: "AwsSolutions-IAM5",
@@ -123,18 +138,16 @@ export const nagSuppressions = (stack: Stack) => {
123138
]
124139
)
125140

126-
// Suppress wildcard permissions for SlackBot managed policy
141+
// Suppress wildcard permissions for SlackBot policy
127142
safeAddNagSuppression(
128143
stack,
129-
"/EpsAssistMeStack/IamResources/SlackBotManagedPolicy/Resource",
144+
"/EpsAssistMeStack/RuntimePolicies/SlackBotPolicy/Resource",
130145
[
131146
{
132147
id: "AwsSolutions-IAM5",
133-
reason: "SlackBot Lambda needs access to all guardrails, knowledge bases, and functions for content filtering and self-invocation.",
148+
reason: "SlackBot Lambda needs wildcard access for Lambda functions (self-invocation) and KMS operations.",
134149
appliesTo: [
135150
`Resource::arn:aws:lambda:eu-west-2:${account}:function:*`,
136-
`Resource::arn:aws:bedrock:eu-west-2:${account}:guardrail/*`,
137-
`Resource::arn:aws:bedrock:eu-west-2:${account}:knowledge-base/*`,
138151
"Action::kms:GenerateDataKey*",
139152
"Action::kms:ReEncrypt*"
140153
]
@@ -177,6 +190,40 @@ export const nagSuppressions = (stack: Stack) => {
177190
]
178191
)
179192

193+
// Suppress IAM4 (AWS managed policy) and IAM5 (wildcard resource) findings on the roles of BucketNotificationsHandler constructs, matched by ID prefix so any hash suffix is covered
194+
const bucketNotificationHandlers = stack.node.findAll().filter(node =>
195+
node.node.id.startsWith("BucketNotificationsHandler")
196+
)
197+
198+
bucketNotificationHandlers.forEach(handler => {
199+
safeAddNagSuppression(
200+
stack,
201+
`${handler.node.path}/Role/Resource`,
202+
[
203+
{
204+
id: "AwsSolutions-IAM4",
205+
reason: "Auto-generated CDK role uses AWS managed policy for basic Lambda execution.",
206+
appliesTo: [
207+
"Policy::arn:<AWS::Partition>:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
208+
]
209+
}
210+
]
211+
)
212+
213+
safeAddNagSuppression(
214+
stack,
215+
`${handler.node.path}/Role/DefaultPolicy/Resource`,
216+
[
217+
{
218+
id: "AwsSolutions-IAM5",
219+
reason: "Auto-generated CDK role requires wildcard permissions for S3 bucket notifications.",
220+
appliesTo: [
221+
"Resource::*"
222+
]
223+
}
224+
]
225+
)
226+
})
180227
}
181228

182229
const safeAddNagSuppression = (stack: Stack, path: string, suppressions: Array<NagPackSuppression>) => {

0 commit comments

Comments
 (0)