@@ -16,23 +16,19 @@ the JSONs from S3 instead of hooking on a live API.
 The loaded data is organised like so in the S3 bucket:
 ```bash
 <DESTINATION__FILESYSTEM__BUCKET_URL>
-├── category # "category" resource from /category endpoint
-│   └── 2026-02-16
-│       └── 1771268036.7864842.3722039a90.jsonl # JSONL data from /category for 2026-02-16
-├── category_data # "category_data" resource from /data/category/{categoryId}
-│   └── 2026-02-16
-│       └── 1771268036.7864842.4a41d98fad.jsonl # JSONL data from /data/category/{categoryId} for 2026-02-16
-├── _dlt_loads # One file per pipeline run (load), describes the load
-│   └── submission_source__1771268036.7864842.jsonl
-├── _dlt_pipeline_state # Pipeline state files
-│   └── submission-snapshot__1771267844.1206408__998e553c0cea456594bce118ab30fc8850159efc09fbfb1e5179df2b13293c46.jsonl
-├── _dlt_version # Dataset schema versioning
-│   └── submission_source__1771267974.1898882__998e553c0cea456594bce118ab30fc8850159efc09fbfb1e5179df2b13293c46.jsonl
+├── category
+│   └── 2026-03-03-data.jsonl # JSONL data from /category for 2026-03-03
+├── category_data
+│   └── 2026-03-03-data.jsonl # JSONL data from /data/category/{categoryId} for 2026-03-03
+├── _dlt_loads # Pipeline run metadata files
+├── _dlt_pipeline_state # Pipeline state files
+├── _dlt_version # Dataset schema versioning
 └── init
 ```
 
 > [!NOTE]
-> We include the `Category.id` and `Category.studyId` values from the `/category` endpoint in the
+> We include the `Category.id` and `Category.studyId` values from the `/category` endpoint in the `category_data` items,
+> so that downstream ingestions can take the full JSONL file and load each item into the appropriate dataset.
 
 ### Getting a Submission API OIDC bearer token
 
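As the note added above points out, each `category_data` item carries `Category.id` and `Category.studyId`, so a downstream ingestion can demultiplex one daily JSONL file into per-study loads. A minimal sketch of that idea — the field names (`id`, `studyId`) and the sample items here are assumptions for illustration, not taken from the real API:

```python
import json
from collections import defaultdict

# Hypothetical stand-in for one day's category_data JSONL file fetched
# from the bucket; real items come from /data/category/{categoryId}.
sample_jsonl = "\n".join([
    json.dumps({"id": "cat-1", "studyId": "study-A", "value": 42}),
    json.dumps({"id": "cat-1", "studyId": "study-B", "value": 7}),
])

def group_by_study(jsonl_text: str) -> dict:
    """Bucket JSONL items by studyId so each study's items can be
    loaded into the appropriate downstream dataset."""
    buckets = defaultdict(list)
    for line in jsonl_text.splitlines():
        if line.strip():
            item = json.loads(line)
            buckets[item["studyId"]].append(item)
    return dict(buckets)

grouped = group_by_study(sample_jsonl)
print(sorted(grouped))  # → ['study-A', 'study-B']
```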
@@ -67,7 +63,7 @@ base_url = "<BASE SUBMISSION API URL>"
 [destination.filesystem]
 # s3 bucket, use 'file://<ABSOLUTE PATH>' to use the local filesystem
 bucket_url = "s3://<BUCKET NAME>" # replace with bucket name/path
-layout = "{table_name}/{YYYY}-{MM}-{DD}/{load_id}.{file_id}.{ext}"
+layout = "{table_name}/{YYYY}-{MM}-{DD}-data.{ext}"
 
 [destination.filesystem.credentials]
 # doesn't matter if using a local filesystem
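The new `layout` collapses each table's daily load into a single date-stamped object, which is what produces the `<table_name>/<date>-data.jsonl` keys shown in the tree earlier. A minimal sketch of how the placeholders expand — illustrative only, not dlt's actual rendering code:

```python
from datetime import date

# Illustrative expansion of the filesystem layout
# "{table_name}/{YYYY}-{MM}-{DD}-data.{ext}"; not dlt's implementation.
def render_layout(table_name: str, day: date, ext: str = "jsonl") -> str:
    return f"{table_name}/{day:%Y}-{day:%m}-{day:%d}-data.{ext}"

# One object per table per day, e.g. for the "category" table:
print(render_layout("category", date(2026, 3, 3)))  # → category/2026-03-03-data.jsonl
```

Note that without `{load_id}`/`{file_id}` in the layout, a second run on the same day writes to the same key, so this layout keeps at most one snapshot per table per day.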
@@ -215,7 +211,12 @@ spec:
 
 ### Ingest snapshots into Bento with Bento-ETL
 
-TODO: Document how to do this.
+The `submission-snapshot` workflow is currently deployed in PCGL's `dev` cluster.
 
-~~Need to implement S3 source first.~~
-Bento-ETL S3 source has been implemented, ready to integrate.
+The Kustomization base is defined [here](https://github.com/Pan-Canadian-Genome-Library/deployment/blob/main/base/research-portal/submission-snapshots-cronjob/kustomization.yaml).
+
+| Environment | Repo location | ArgoCD Application |
+| ----------- | ------------- | ------------------ |
+| `dev`       | [Kustomization link](https://github.com/Pan-Canadian-Genome-Library/deployment/blob/main/dev/research/submission-snapshots/kustomization.yaml) | [App link](https://argocd.ingress.dev.k8s.pcgl.dev-sd4h.ca/applications/argocd/submission-snapshots?view=tree&resource=) |
+| `staging`   | n/a | n/a |
+| `prod`      | n/a | n/a |