Ingest v2: Jira source connector (#512)

Paul-Cornell · web-flow · commit 2e8fd86a2910 · 2025-03-05T16:48:22.000-08:00
diff --git a/ingestion/source-connectors/jira.mdx b/ingestion/source-connectors/jira.mdx
@@ -2,19 +2,28 @@
 title: Jira
 ---
 
-import SharedContentJira from '/snippets/sc-shared-text/jira.mdx';
+import NewDocument from '/snippets/general-shared-text/new-document.mdx';
+
+<NewDocument />
+
+import SharedContentJira from '/snippets/sc-shared-text/jira-cli-api.mdx';
 
 <SharedContentJira/>
 
+Now call the Unstructured Ingest CLI or the Unstructured Ingest Python library. The destination connector can be any of the ones supported. This example uses the local destination connector.
+
+This example sends data to Unstructured for processing by default. To process data locally instead, see the instructions at the end of this page.
+
 import JiraSh from '/snippets/source_connectors/jira.sh.mdx';
-import JiraPy from '/snippets/source_connectors/jira.py.mdx';
+import JiraPyV2 from '/snippets/source_connectors/jira.v2.py.mdx';
+import JiraPyV1 from '/snippets/source_connectors/jira.v1.py.mdx';
 
 <CodeGroup>
-
   <JiraSh />
-
-  <JiraPy />
-
+  <JiraPyV2 />
+  <JiraPyV1 />
 </CodeGroup>
 
-For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest jira --help`.
+import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
+
+<SharedPartitionByAPIOSS/>
diff --git a/snippets/general-shared-text/jira-cli-api.mdx b/snippets/general-shared-text/jira-cli-api.mdx
@@ -0,0 +1,38 @@
+The Jira connector dependencies:
+
+  ```bash CLI, Python
+  pip install "unstructured-ingest[jira]"
+  ```
+
+import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-dependencies.mdx';
+
+<AdditionalIngestDependencies />
+
+The following environment variables:
+
+- `JIRA_URL` - The site URL for your Jira Data Center installation or Jira Cloud account, represented by `--url` (CLI) or `url` (Python).
+- One of the following:
+
+  - For Jira Cloud or Jira Data Center, the target user's name or email address, and password, as follows:
+
+    - `JIRA_USERNAME` - The name or email address of the target user, represented by `--username` (CLI) or `username` (Python).
+    - `JIRA_PASSWORD_OR_API_TOKEN` - The user's password, represented by `--password` (CLI) or `password` (Python).
+
+  - For Jira Cloud only, the target user's name or email address, and API token, as follows:
+
+    - `JIRA_USERNAME` - The name or email address of the target user, represented by `--username` (CLI) or `username` (Python).
+    - `JIRA_PASSWORD_OR_API_TOKEN` - The user's API token, represented by `--password` (CLI) or `password` (Python).
+
+  - For Jira Data Center only, the target user's personal access token (PAT), as follows:
+
+    - `JIRA_PERSONAL_ACCESS_TOKEN` - The user's personal access token (PAT), represented by `--token` (CLI) or `token` (Python).
+
+Also:
+
+- For Jira Cloud, you must specify `--cloud` (CLI) or set `cloud` to `True` (Python).
+- For Jira Data Center, you can specify `--no-cloud` (CLI) or set `cloud` to `False` (Python). This is the default if not otherwise specified.
+- To process specific projects, boards, or issues, use:
+
+  - `--projects` with a comma-delimited list of target project IDs (CLI) or `projects` with an array of target project IDs (Python).
+  - `--boards` with a comma-delimited list of target board IDs (CLI) or `boards` with an array of target board IDs (Python).
+  - `--issues` with a comma-delimited list of target issue IDs (CLI) or `issues` with an array of target issue IDs (Python).
diff --git a/snippets/general-shared-text/jira.mdx b/snippets/general-shared-text/jira.mdx
@@ -0,0 +1,28 @@
+- A [Jira Cloud account](https://www.atlassian.com/try/cloud/signup?bundle=jira-software&edition=free) or 
+    [Jira Data Center installation](https://confluence.atlassian.com/adminjiraserver/installing-jira-data-center-938846870.html).
+- The site URL for your [Jira Data Center installation](https://confluence.atlassian.com/jirakb/find-your-site-url-to-set-up-the-jira-data-center-and-server-mobile-app-954244798.html) or Jira Cloud account. 
+  For Jira Cloud, open Jira in your web browser and copy the address from the browser's address bar. 
+  If you're unsure, check the dashboard URL, or if viewing an issue, project or board, the site URL is typically everything that comes before and including `/jira`, such as 
+  `https://<organization>.atlassian.net/jira`. 
+- To process Jira projects, provide the IDs for the target projects. To get a project's ID, sign in to your Jira Cloud account or Jira Data Center installation, and then go to the following URL: `https://<organization>.atlassian.net/rest/api/latest/project/<project-key>`, 
+  replacing `<organization>` with yours, and replacing `<project-key>` with the target project's key. In the 
+  response, look for the URL `https://<organization>.atlassian.net/rest/api/3/project/<project-id>`, where `<project-id>` is the target project's ID.
+- To process Jira boards, provide the IDs for the target boards. To get a board's ID, sign in to your Jira Cloud account or Jira Data Center installation, and then go to the following URL: `https://<organization>.atlassian.net/rest/agile/1.0/board?projectKeyOrId=<project-key-or-id>`, 
+  replacing `<organization>` with yours, and `<project-key-or-id>` with the associated project's key or ID. In the 
+  response, look for the URL `https://<organization>.atlassian.net/rest/agile/1.0/board/<board-id>`, where `<board-id>` is the board's ID.
+- To process Jira issues, provide the IDs for the target issues. To get an issue's ID, sign in to your Jira Cloud account or Jira Data Center installation, open the issue, and then look at the URL in your browser's address bar. The issue ID is the string of characters after the final slash in the URL.
+- A user in your [Jira Cloud account](https://support.atlassian.com/jira-cloud-administration/docs/manage-users-groups-permissions-and-roles-in-jira-cloud/) or 
+  [Jira Data Center installation](https://confluence.atlassian.com/adminjiraserver/create-edit-or-remove-a-user-938847025.html).
+- The user must have the correct permissions in your 
+  [Jira Cloud account](https://support.atlassian.com/jira-cloud-administration/docs/manage-users-groups-permissions-and-roles-in-jira-cloud/) or 
+  [Jira Data Center installation](https://confluence.atlassian.com/jirakb/permissions-made-simple-for-jira-server-717062767.html) to 
+  access the target projects, boards, and issues.
+- One of the following:
+
+  - For Jira Cloud or Jira Data Center, the target user's name or email address, and password. 
+    [Change a Jira Cloud user's password](https://support.atlassian.com/user-management/docs/change-password-for-portal-only-customers/). 
+    [Change a Jira Data Center user's password](https://confluence.atlassian.com/adminjiraserver/create-edit-or-remove-a-user-938847025.html).
+  - For Jira Cloud only, the target user's name or email address, and API token. 
+    [Create an API token](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/).
+  - For Jira Data Center only, the target user's personal access token (PAT). 
+    [Create a PAT](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html).
diff --git a/snippets/sc-shared-text/jira-cli-api.mdx b/snippets/sc-shared-text/jira-cli-api.mdx
@@ -0,0 +1,9 @@
+Connect Jira to your preprocessing pipeline, and use the Unstructured Ingest CLI or the Unstructured Ingest Python library to batch process all your documents and store structured outputs locally on your filesystem.
+
+The requirements are as follows. 
+
+import SharedJira from '/snippets/general-shared-text/jira.mdx';
+import SharedJiraCLIAPI from '/snippets/general-shared-text/jira-cli-api.mdx';
+
+<SharedJira />
+<SharedJiraCLIAPI />
diff --git a/snippets/sc-shared-text/jira.mdx b/snippets/sc-shared-text/jira.mdx
diff --git a/snippets/source_connectors/jira.sh.mdx b/snippets/source_connectors/jira.sh.mdx
@@ -1,13 +1,22 @@
-```bash Shell
+```bash CLI
 #!/usr/bin/env bash
 
+# Chunking and embedding are optional.
+# For personal access token (PAT) authentication (Jira Data Center only), replace --username and --password with: --token $JIRA_PERSONAL_ACCESS_TOKEN
+# For Jira Data Center, replace --cloud with --no-cloud (the default). Bash does not allow a comment after a line-continuation backslash, so these notes are kept here.
+
 unstructured-ingest \
   jira \
-  --metadata-exclude filename,file_directory,metadata.data_source.date_processed \
-  --url https://unstructured-jira-connector-test.atlassian.net \
-  --user-email 12345678@unstructured.io \
-  --api-token ABCDE1234ABDE1234ABCDE1234 \
-  --output-dir $LOCAL_FILE_OUTPUT_DIR \
-  --num-processes 2 \
-  --strategy hi_res
-```
+    --url $JIRA_URL \
+    --username $JIRA_USERNAME \
+    --password $JIRA_PASSWORD_OR_API_TOKEN \
+    --cloud \
+    --output-dir $LOCAL_FILE_OUTPUT_DIR \
+    --chunking-strategy by_title \
+    --embedding-provider huggingface \
+    --partition-by-api \
+    --api-key $UNSTRUCTURED_API_KEY \
+    --partition-endpoint $UNSTRUCTURED_API_URL \
+    --strategy hi_res \
+    --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}"
+```
diff --git a/snippets/source_connectors/jira.v1.py.mdx b/snippets/source_connectors/jira.v1.py.mdx
@@ -0,0 +1,29 @@
+```python Python Ingest v1
+import os
+
+from unstructured_ingest.connector.jira import JiraAccessConfig, SimpleJiraConfig
+from unstructured_ingest.interfaces import PartitionConfig, ProcessorConfig, ReadConfig
+from unstructured_ingest.runner import JiraRunner
+
+if __name__ == "__main__":
+    runner = JiraRunner(
+        processor_config=ProcessorConfig(
+            verbose=True,
+            output_dir=os.getenv("LOCAL_FILE_OUTPUT_DIR"),
+            num_processes=2,
+        ),
+        read_config=ReadConfig(),
+        partition_config=PartitionConfig(
+            metadata_exclude=["filename", "file_directory", "metadata.data_source.date_processed"],
+            partition_by_api=True,
+            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
+            strategy="hi_res",
+        ),
+        connector_config=SimpleJiraConfig(
+            access_config=JiraAccessConfig(api_token=os.getenv("JIRA_PASSWORD_OR_API_TOKEN")),
+            url=os.getenv("JIRA_URL"),
+            user_email=os.getenv("JIRA_USERNAME"),
+        ),
+    )
+    runner.run()
+```
diff --git a/snippets/source_connectors/jira.v2.py.mdx b/snippets/source_connectors/jira.v2.py.mdx
@@ -0,0 +1,60 @@
+```python Python Ingest v2
+import os
+
+from unstructured_ingest.v2.pipeline.pipeline import Pipeline
+from unstructured_ingest.v2.interfaces import ProcessorConfig
+
+from unstructured_ingest.v2.processes.connectors.jira import (
+    JiraIndexerConfig,
+    JiraDownloaderConfig,
+    JiraConnectionConfig,
+    JiraAccessConfig
+)
+
+from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
+from unstructured_ingest.v2.processes.chunker import ChunkerConfig
+from unstructured_ingest.v2.processes.embedder import EmbedderConfig
+from unstructured_ingest.v2.processes.connectors.local import LocalUploaderConfig
+
+if __name__ == "__main__":
+    Pipeline.from_configs(
+        context=ProcessorConfig(),
+        indexer_config=JiraIndexerConfig(
+            # projects=[
+            #     "project-id",
+            #     "project-id"
+            # ],
+            # boards=[
+            #     "board-id",
+            #     "board-id"
+            # ],
+            # issues=[
+            #     "issue-id",
+            #     "issue-id"
+            # ]
+        ),
+        downloader_config=JiraDownloaderConfig(download_dir=os.getenv("LOCAL_FILE_DOWNLOAD_DIR")),
+        source_connection_config=JiraConnectionConfig(
+            access_config=JiraAccessConfig(
+                password=os.getenv("JIRA_PASSWORD_OR_API_TOKEN"), # Password or API token authentication.
+                # token=os.getenv("JIRA_PERSONAL_ACCESS_TOKEN") # Personal access token authentication only.
+            ),
+            url=os.getenv("JIRA_URL"),
+            username=os.getenv("JIRA_USERNAME"), # For password or API token authentication.
+            cloud=True # True for Jira Cloud, False (default) for Jira Data Center.
+        ),
+        partitioner_config=PartitionerConfig(
+            partition_by_api=True,
+            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
+            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
+            additional_partition_args={
+                "split_pdf_page": True,
+                "split_pdf_allow_failed": True,
+                "split_pdf_concurrency_level": 15
+            }
+        ),
+        chunker_config=ChunkerConfig(chunking_strategy="by_title"),
+        embedder_config=EmbedderConfig(embedding_provider="huggingface"),
+        uploader_config=LocalUploaderConfig(output_dir=os.getenv("LOCAL_FILE_OUTPUT_DIR"))
+    ).run()
+```
diff --git a/snippets/source_connectors/jira_api.sh.mdx b/snippets/source_connectors/jira_api.sh.mdx