From 7aa2173a1b366326e7cdeb244e42e5047cd503b5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Jun 2021 22:22:14 +0000 Subject: [PATCH 01/20] Bump urllib3 from 1.24.2 to 1.26.5 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.24.2 to 1.26.5. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.24.2...1.26.5) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index cbe0492..0d40219 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ certifi==2018.4.16 chardet==3.0.4 idna==2.6 requests==2.20.0 -urllib3==1.24.2 +urllib3==1.26.5 PyYAML==5.3.1 \ No newline at end of file From 8855d1be29fb2c0cb15096ceb7d8de3c1601d5a6 Mon Sep 17 00:00:00 2001 From: Daniel Weeber Date: Mon, 7 Feb 2022 13:40:13 +0100 Subject: [PATCH 02/20] fix requirements --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0d40219..6df7e11 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ boto==2.48.0 certifi==2018.4.16 chardet==3.0.4 idna==2.6 -requests==2.20.0 -urllib3==1.26.5 -PyYAML==5.3.1 \ No newline at end of file +requests==2.27.1 +urllib3==1.26.8 +PyYAML==5.3.1 From 2e22d13897e79e3b4b678215a0577f970cb2e108 Mon Sep 17 00:00:00 2001 From: Ilia Khramtsov Date: Fri, 2 Dec 2022 19:12:01 +0400 Subject: [PATCH 03/20] add S3_DIR and S3_ENDPOINT_URL --- backup.py | 18 ++++++++++-------- config.yaml | 10 +++++++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/backup.py b/backup.py index a43add1..e163786 100644 --- a/backup.py +++ b/backup.py @@ -7,7 +7,6 @@ import boto from boto.s3.key import Key import wizard -from time import gmtime, strftime def read_config(): @@ -38,8 +37,8 @@ def create_confluence_backup(self): time.sleep(self.wait) while 'fileName' not in self.backup_status.keys(): self.backup_status = json.loads(self.session.get(confluence_backup_status).text) - print('Current status: {progress}; {description}'.format( - progress=self.backup_status['alternativePercentage'], + print('Current status: {progress}; {description}'.format( + progress=self.backup_status['alternativePercentage'], description=self.backup_status['currentStatus'])) time.sleep(self.wait) return 'https://{url}/wiki/download/{file_name}'.format( @@ -58,8 +57,8 @@ def create_jira_backup(self): while 'result' not in self.backup_status.keys(): self.backup_status = json.loads(self.session.get(jira_backup_status).text) print('Current status: {status} {progress}; {description}'.format( - status=self.backup_status['status'], - progress=self.backup_status['progress'], + status=self.backup_status['status'], + progress=self.backup_status['progress'], description=self.backup_status['description'])) time.sleep(self.wait) return '{prefix}/{result_id}'.format( @@ -82,15 +81,18 @@ def stream_to_s3(self, url, remote_filename): connect = boto.connect_s3() else: connect = boto.connect_s3( - aws_access_key_id=self.config['UPLOAD_TO_S3']['AWS_ACCESS_KEY'], - aws_secret_access_key=self.config['UPLOAD_TO_S3']['AWS_SECRET_KEY'] + host=self.config['UPLOAD_TO_S3']['AWS_ENDPOINT_URL'], + 
aws_access_key_id=self.config['UPLOAD_TO_S3']['AWS_ACCESS_KEY'], + aws_secret_access_key=self.config['UPLOAD_TO_S3']['AWS_SECRET_KEY'], + is_secure=self.config['UPLOAD_TO_S3']['AWS_IS_SECURE'] ) + connect.auth_region_name = self.config['UPLOAD_TO_S3']['AWS_REGION'] bucket = connect.get_bucket(self.config['UPLOAD_TO_S3']['S3_BUCKET']) r = self.session.get(url, stream=True) if r.status_code == 200: k = Key(bucket) - k.key = remote_filename + k.key = "%s%s" % (self.config['UPLOAD_TO_S3']['S3_DIR'], remote_filename) k.content_type = r.headers['content-type'] k.set_contents_from_string(r.content) return diff --git a/config.yaml b/config.yaml index 961fa7a..c47b396 100644 --- a/config.yaml +++ b/config.yaml @@ -4,7 +4,11 @@ USER_EMAIL: "email address for the Atlassian account you're using to create the API_TOKEN: "token ID generated at https://id.atlassian.com/manage/api-tokens" INCLUDE_ATTACHMENTS: "include attachments? this will make the backup size bigger - true / false" DOWNLOAD_LOCALLY: "download the backup file to backups folder? true / false" -UPLOAD_TO_S3: - S3_BUCKET: "S3 bucket name (empty value will skip this step)" +UPLOAD_TO_S3: + AWS_ENDPOINT_URL: "amazon S3 endpoints https://docs.aws.amazon.com/general/latest/gr/s3.html" + AWS_REGION: "amazon S3 region" + S3_BUCKET: "S3 bucket name (empty value will skip this step)" + S3_DIR: "S3 directory for upload (example Atlassian/)" AWS_ACCESS_KEY: "not mandatory if already set on the machine with AWS CLI" - AWS_SECRET_KEY: "not mandatory if already set on the machine with AWS CLI" \ No newline at end of file + AWS_SECRET_KEY: "not mandatory if already set on the machine with AWS CLI" + AWS_IS_SECURE: True \ No newline at end of file From 92273391eab0770be320f2f6d76724e35aa86d70 Mon Sep 17 00:00:00 2001 From: Ilia Khramtsov Date: Mon, 5 Dec 2022 21:12:02 +0400 Subject: [PATCH 04/20] replace format --- backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backup.py b/backup.py index e163786..2895a22 100644 --- a/backup.py +++ b/backup.py @@ -92,7 +92,7 @@ def stream_to_s3(self, url, remote_filename): r = self.session.get(url, stream=True) if r.status_code == 200: k = Key(bucket) - k.key = "%s%s" % (self.config['UPLOAD_TO_S3']['S3_DIR'], remote_filename) + k.key = "{s3_bucket}{s3_filename}".format(s3_bucket=self.config['UPLOAD_TO_S3']['S3_DIR'], s3_filename=remote_filename) k.content_type = r.headers['content-type'] k.set_contents_from_string(r.content) return From 7f2be47bab45f69916537e1e79cb0a887ebda9b4 Mon Sep 17 00:00:00 2001 From: MATMAF Date: Thu, 19 Sep 2024 15:09:09 +0200 Subject: [PATCH 05/20] Change the boto module to boto3 and adapt the code #17 --- backup.py | 45 ++++++++++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/backup.py b/backup.py index 2895a22..e24803a 100644 --- a/backup.py +++ b/backup.py @@ -4,8 +4,8 @@ import os import argparse import requests -import boto -from boto.s3.key import Key +import boto3 +from boto3.s3.transfer import TransferConfig import wizard @@ -78,25 +78,40 @@ def stream_to_s3(self, url, remote_filename): print('-> Streaming to S3') if self.config['UPLOAD_TO_S3']['AWS_ACCESS_KEY'] == '': - connect = boto.connect_s3() + s3_client = boto3.client('s3') else: - connect = boto.connect_s3( - host=self.config['UPLOAD_TO_S3']['AWS_ENDPOINT_URL'], + s3_client = boto3.client( + 's3', aws_access_key_id=self.config['UPLOAD_TO_S3']['AWS_ACCESS_KEY'], aws_secret_access_key=self.config['UPLOAD_TO_S3']['AWS_SECRET_KEY'], - 
is_secure=self.config['UPLOAD_TO_S3']['AWS_IS_SECURE'] - ) - connect.auth_region_name = self.config['UPLOAD_TO_S3']['AWS_REGION'] + region_name=self.config['UPLOAD_TO_S3']['AWS_REGION'], + endpoint_url=self.config['UPLOAD_TO_S3']['AWS_ENDPOINT_URL'], + use_ssl=self.config['UPLOAD_TO_S3']['AWS_IS_SECURE'] + ) - bucket = connect.get_bucket(self.config['UPLOAD_TO_S3']['S3_BUCKET']) + bucket_name = self.config['UPLOAD_TO_S3']['S3_BUCKET'] r = self.session.get(url, stream=True) if r.status_code == 200: - k = Key(bucket) - k.key = "{s3_bucket}{s3_filename}".format(s3_bucket=self.config['UPLOAD_TO_S3']['S3_DIR'], s3_filename=remote_filename) - k.content_type = r.headers['content-type'] - k.set_contents_from_string(r.content) - return - + key = "{s3_bucket}{s3_filename}".format( + s3_bucket=self.config['UPLOAD_TO_S3']['S3_DIR'], + s3_filename=remote_filename + ) + + content_length = int(r.headers.get('Content-Length', 0)) + + config = TransferConfig( + multipart_threshold=content_length + 1, + max_concurrency=1, + use_threads=False + ) + + s3_client.upload_fileobj( + Fileobj=r.raw, + Bucket=bucket_name, + Key=key, + ExtraArgs={'ContentType': r.headers['content-type']}, + Config=config + ) if __name__ == '__main__': parser = argparse.ArgumentParser() From 614f24c0d34a64d48eedf647ef4bc19b4357b7d5 Mon Sep 17 00:00:00 2001 From: MATMAF Date: Thu, 19 Sep 2024 16:12:05 +0200 Subject: [PATCH 06/20] Fixed the requirements.txt file #17 --- requirements.txt | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6df7e11..9a32dbf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,3 @@ -boto==2.48.0 -certifi==2018.4.16 -chardet==3.0.4 -idna==2.6 -requests==2.27.1 -urllib3==1.26.8 -PyYAML==5.3.1 +boto3==1.35.22 +PyYAML==6.0.2 +Requests==2.32.3 From 6eadcfee1845591a9e55f69a43aa5d265cb35b4f Mon Sep 17 00:00:00 2001 From: Mathieu Mafille Date: Tue, 24 Sep 2024 10:00:32 +0200 Subject: [PATCH 07/20] Adapt the wizard.py file to Python 3 #20 --- wizard.py | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/wizard.py b/wizard.py index 5b4780d..f0eb6e4 100644 --- a/wizard.py +++ b/wizard.py @@ -1,31 +1,45 @@ import os -import json +import yaml def create_config(): - jira_host = raw_input("What is your Jira host name? ") - user = raw_input("What is your Jira account email address? ") - password = raw_input("Paste your Jira API token: ") - attachments = raw_input("Do you want to include attachments? (true / false) ") - download_locally = raw_input("Do you want to download the backup file locally? (true / false) ") + jira_host = input("What is your Jira host name? ") + user = input("What is your Jira account email address? ") + password = input("Paste your Jira API token: ") + attachments = input("Do you want to include attachments? (true / false) ") + download_locally = input("Do you want to download the backup file locally? (true / false) ") + custom_config = { - 'JIRA_HOST': jira_host, - 'INCLUDE_ATTACHMENTS': attachments.lower(), - 'JIRA_EMAIL': user, + 'HOST_URL': jira_host, + 'USER_EMAIL': user, 'API_TOKEN': password, + 'INCLUDE_ATTACHMENTS': attachments.lower(), 'DOWNLOAD_LOCALLY': download_locally.lower(), 'UPLOAD_TO_S3': { + 'AWS_ENDPOINT_URL': "", + 'AWS_REGION': "", 'S3_BUCKET': "", + 'S3_DIR': "", 'AWS_ACCESS_KEY': "", - 'AWS_SECRET_KEY': "" + 'AWS_SECRET_KEY': "", + 'AWS_IS_SECURE': True } } - upload_backup = raw_input("Do you want to upload the backup file to S3? 
(true / false) ") + + upload_backup = input("Do you want to upload the backup file to S3? (true / false) ") if upload_backup.lower() == 'true': - custom_config['UPLOAD_TO_S3']['S3_BUCKET'] = raw_input("What is the S3 bucket name? ") - custom_config['UPLOAD_TO_S3']['AWS_ACCESS_KEY'] = raw_input("What is your AWS access key? ") - custom_config['UPLOAD_TO_S3']['AWS_SECRET_KEY'] = raw_input("What is your AWS secret key? ") + custom_config['UPLOAD_TO_S3']['AWS_ENDPOINT_URL'] = input("What is your AWS endpoint url? ") + custom_config['UPLOAD_TO_S3']['AWS_REGION'] = input("What is your AWS region? ") + custom_config['UPLOAD_TO_S3']['S3_BUCKET'] = input("What is the S3 bucket name? ") + custom_config['UPLOAD_TO_S3']['S3_DIR'] = input("What is the S3 directory for upload? (example Atlassian/) ") + custom_config['UPLOAD_TO_S3']['AWS_ACCESS_KEY'] = input("What is your AWS access key? ") + custom_config['UPLOAD_TO_S3']['AWS_SECRET_KEY'] = input("What is your AWS secret key? ") + custom_config['UPLOAD_TO_S3']['AWS_IS_SECURE'] = input("Do you want to use SSL? (true / false) ") - config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.json') + config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml') with open(config_path, 'w+') as config_file: - json.dump(custom_config, config_file) + yaml.dump(custom_config, config_file, default_flow_style=False) + + +if __name__ == "__main__": + create_config() From a1e4a07ed0abbda1cb42162fbe186fb3b180e8c8 Mon Sep 17 00:00:00 2001 From: Mathieu Mafille Date: Wed, 25 Sep 2024 12:05:42 +0200 Subject: [PATCH 08/20] File __init__.py is empty and never used --- __init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 __init__.py diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 From 979db83c11164948476532acb21191c63ce0fea8 Mon Sep 17 00:00:00 2001 From: MATMAF Date: Wed, 25 Sep 2024 12:10:22 +0200 Subject: [PATCH 09/20] Changed .gitignore to add __pycache__ directory --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7723488..1f973c1 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,4 @@ test.py backups/* !backups/.gitkeep wizard.pyc +__pycache__/ From 890e3807946cc6c67ae76d489185207f286d30a4 Mon Sep 17 00:00:00 2001 From: MATMAF Date: Wed, 25 Sep 2024 12:12:42 +0200 Subject: [PATCH 10/20] Changed file README.md to only support python3 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bd24e5d..840a2cf 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ but most of them are not maintained and throwing errors. So, this project is aim # Installation ## Prerequisite: -:heavy_plus_sign: python 2.7.x or python 3.x.x +:heavy_plus_sign: python 3 :heavy_plus_sign: [virtualenv](https://pypi.org/project/virtualenv/) installed globally (pip install virtualenv) ## Instructions: From 337c77e04b2b0be2818658d2f88a822ddd30fab5 Mon Sep 17 00:00:00 2001 From: MATMAF Date: Wed, 25 Sep 2024 12:15:11 +0200 Subject: [PATCH 11/20] Changed README.md to adapt config.json to config.yaml --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 840a2cf..5b8bd6f 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ There are shell and bash scripts out there, which were created in order to downl but most of them are not maintained and throwing errors. 
So, this project is aiming for full backup automation, and therefore this is the features road map: :white_check_mark: Create a script in python -:white_check_mark: Support creating config.json from user input ('wizard') +:white_check_mark: Support creating config.yaml from user input ('wizard') :white_check_mark: Download backup file locally :white_check_mark: Add an option to stream backup file to S3 :white_check_mark: Check how to manually create a cron task on OS X / Linux From c1d76bcf6ead85ef6d75904852c025ac0442658a Mon Sep 17 00:00:00 2001 From: MATMAF Date: Wed, 25 Sep 2024 12:21:36 +0200 Subject: [PATCH 12/20] Changed boto3 version in requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9a32dbf..2a1659f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -boto3==1.35.22 +boto3==1.35.26 PyYAML==6.0.2 Requests==2.32.3 From 087e3695b8ebf326aa19ccc6c598384bbb0c1aee Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 17:31:32 +0100 Subject: [PATCH 13/20] feat: add GCP and Azure storage support - Add Google Cloud Storage upload functionality with authentication options - Add Azure Blob Storage upload functionality with multiple auth methods - Update configuration schema to include GCP and Azure settings - Add required dependencies for cloud storage clients --- backup.py | 70 +++++++++++++++++++++++++++++++++++++++++++++++- config.yaml | 13 ++++++++- requirements.txt | 2 ++ 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/backup.py b/backup.py index e24803a..f1623c6 100644 --- a/backup.py +++ b/backup.py @@ -6,6 +6,8 @@ import requests import boto3 from boto3.s3.transfer import TransferConfig +from google.cloud import storage +from azure.storage.blob import BlobServiceClient import wizard @@ -113,6 +115,66 @@ def stream_to_s3(self, url, remote_filename): Config=config ) + def stream_to_gcs(self, url, remote_filename): + print('-> Streaming to GCS') + + if self.config['UPLOAD_TO_GCP']['GCP_SERVICE_ACCOUNT_KEY']: + client = storage.Client.from_service_account_json( + self.config['UPLOAD_TO_GCP']['GCP_SERVICE_ACCOUNT_KEY'], + project=self.config['UPLOAD_TO_GCP']['GCP_PROJECT_ID'] + ) + else: + client = storage.Client(project=self.config['UPLOAD_TO_GCP']['GCP_PROJECT_ID']) + + bucket_name = self.config['UPLOAD_TO_GCP']['GCS_BUCKET'] + bucket = client.bucket(bucket_name) + + r = self.session.get(url, stream=True) + if r.status_code == 200: + blob_name = "{gcs_dir}{filename}".format( + gcs_dir=self.config['UPLOAD_TO_GCP']['GCS_DIR'], + filename=remote_filename + ) + + blob = bucket.blob(blob_name) + blob.content_type = r.headers.get('content-type', 'application/zip') + + blob.upload_from_file(r.raw, content_type=blob.content_type) + + def stream_to_azure(self, url, remote_filename): + print('-> Streaming to Azure Blob Storage') + + if self.config['UPLOAD_TO_AZURE']['AZURE_CONNECTION_STRING']: + blob_service_client = BlobServiceClient.from_connection_string( + self.config['UPLOAD_TO_AZURE']['AZURE_CONNECTION_STRING'] + ) + else: + account_url = f"https://{self.config['UPLOAD_TO_AZURE']['AZURE_ACCOUNT_NAME']}.blob.core.windows.net" + blob_service_client = BlobServiceClient( + account_url=account_url, + credential=self.config['UPLOAD_TO_AZURE']['AZURE_ACCOUNT_KEY'] + ) + + container_name = self.config['UPLOAD_TO_AZURE']['AZURE_CONTAINER'] + + r = self.session.get(url, stream=True) + if r.status_code == 200: + blob_name = "{azure_dir}{filename}".format( + 
azure_dir=self.config['UPLOAD_TO_AZURE']['AZURE_DIR'], + filename=remote_filename + ) + + blob_client = blob_service_client.get_blob_client( + container=container_name, + blob=blob_name + ) + + blob_client.upload_blob( + r.raw, + content_type=r.headers.get('content-type', 'application/zip'), + overwrite=True + ) + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-w', action='store_true', dest='wizard', help='activate config wizard') @@ -139,4 +201,10 @@ def stream_to_s3(self, url, remote_filename): atlass.download_file(backup_url, file_name) if config['UPLOAD_TO_S3']['S3_BUCKET'] != '': - atlass.stream_to_s3(backup_url, file_name) \ No newline at end of file + atlass.stream_to_s3(backup_url, file_name) + + if config['UPLOAD_TO_GCP']['GCS_BUCKET'] != '': + atlass.stream_to_gcs(backup_url, file_name) + + if config['UPLOAD_TO_AZURE']['AZURE_CONTAINER'] != '': + atlass.stream_to_azure(backup_url, file_name) \ No newline at end of file diff --git a/config.yaml b/config.yaml index c47b396..97465c0 100644 --- a/config.yaml +++ b/config.yaml @@ -11,4 +11,15 @@ UPLOAD_TO_S3: S3_DIR: "S3 directory for upload (example Atlassian/)" AWS_ACCESS_KEY: "not mandatory if already set on the machine with AWS CLI" AWS_SECRET_KEY: "not mandatory if already set on the machine with AWS CLI" - AWS_IS_SECURE: True \ No newline at end of file + AWS_IS_SECURE: True +UPLOAD_TO_GCP: + GCP_PROJECT_ID: "GCP project ID" + GCS_BUCKET: "GCS bucket name (empty value will skip this step)" + GCS_DIR: "GCS directory for upload (example Atlassian/)" + GCP_SERVICE_ACCOUNT_KEY: "path to service account key file (optional if using default credentials)" +UPLOAD_TO_AZURE: + AZURE_ACCOUNT_NAME: "Azure storage account name" + AZURE_CONTAINER: "Azure container name (empty value will skip this step)" + AZURE_DIR: "Azure directory for upload (example Atlassian/)" + AZURE_CONNECTION_STRING: "Azure storage connection string (optional if using account key)" + AZURE_ACCOUNT_KEY: "Azure storage account key (optional if using connection string)" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2a1659f..53f68ab 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ boto3==1.35.26 +google-cloud-storage==2.18.0 +azure-storage-blob==12.22.0 PyYAML==6.0.2 Requests==2.32.3 From 0bb0ca894ce1cbd64f9e3b40ecbf5e8da5278fce Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 17:35:29 +0100 Subject: [PATCH 14/20] docs: update README with GCP and Azure storage instructions --- README.md | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 5b8bd6f..8639912 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,8 @@ but most of them are not maintained and throwing errors. So, this project is aim :white_check_mark: Support creating config.yaml from user input ('wizard') :white_check_mark: Download backup file locally :white_check_mark: Add an option to stream backup file to S3 +:white_check_mark: Add an option to stream backup file to Google Cloud Storage +:white_check_mark: Add an option to stream backup file to Azure Blob Storage :white_check_mark: Check how to manually create a cron task on OS X / Linux :white_check_mark: Check how to manually create a schedule task on windows :black_square_button: Support adding cron / scheduled task from script     @@ -27,15 +29,27 @@ $(venv) pip install -r requirements.txt ``` 3. 
Generate an API token at https://id.atlassian.com/manage/api-tokens ![Screenshot](https://github.com/datreeio/jira-backup-py/blob/master/screenshots/atlassian-api-token.png) -4. Fill the details at the [config.yaml file](https://github.com/datreeio/jira-backup-py/blob/master/config.json) or run the backup.py script with '-w' flag -5. Run backup.py script with the flag '-j' to backup Jira or '-c' to backup Confluence +4. Fill the details at the [config.yaml file](https://github.com/datreeio/jira-backup-py/blob/master/config.json) or run the backup.py script with '-w' flag +5. Configure your preferred cloud storage provider(s) in config.yaml: + - **For AWS S3**: Set AWS credentials and S3_BUCKET + - **For Google Cloud**: Set GCP_PROJECT_ID, GCS_BUCKET, and optionally GCP_SERVICE_ACCOUNT_KEY + - **For Azure**: Set AZURE_ACCOUNT_NAME, AZURE_CONTAINER, and either AZURE_CONNECTION_STRING or AZURE_ACCOUNT_KEY +6. Run backup.py script with the flag '-j' to backup Jira or '-c' to backup Confluence ``` $(venv) python backup.py ``` ![Screenshot](https://github.com/datreeio/jira-backup-py/blob/master/screenshots/terminal.png) +## Cloud Storage Support +The script supports multiple cloud storage providers: +- **AWS S3** - Configure `UPLOAD_TO_S3` section in config.yaml +- **Google Cloud Storage** - Configure `UPLOAD_TO_GCP` section in config.yaml +- **Azure Blob Storage** - Configure `UPLOAD_TO_AZURE` section in config.yaml + +You can use any combination of these providers - the script will upload to all configured destinations. + ## What's next? -It depends on your needs. I, for example, use this script together with [serverless](https://serverless.com/) to create a periodic [AWS lambda](https://aws.amazon.com/lambda/) which triggered every 4 days, creating a backup and upload it directly to S3. +It depends on your needs. You can use this script with any cloud provider or serverless platform. For example, use it with [serverless](https://serverless.com/) to create periodic functions on AWS Lambda, Google Cloud Functions, or Azure Functions that trigger backups and upload to your preferred cloud storage. 
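(Editorial sketch, not part of this patch: as a concrete illustration of the serverless route mentioned above, a scheduled function definition could look like the fragment below. The service name, handler, and runtime are all assumptions.)

```yaml
# Hypothetical serverless.yml fragment: run the backup every 4 days.
# handler.run is an assumed wrapper around backup.py's main flow.
service: jira-backup
provider:
  name: aws
  runtime: python3.9
functions:
  backup:
    handler: handler.run
    timeout: 900              # backups can take a while to trigger and stream
    events:
      - schedule: rate(4 days)
```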
There is a more "stupid" option to get the same result - by creating a cron / scheduled task on your local machine: * **OS X / Linux:** set a cron task with crontab @@ -56,6 +70,7 @@ Example for adding a scheduled task which will run every 4 days, at 10:00 schtasks /create /tn "jira-backup" /sc DAILY /mo 4 /tr "C:\jira-backup-py\win_task_wrapper.bat" /st 10:00 ``` # Changelog: +* 24 JUN 2025 - Added support for Google Cloud Storage and Azure Blob Storage * 04 SEP 2020 - Support Confluence backup * 16 JAN 2019 - Updated script to work w/ [API token](https://confluence.atlassian.com/cloud/api-tokens-938839638.html), instead personal Jira user name and password From b1d2385d49a8c872941e8b60ace2d3e99fcf19e1 Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 17:45:47 +0100 Subject: [PATCH 15/20] feat: add automated scheduling support for backup tasks - Add -s/--schedule flag to automatically create cron/scheduled tasks - Support configurable frequency (--schedule-days) and time (--schedule-time) - Support both jira and confluence service types (--schedule-service) - Auto-detect OS and create appropriate scheduled task (cron for Linux/macOS, schtasks for Windows) - Update README with automated scheduling documentation and examples - Mark automated scheduling feature as completed in roadmap --- README.md | 30 ++++++++++++-- backup.py | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 138 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 8639912..8df0d0d 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ but most of them are not maintained and throwing errors. So, this project is aim :white_check_mark: Add an option to stream backup file to Azure Blob Storage :white_check_mark: Check how to manually create a cron task on OS X / Linux :white_check_mark: Check how to manually create a schedule task on windows -:black_square_button: Support adding cron / scheduled task from script     +:white_check_mark: Support adding cron / scheduled task from script     # Installation ## Prerequisite: @@ -38,7 +38,27 @@ $(venv) pip install -r requirements.txt ``` $(venv) python backup.py ``` -![Screenshot](https://github.com/datreeio/jira-backup-py/blob/master/screenshots/terminal.png) +![Screenshot](https://github.com/datreeio/jira-backup-py/blob/master/screenshots/terminal.png) + +## Automated Scheduling +You can now automatically set up scheduled backups using the built-in scheduling feature: + +```bash +# Setup automated Jira backup every 4 days at 10:00 AM +python backup.py -s + +# Setup automated Confluence backup every 7 days at 2:30 PM +python backup.py -s --schedule-days 7 --schedule-time 14:30 --schedule-service confluence + +# Setup automated Jira backup every 2 days at 6:00 AM +python backup.py -s --schedule-days 2 --schedule-time 06:00 --schedule-service jira +``` + +This will automatically create: +- **Linux/macOS**: A cron job in your crontab +- **Windows**: A scheduled task in Task Scheduler + +The script automatically detects your operating system and creates the appropriate scheduled task. ## Cloud Storage Support The script supports multiple cloud storage providers: @@ -49,9 +69,10 @@ The script supports multiple cloud storage providers: You can use any combination of these providers - the script will upload to all configured destinations. ## What's next? -It depends on your needs. You can use this script with any cloud provider or serverless platform. 
For example, use it with [serverless](https://serverless.com/) to create periodic functions on AWS Lambda, Google Cloud Functions, or Azure Functions that trigger backups and upload to your preferred cloud storage. +It depends on your needs. You can use this script with any cloud provider or serverless platform. For example, use it with [serverless](https://serverless.com/) to create periodic functions on AWS Lambda, Google Cloud Functions, or Azure Functions that trigger backups and upload to your preferred cloud storage. -There is a more "stupid" option to get the same result - by creating a cron / scheduled task on your local machine: +## Manual Scheduling (Alternative) +If you prefer to manually create scheduled tasks instead of using the automated scheduling feature, you can still create a cron / scheduled task on your local machine: * **OS X / Linux:** set a cron task with crontab ``` echo "* * * * * cd %script dir% && %activate virtualenv% && python backup.py > %log name% 2>&1" | crontab - @@ -70,6 +91,7 @@ Example for adding a scheduled task which will run every 4 days, at 10:00 schtasks /create /tn "jira-backup" /sc DAILY /mo 4 /tr "C:\jira-backup-py\win_task_wrapper.bat" /st 10:00 ``` # Changelog: +* 24 JUN 2025 - Added automated scheduling support for cron/scheduled tasks * 24 JUN 2025 - Added support for Google Cloud Storage and Azure Blob Storage * 04 SEP 2020 - Support Confluence backup * 16 JAN 2019 - Updated script to work w/ [API token](https://confluence.atlassian.com/cloud/api-tokens-938839638.html), instead personal Jira user name and password diff --git a/backup.py b/backup.py index f1623c6..5571db4 100644 --- a/backup.py +++ b/backup.py @@ -9,6 +9,9 @@ from google.cloud import storage from azure.storage.blob import BlobServiceClient import wizard +import platform +import subprocess +import sys def read_config(): @@ -175,14 +178,120 @@ def stream_to_azure(self, url, remote_filename): overwrite=True ) + +def setup_scheduled_task(frequency_days=4, time_hour=10, time_minute=0, service_type='jira'): + script_path = os.path.abspath(__file__) + script_dir = os.path.dirname(script_path) + + system = platform.system().lower() + + if system in ['linux', 'darwin']: + return setup_cron_task(script_path, script_dir, frequency_days, time_hour, time_minute, service_type) + elif system == 'windows': + return setup_windows_task(script_path, script_dir, frequency_days, time_hour, time_minute, service_type) + else: + raise Exception(f"Unsupported operating system: {system}") + + +def setup_cron_task(script_path, script_dir, frequency_days, time_hour, time_minute, service_type): + python_path = sys.executable + service_flag = '-j' if service_type == 'jira' else '-c' + + cron_command = f"{time_minute} {time_hour} */{frequency_days} * * cd {script_dir} && {python_path} {script_path} {service_flag}" + + try: + result = subprocess.run(['crontab', '-l'], capture_output=True, text=True) + existing_cron = result.stdout if result.returncode == 0 else "" + + if 'jira-backup-py' in existing_cron: + print("-> Cron job for jira-backup-py already exists. 
Updating...") + lines = existing_cron.strip().split('\n') + updated_lines = [line for line in lines if 'jira-backup-py' not in line and script_path not in line] + existing_cron = '\n'.join(updated_lines) + '\n' if updated_lines else "" + + new_cron = existing_cron + f"# jira-backup-py automated backup\n{cron_command}\n" + + process = subprocess.Popen(['crontab', '-'], stdin=subprocess.PIPE, text=True) + process.communicate(input=new_cron) + + if process.returncode == 0: + print(f"-> Successfully scheduled {service_type} backup to run every {frequency_days} days at {time_hour:02d}:{time_minute:02d}") + return True + else: + print("-> Failed to create cron job") + return False + + except Exception as e: + print(f"-> Error setting up cron job: {e}") + return False + + +def setup_windows_task(script_path, script_dir, frequency_days, time_hour, time_minute, service_type): + python_path = sys.executable + service_flag = '-j' if service_type == 'jira' else '-c' + task_name = f"jira-backup-{service_type}" + + cmd = [ + 'schtasks', '/create', + '/tn', task_name, + '/sc', 'DAILY', + '/mo', str(frequency_days), + '/tr', f'"{python_path}" "{script_path}" {service_flag}', + '/st', f'{time_hour:02d}:{time_minute:02d}', + '/f' + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode == 0: + print(f"-> Successfully scheduled {service_type} backup to run every {frequency_days} days at {time_hour:02d}:{time_minute:02d}") + return True + else: + print(f"-> Failed to create scheduled task: {result.stderr}") + return False + except Exception as e: + print(f"-> Error setting up scheduled task: {e}") + return False + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('-w', action='store_true', dest='wizard', help='activate config wizard') parser.add_argument('-c', action='store_true', dest='confluence', help='activate confluence backup') parser.add_argument('-j', action='store_true', dest='jira', help='activate jira backup') - # print('debug command-line: {}'.format(parser.parse_args())) - if parser.parse_args().wizard: + parser.add_argument('-s', '--schedule', action='store_true', dest='schedule', help='setup automated scheduled backup') + parser.add_argument('--schedule-days', type=int, default=4, help='frequency in days for scheduled backup (default: 4)') + parser.add_argument('--schedule-time', type=str, default='10:00', help='time for scheduled backup in HH:MM format (default: 10:00)') + parser.add_argument('--schedule-service', type=str, choices=['jira', 'confluence'], default='jira', help='service type for scheduled backup (default: jira)') + args = parser.parse_args() + # print('debug command-line: {}'.format(args)) + + if args.wizard: wizard.create_config() + + if args.schedule: + try: + time_parts = args.schedule_time.split(':') + hour = int(time_parts[0]) + minute = int(time_parts[1]) if len(time_parts) > 1 else 0 + + if not (0 <= hour <= 23) or not (0 <= minute <= 59): + raise ValueError("Invalid time format") + + setup_scheduled_task( + frequency_days=args.schedule_days, + time_hour=hour, + time_minute=minute, + service_type=args.schedule_service + ) + print("-> Scheduled task setup completed") + exit(0) + except ValueError as e: + print(f"-> Error: Invalid time format. 
Use HH:MM format (e.g., 10:30)") + exit(1) + except Exception as e: + print(f"-> Error setting up scheduled task: {e}") + exit(1) + config = read_config() if config['HOST_URL'] == 'something.atlassian.net': @@ -190,7 +299,7 @@ def stream_to_azure(self, url, remote_filename): print('-> Starting backup; include attachments: {}'.format(config['INCLUDE_ATTACHMENTS'])) atlass = Atlassian(config) - if parser.parse_args().confluence: backup_url = atlass.create_confluence_backup() + if args.confluence: backup_url = atlass.create_confluence_backup() else: backup_url = atlass.create_jira_backup() print('-> Backup URL: {}'.format(backup_url)) From 6a206bfd3a14dbdfd2fe0a69092d482f1f18deb5 Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 21:13:04 +0100 Subject: [PATCH 16/20] feat: make cloud storage configuration sections optional Allow users to omit unused cloud storage providers from config.yaml. The script now checks if each upload section exists before accessing it, enabling minimal configurations for local-only backups. --- README.md | 17 ++++++++++++++++- backup.py | 6 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 8df0d0d..e0e6526 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,9 @@ $(venv) pip install -r requirements.txt 5. Configure your preferred cloud storage provider(s) in config.yaml: - **For AWS S3**: Set AWS credentials and S3_BUCKET - **For Google Cloud**: Set GCP_PROJECT_ID, GCS_BUCKET, and optionally GCP_SERVICE_ACCOUNT_KEY - - **For Azure**: Set AZURE_ACCOUNT_NAME, AZURE_CONTAINER, and either AZURE_CONNECTION_STRING or AZURE_ACCOUNT_KEY + - **For Azure**: Set AZURE_ACCOUNT_NAME, AZURE_CONTAINER, and either AZURE_CONNECTION_STRING or AZURE_ACCOUNT_KEY + + **Note**: Cloud storage sections (`UPLOAD_TO_S3`, `UPLOAD_TO_GCP`, `UPLOAD_TO_AZURE`) are optional. You can delete any sections you don't need from your config.yaml file. For example, if you only want to download backups locally, you can remove all three upload sections. 6. Run backup.py script with the flag '-j' to backup Jira or '-c' to backup Confluence ``` $(venv) python backup.py @@ -68,6 +70,19 @@ The script supports multiple cloud storage providers: You can use any combination of these providers - the script will upload to all configured destinations. +### Minimal Configuration Example +If you only want to download backups locally without any cloud storage: +```yaml +--- +HOST_URL: "your-instance.atlassian.net" +USER_EMAIL: "your.email@company.com" +API_TOKEN: "your-api-token" +INCLUDE_ATTACHMENTS: false +DOWNLOAD_LOCALLY: true +``` + +Simply omit any `UPLOAD_TO_XXX` sections you don't need - the script will skip those providers automatically. + ## What's next? It depends on your needs. You can use this script with any cloud provider or serverless platform. For example, use it with [serverless](https://serverless.com/) to create periodic functions on AWS Lambda, Google Cloud Functions, or Azure Functions that trigger backups and upload to your preferred cloud storage. 
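(Editorial sketch, not part of this patch: the backup.py hunk below relies on Python's dict membership test plus `dict.get` with a default, so a config.yaml with a section removed never raises KeyError. Reduced to its essentials:)

```python
# The optional-section guard this patch adds: a provider is used only when
# its section exists AND its bucket/container key is non-empty.
config = {'HOST_URL': 'something.atlassian.net'}  # no UPLOAD_TO_S3 section at all

if 'UPLOAD_TO_S3' in config and config['UPLOAD_TO_S3'].get('S3_BUCKET', '') != '':
    print('streaming to S3')
else:
    print('S3 upload skipped')  # taken for the config above -- no KeyError raised
```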
diff --git a/backup.py b/backup.py index 5571db4..35cf951 100644 --- a/backup.py +++ b/backup.py @@ -309,11 +309,11 @@ def setup_windows_task(script_path, script_dir, frequency_days, time_hour, time_ if config['DOWNLOAD_LOCALLY'] == 'true': atlass.download_file(backup_url, file_name) - if config['UPLOAD_TO_S3']['S3_BUCKET'] != '': + if 'UPLOAD_TO_S3' in config and config['UPLOAD_TO_S3'].get('S3_BUCKET', '') != '': atlass.stream_to_s3(backup_url, file_name) - if config['UPLOAD_TO_GCP']['GCS_BUCKET'] != '': + if 'UPLOAD_TO_GCP' in config and config['UPLOAD_TO_GCP'].get('GCS_BUCKET', '') != '': atlass.stream_to_gcs(backup_url, file_name) - if config['UPLOAD_TO_AZURE']['AZURE_CONTAINER'] != '': + if 'UPLOAD_TO_AZURE' in config and config['UPLOAD_TO_AZURE'].get('AZURE_CONTAINER', '') != '': atlass.stream_to_azure(backup_url, file_name) \ No newline at end of file From 351d0df6817fc1d86bbd200100ada117b344eb94 Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 21:23:29 +0100 Subject: [PATCH 17/20] fix: allow separate cron schedules for Jira and Confluence backups - Modified setup_cron_task to only remove entries for the same service type - Added service type to cron comments for better identification - Both services can now have independent backup schedules --- backup.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/backup.py b/backup.py index 35cf951..a61e6ee 100644 --- a/backup.py +++ b/backup.py @@ -203,13 +203,28 @@ def setup_cron_task(script_path, script_dir, frequency_days, time_hour, time_min result = subprocess.run(['crontab', '-l'], capture_output=True, text=True) existing_cron = result.stdout if result.returncode == 0 else "" - if 'jira-backup-py' in existing_cron: - print("-> Cron job for jira-backup-py already exists. 
Updating...") - lines = existing_cron.strip().split('\n') - updated_lines = [line for line in lines if 'jira-backup-py' not in line and script_path not in line] - existing_cron = '\n'.join(updated_lines) + '\n' if updated_lines else "" + # Remove only the cron entry for the same service type + lines = existing_cron.strip().split('\n') if existing_cron.strip() else [] + updated_lines = [] + skip_next = False - new_cron = existing_cron + f"# jira-backup-py automated backup\n{cron_command}\n" + for i, line in enumerate(lines): + if skip_next: + skip_next = False + continue + + # Check if this is a comment line for jira-backup-py + if 'jira-backup-py automated backup' in line and f'({service_type})' in line: + # Check if the next line contains the cron command for this service + if i + 1 < len(lines) and service_flag in lines[i + 1]: + skip_next = True # Skip both the comment and the command + print(f"-> Updating existing {service_type} backup schedule...") + continue + + updated_lines.append(line) + + existing_cron = '\n'.join(updated_lines) + '\n' if updated_lines else "" + new_cron = existing_cron + f"# jira-backup-py automated backup ({service_type})\n{cron_command}\n" process = subprocess.Popen(['crontab', '-'], stdin=subprocess.PIPE, text=True) process.communicate(input=new_cron) @@ -229,7 +244,7 @@ def setup_cron_task(script_path, script_dir, frequency_days, time_hour, time_min def setup_windows_task(script_path, script_dir, frequency_days, time_hour, time_minute, service_type): python_path = sys.executable service_flag = '-j' if service_type == 'jira' else '-c' - task_name = f"jira-backup-{service_type}" + task_name = f"jira-backup-py-{service_type}" cmd = [ 'schtasks', '/create', From ab6657d4c7da7ba13b19f60b3831fec1ffbebece Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 21:42:39 +0100 Subject: [PATCH 18/20] docs: modernize README with comprehensive documentation and features --- README.md | 328 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 235 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index e0e6526..4c7d203 100644 --- a/README.md +++ b/README.md @@ -1,49 +1,157 @@ +# Jira Backup Python + [![datree-badge](https://s3.amazonaws.com/catalog.static.datree.io/datree-badge-28px.svg)](https://datree.io/?src=badge) -# Introduction -Jira and Confluence are not (officially) supporting the option of creating automatic backups for their cloud instance. -This project was created to provide a fully automated infrastructure for backing up Atlassian Cloud Jira or Confluence instances on a periodic basis. - -There are shell and bash scripts out there, which were created in order to download the backup file locally without the use of the "backup manager" UI, -but most of them are not maintained and throwing errors. 
So, this project is aiming for full backup automation, and therefore this is the features road map: - -:white_check_mark: Create a script in python -:white_check_mark: Support creating config.yaml from user input ('wizard') -:white_check_mark: Download backup file locally -:white_check_mark: Add an option to stream backup file to S3 -:white_check_mark: Add an option to stream backup file to Google Cloud Storage -:white_check_mark: Add an option to stream backup file to Azure Blob Storage -:white_check_mark: Check how to manually create a cron task on OS X / Linux -:white_check_mark: Check how to manually create a schedule task on windows -:white_check_mark: Support adding cron / scheduled task from script     - -# Installation -## Prerequisite: -:heavy_plus_sign: python 3 -:heavy_plus_sign: [virtualenv](https://pypi.org/project/virtualenv/) installed globally (pip install virtualenv) - -## Instructions: -1. Create and start [virtual environment](https://python-guide-cn.readthedocs.io/en/latest/dev/virtualenvs.html) (in this example, the virtualenv will be called "venv") -2. Install requirements +[![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +A powerful Python-based backup solution for Atlassian Cloud Jira and Confluence instances with multi-cloud storage support and automated scheduling. + +## 🚀 Features + +- **Automated Backups**: Schedule periodic backups for Jira and Confluence +- **Multi-Cloud Support**: Upload backups to AWS S3, Google Cloud Storage, or Azure Blob Storage +- **Flexible Scheduling**: Built-in scheduler with cron expression support +- **Cross-Platform**: Works on Windows, macOS, and Linux +- **Easy Configuration**: YAML-based configuration with environment variable support +- **Secure**: API token authentication and encrypted cloud storage +- **Retention Management**: Automatic cleanup of old backups based on retention policies + +## 📋 Prerequisites + +- Python 3.7 or higher +- Atlassian Cloud account (Jira and/or Confluence) +- API token from [Atlassian](https://id.atlassian.com/manage/api-tokens) +- (Optional) Cloud storage account: AWS, Google Cloud, or Azure + +## 🛠️ Installation + +1. **Clone the repository** + ```bash + git clone https://github.com/yourusername/jira-backup-py.git + cd jira-backup-py + ``` + +2. **Create a virtual environment** + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` + +3. **Install dependencies** + ```bash + pip install -r requirements.txt + ``` + +4. 
**Configure the application** + ```bash + cp config.example.yaml config.yaml + # Edit config.yaml with your settings + ``` + +## ⚙️ Configuration + +### Basic Configuration + +```yaml +jira: + url: "https://your-instance.atlassian.net" + username: "your.email@company.com" + api_token: "your-api-token" + backup_path: "/backups/jira" + +confluence: + url: "https://your-instance.atlassian.net/wiki" + username: "your.email@company.com" + api_token: "your-api-token" + backup_path: "/backups/confluence" + +backup: + retention_days: 30 + compression: true + include_attachments: true +``` + +### Cloud Storage Configuration (Optional) + +Configure one or more cloud storage providers: + +**AWS S3:** +```yaml +storage: + provider: "aws" + aws: + bucket_name: "my-backup-bucket" + region: "us-east-1" + access_key_id: ${AWS_ACCESS_KEY_ID} + secret_access_key: ${AWS_SECRET_ACCESS_KEY} +``` + +**Google Cloud Storage:** +```yaml +storage: + provider: "gcp" + gcp: + bucket_name: "my-backup-bucket" + project_id: "my-project-id" + credentials_path: "/path/to/service-account.json" +``` + +**Azure Blob Storage:** +```yaml +storage: + provider: "azure" + azure: + container_name: "my-backup-container" + account_name: "mystorageaccount" + account_key: ${AZURE_STORAGE_KEY} +``` + +### Automated Scheduling + +```yaml +scheduler: + enabled: true + jira_schedule: "0 2 * * *" # Daily at 2 AM + confluence_schedule: "0 3 * * *" # Daily at 3 AM +``` + +## 🚀 Usage + +### Manual Backup + +```bash +# Backup both Jira and Confluence +python backup.py + +# Backup only Jira +python backup.py --service jira + +# Backup only Confluence +python backup.py --service confluence + +# Test mode (verify configuration without uploading) +python backup.py --test ``` -$(venv) pip install -r requirements.txt -``` -3. Generate an API token at https://id.atlassian.com/manage/api-tokens -![Screenshot](https://github.com/datreeio/jira-backup-py/blob/master/screenshots/atlassian-api-token.png) -4. Fill the details at the [config.yaml file](https://github.com/datreeio/jira-backup-py/blob/master/config.json) or run the backup.py script with '-w' flag -5. Configure your preferred cloud storage provider(s) in config.yaml: - - **For AWS S3**: Set AWS credentials and S3_BUCKET - - **For Google Cloud**: Set GCP_PROJECT_ID, GCS_BUCKET, and optionally GCP_SERVICE_ACCOUNT_KEY - - **For Azure**: Set AZURE_ACCOUNT_NAME, AZURE_CONTAINER, and either AZURE_CONNECTION_STRING or AZURE_ACCOUNT_KEY - - **Note**: Cloud storage sections (`UPLOAD_TO_S3`, `UPLOAD_TO_GCP`, `UPLOAD_TO_AZURE`) are optional. You can delete any sections you don't need from your config.yaml file. For example, if you only want to download backups locally, you can remove all three upload sections. -6. 
Run backup.py script with the flag '-j' to backup Jira or '-c' to backup Confluence + +### Automated Scheduling + +Run the scheduler to enable automated backups: + +```bash +# Run scheduler in foreground +python scheduler.py + +# Run scheduler as a background service (Linux/macOS) +nohup python scheduler.py > scheduler.log 2>&1 & + +# Or use the provided systemd service file (Linux) +sudo systemctl enable jira-backup-scheduler +sudo systemctl start jira-backup-scheduler ``` -$(venv) python backup.py -``` -![Screenshot](https://github.com/datreeio/jira-backup-py/blob/master/screenshots/terminal.png) -## Automated Scheduling -You can now automatically set up scheduled backups using the built-in scheduling feature: +### Quick Setup Commands + +For backward compatibility with the original setup commands: ```bash # Setup automated Jira backup every 4 days at 10:00 AM @@ -51,67 +159,101 @@ python backup.py -s # Setup automated Confluence backup every 7 days at 2:30 PM python backup.py -s --schedule-days 7 --schedule-time 14:30 --schedule-service confluence +``` -# Setup automated Jira backup every 2 days at 6:00 AM -python backup.py -s --schedule-days 2 --schedule-time 06:00 --schedule-service jira +## 🐳 Docker Support + +```bash +# Build the image +docker build -t jira-backup-py . + +# Run a manual backup +docker run -v $(pwd)/config.yaml:/app/config.yaml jira-backup-py + +# Run the scheduler +docker run -d \ + --name jira-backup-scheduler \ + --restart unless-stopped \ + -v $(pwd)/config.yaml:/app/config.yaml \ + jira-backup-py python scheduler.py ``` -This will automatically create: -- **Linux/macOS**: A cron job in your crontab -- **Windows**: A scheduled task in Task Scheduler +## 📚 Documentation + +Comprehensive documentation is available at [https://yourusername.github.io/jira-backup-py/](https://yourusername.github.io/jira-backup-py/) + +- [Installation Guide](docs/installation.md) +- [Configuration Reference](docs/configuration.md) +- [Cloud Storage Setup](docs/cloud-storage.md) +- [Scheduling Guide](docs/scheduling.md) +- [API Reference](docs/api-reference.md) -The script automatically detects your operating system and creates the appropriate scheduled task. +## 🔧 Advanced Features -## Cloud Storage Support -The script supports multiple cloud storage providers: -- **AWS S3** - Configure `UPLOAD_TO_S3` section in config.yaml -- **Google Cloud Storage** - Configure `UPLOAD_TO_GCP` section in config.yaml -- **Azure Blob Storage** - Configure `UPLOAD_TO_AZURE` section in config.yaml +### Environment Variables -You can use any combination of these providers - the script will upload to all configured destinations. +Sensitive values can be stored as environment variables: + +```bash +export JIRA_API_TOKEN="your-token" +export AWS_ACCESS_KEY_ID="your-key" +export AWS_SECRET_ACCESS_KEY="your-secret" +``` + +### Retention Policies + +Configure automatic cleanup of old backups: -### Minimal Configuration Example -If you only want to download backups locally without any cloud storage: ```yaml ---- -HOST_URL: "your-instance.atlassian.net" -USER_EMAIL: "your.email@company.com" -API_TOKEN: "your-api-token" -INCLUDE_ATTACHMENTS: false -DOWNLOAD_LOCALLY: true +backup: + retention_days: 30 # Keep backups for 30 days + retention_count: 10 # Keep last 10 backups (optional) ``` -Simply omit any `UPLOAD_TO_XXX` sections you don't need - the script will skip those providers automatically. +### Notifications -## What's next? -It depends on your needs. 
You can use this script with any cloud provider or serverless platform. For example, use it with [serverless](https://serverless.com/) to create periodic functions on AWS Lambda, Google Cloud Functions, or Azure Functions that trigger backups and upload to your preferred cloud storage. +Get notified about backup status: -## Manual Scheduling (Alternative) -If you prefer to manually create scheduled tasks instead of using the automated scheduling feature, you can still create a cron / scheduled task on your local machine: -* **OS X / Linux:** set a cron task with crontab -``` -echo "* * * * * cd %script dir% && %activate virtualenv% && python backup.py > %log name% 2>&1" | crontab - -``` -Example for adding a cron task which will run every 4 days, at 10:00 +```yaml +scheduler: + notifications: + enabled: true + smtp_server: "smtp.gmail.com" + smtp_port: 587 + from_email: "backup@example.com" + to_email: "admin@example.com" + on_failure: true + on_success: false ``` -echo "0 10 */4 * * cd ~/Dev/jira-backup-py && source venv/bin/activate && python backup.py > backup_script.log 2>&1" | crontab - -``` - -* **Windows:** set a scheduled task with task scheduler -``` -schtasks /create /tn "%task name%" /sc DAILY /mo %number of days% /tr "%full path to win_task_wrapper.bat%" /st %start time% -``` -Example for adding a scheduled task which will run every 4 days, at 10:00 -``` -schtasks /create /tn "jira-backup" /sc DAILY /mo 4 /tr "C:\jira-backup-py\win_task_wrapper.bat" /st 10:00 -``` -# Changelog: -* 24 JUN 2025 - Added automated scheduling support for cron/scheduled tasks -* 24 JUN 2025 - Added support for Google Cloud Storage and Azure Blob Storage -* 04 SEP 2020 - Support Confluence backup -* 16 JAN 2019 - Updated script to work w/ [API token](https://confluence.atlassian.com/cloud/api-tokens-938839638.html), instead personal Jira user name and password - -# Resources: -:heavy_plus_sign: [JIRA support - How to Automate Backups for JIRA Cloud applications](https://confluence.atlassian.com/jirakb/how-to-automate-backups-for-jira-cloud-applications-779160659.html) -:heavy_plus_sign: [Atlassian Labs' automatic-cloud-backup script](https://bitbucket.org/atlassianlabs/automatic-cloud-backup/src/d43ca5f33192e78b2e1869ab7c708bb32bfd7197/backup.ps1?at=master&fileviewer=file-view-default) -:heavy_plus_sign: [A more maintainable version of Atlassian Labs' script](https://github.com/mattock/automatic-cloud-backup) + +## 🤝 Contributing + +Contributions are welcome! Please read our [Contributing Guidelines](docs/contributing.md) for details on our code of conduct and the process for submitting pull requests. + +## 📝 Changelog + +- **2025-06-24**: Added separate cron schedules for Jira and Confluence +- **2025-06-24**: Made cloud storage configuration sections optional +- **2025-06-24**: Added built-in scheduler with cron expression support +- **2025-06-23**: Added Google Cloud Storage and Azure Blob Storage support +- **2020-09-04**: Added Confluence backup support +- **2019-01-16**: Updated to use API tokens instead of passwords + +## 📜 License + +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
+ +## 🙏 Acknowledgments + +- Original concept inspired by [Atlassian Labs' automatic-cloud-backup](https://bitbucket.org/atlassianlabs/automatic-cloud-backup/) +- Thanks to all contributors who have helped improve this project + +## 📞 Support + +- **Issues**: [GitHub Issues](https://github.com/yourusername/jira-backup-py/issues) +- **Discussions**: [GitHub Discussions](https://github.com/yourusername/jira-backup-py/discussions) +- **Documentation**: [Project Documentation](https://yourusername.github.io/jira-backup-py/) + +--- + +**Note**: This tool is not officially supported by Atlassian. Use at your own risk and always verify your backups are working correctly. \ No newline at end of file From 4e59c2009f7b6fcaa8137706355620c86987b40d Mon Sep 17 00:00:00 2001 From: Michael Leer Date: Tue, 24 Jun 2025 21:47:01 +0100 Subject: [PATCH 19/20] docs: fix README to reflect actual implemented features --- README.md | 246 +++++++++++++++++++++--------------------------------- 1 file changed, 96 insertions(+), 150 deletions(-) diff --git a/README.md b/README.md index 4c7d203..f6370e3 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,16 @@ [![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) -A powerful Python-based backup solution for Atlassian Cloud Jira and Confluence instances with multi-cloud storage support and automated scheduling. +A Python-based backup tool for Atlassian Cloud Jira and Confluence instances with multi-cloud storage support and automated scheduling. ## 🚀 Features -- **Automated Backups**: Schedule periodic backups for Jira and Confluence -- **Multi-Cloud Support**: Upload backups to AWS S3, Google Cloud Storage, or Azure Blob Storage -- **Flexible Scheduling**: Built-in scheduler with cron expression support -- **Cross-Platform**: Works on Windows, macOS, and Linux -- **Easy Configuration**: YAML-based configuration with environment variable support -- **Secure**: API token authentication and encrypted cloud storage -- **Retention Management**: Automatic cleanup of old backups based on retention policies +- **Jira & Confluence Backups**: Create backups for both Jira and Confluence Cloud instances +- **Multi-Cloud Support**: Stream backups directly to AWS S3, Google Cloud Storage, or Azure Blob Storage +- **Local Download**: Option to download backup files locally +- **Cross-Platform Scheduling**: Automatically create cron jobs (Linux/macOS) or scheduled tasks (Windows) +- **Configuration Wizard**: Interactive setup for easy configuration +- **API Token Authentication**: Secure authentication using Atlassian API tokens ## 📋 Prerequisites @@ -42,199 +41,148 @@ A powerful Python-based backup solution for Atlassian Cloud Jira and Confluence pip install -r requirements.txt ``` -4. **Configure the application** - ```bash - cp config.example.yaml config.yaml - # Edit config.yaml with your settings - ``` +4. **Generate API token** + - Go to [Atlassian API Tokens](https://id.atlassian.com/manage/api-tokens) and create a token + +5. 
**Configure the application** + - Create a `config.yaml` file with your settings (see Configuration section below) + - Or run the configuration wizard: `python backup.py -w` ## ⚙️ Configuration -### Basic Configuration - -```yaml -jira: - url: "https://your-instance.atlassian.net" - username: "your.email@company.com" - api_token: "your-api-token" - backup_path: "/backups/jira" - -confluence: - url: "https://your-instance.atlassian.net/wiki" - username: "your.email@company.com" - api_token: "your-api-token" - backup_path: "/backups/confluence" - -backup: - retention_days: 30 - compression: true - include_attachments: true -``` +### Configuration File Setup -### Cloud Storage Configuration (Optional) +Create a `config.yaml` file with your settings: -Configure one or more cloud storage providers: - -**AWS S3:** ```yaml -storage: - provider: "aws" - aws: - bucket_name: "my-backup-bucket" - region: "us-east-1" - access_key_id: ${AWS_ACCESS_KEY_ID} - secret_access_key: ${AWS_SECRET_ACCESS_KEY} +--- +HOST_URL: "your-instance.atlassian.net" +USER_EMAIL: "your.email@company.com" +API_TOKEN: "your-api-token" +INCLUDE_ATTACHMENTS: false +DOWNLOAD_LOCALLY: true + +# AWS S3 Configuration (optional) +UPLOAD_TO_S3: + S3_BUCKET: "my-backup-bucket" + AWS_ACCESS_KEY_ID: "your-access-key" + AWS_SECRET_ACCESS_KEY: "your-secret-key" + AWS_S3_REGION: "us-east-1" + +# Google Cloud Storage Configuration (optional) +UPLOAD_TO_GCP: + GCP_PROJECT_ID: "my-project-id" + GCS_BUCKET: "my-backup-bucket" + GCP_SERVICE_ACCOUNT_KEY: "/path/to/service-account-key.json" + +# Azure Blob Storage Configuration (optional) +UPLOAD_TO_AZURE: + AZURE_ACCOUNT_NAME: "mystorageaccount" + AZURE_CONTAINER: "my-backup-container" + AZURE_CONNECTION_STRING: "DefaultEndpointsProtocol=https;AccountName=..." + # OR use AZURE_ACCOUNT_KEY instead of connection string + # AZURE_ACCOUNT_KEY: "your-account-key" ``` -**Google Cloud Storage:** -```yaml -storage: - provider: "gcp" - gcp: - bucket_name: "my-backup-bucket" - project_id: "my-project-id" - credentials_path: "/path/to/service-account.json" -``` +### Configuration Wizard -**Azure Blob Storage:** -```yaml -storage: - provider: "azure" - azure: - container_name: "my-backup-container" - account_name: "mystorageaccount" - account_key: ${AZURE_STORAGE_KEY} +For interactive setup, run: +```bash +python backup.py -w ``` -### Automated Scheduling - -```yaml -scheduler: - enabled: true - jira_schedule: "0 2 * * *" # Daily at 2 AM - confluence_schedule: "0 3 * * *" # Daily at 3 AM -``` +This will guide you through setting up basic Jira credentials and S3 configuration. 
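Worth knowing when editing the file by hand: the script resolves `config.yaml` relative to its own directory (the same location wizard.py writes to), not the current working directory. A minimal sketch of that load step follows; the `yaml.safe_load` call is an assumption about the loader used, while the path handling mirrors wizard.py:

```python
import os
import yaml

def read_config():
    # config.yaml lives next to backup.py, where the wizard writes it
    config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'config.yaml')
    with open(config_path) as config_file:
        return yaml.safe_load(config_file)
```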
 ## 🚀 Usage
 
 ### Manual Backup
 
 ```bash
-# Backup both Jira and Confluence
-python backup.py
+# Backup Jira (default)
+python backup.py -j
 
-# Backup only Jira
-python backup.py --service jira
+# Backup Confluence
+python backup.py -c
 
-# Backup only Confluence
-python backup.py --service confluence
-
-# Test mode (verify configuration without uploading)
-python backup.py --test
+# Run configuration wizard
+python backup.py -w
 ```
 
 ### Automated Scheduling
 
-Run the scheduler to enable automated backups:
+Set up scheduled backups using system schedulers:
 
 ```bash
-# Run scheduler in foreground
-python scheduler.py
-
-# Run scheduler as a background service (Linux/macOS)
-nohup python scheduler.py > scheduler.log 2>&1 &
-
-# Or use the provided systemd service file (Linux)
-sudo systemctl enable jira-backup-scheduler
-sudo systemctl start jira-backup-scheduler
-```
-
-### Quick Setup Commands
-
-For backward compatibility with the original setup commands:
-
-```bash
-# Setup automated Jira backup every 4 days at 10:00 AM
+# Setup automated Jira backup every 4 days at 10:00 AM (default)
 python backup.py -s
 
 # Setup automated Confluence backup every 7 days at 2:30 PM
 python backup.py -s --schedule-days 7 --schedule-time 14:30 --schedule-service confluence
-```
-
-## 🐳 Docker Support
 
-```bash
-# Build the image
-docker build -t jira-backup-py .
-
-# Run a manual backup
-docker run -v $(pwd)/config.yaml:/app/config.yaml jira-backup-py
-
-# Run the scheduler
-docker run -d \
-  --name jira-backup-scheduler \
-  --restart unless-stopped \
-  -v $(pwd)/config.yaml:/app/config.yaml \
-  jira-backup-py python scheduler.py
+# Setup automated Jira backup every 2 days at 6:00 AM
+python backup.py -s --schedule-days 2 --schedule-time 06:00 --schedule-service jira
 ```
 
-## 📚 Documentation
+This will create:
+- **Linux/macOS**: A cron job in your crontab
+- **Windows**: A scheduled task in Task Scheduler
 
-Comprehensive documentation is available at [https://yourusername.github.io/jira-backup-py/](https://yourusername.github.io/jira-backup-py/)
+### Command Line Options
 
-- [Installation Guide](docs/installation.md)
-- [Configuration Reference](docs/configuration.md)
-- [Cloud Storage Setup](docs/cloud-storage.md)
-- [Scheduling Guide](docs/scheduling.md)
-- [API Reference](docs/api-reference.md)
+| Option | Description |
+|--------|-------------|
+| `-j, --jira` | Backup Jira (default if no service specified) |
+| `-c, --confluence` | Backup Confluence |
+| `-w, --wizard` | Run configuration wizard |
+| `-s, --schedule` | Setup automated scheduled backup |
+| `--schedule-days` | Frequency in days for scheduled backup (default: 4) |
+| `--schedule-time` | Time for scheduled backup in HH:MM format (default: 10:00) |
+| `--schedule-service` | Service for scheduled backup (jira/confluence, default: jira) |
 
-## 🔧 Advanced Features
+## 🔧 Advanced Configuration
 
-### Environment Variables
+### Minimal Configuration
 
-Sensitive values can be stored as environment variables:
+If you only want to download backups locally without cloud storage:
 
-```bash
-export JIRA_API_TOKEN="your-token"
-export AWS_ACCESS_KEY_ID="your-key"
-export AWS_SECRET_ACCESS_KEY="your-secret"
-```
-
-### Retention Policies
-
-Configure automatic cleanup of old backups:
-
-```yaml
-backup:
-  retention_days: 30  # Keep backups for 30 days
-  retention_count: 10  # Keep last 10 backups (optional)
-```
-
-### Notifications
-
-Get notified about backup status:
-
-```yaml
-scheduler:
-  notifications:
-    enabled: true
-    smtp_server: "smtp.gmail.com"
-    smtp_port: 587
-    from_email: "backup@example.com"
-    to_email: "admin@example.com"
-    on_failure: true
-    on_success: false
-```
+```yaml
+---
+HOST_URL: "your-instance.atlassian.net"
+USER_EMAIL: "your.email@company.com"
+API_TOKEN: "your-api-token"
+INCLUDE_ATTACHMENTS: false
+DOWNLOAD_LOCALLY: true
+```
+
+Simply omit the `UPLOAD_TO_XXX` sections you don't need.
+
+### Multiple Cloud Providers
+
+You can configure multiple cloud storage providers simultaneously - the script will upload to all configured destinations:
+
+```yaml
+UPLOAD_TO_S3:
+  S3_BUCKET: "my-s3-bucket"
+  # ... S3 config
+
+UPLOAD_TO_GCP:
+  GCS_BUCKET: "my-gcs-bucket"
+  # ... GCP config
+
+UPLOAD_TO_AZURE:
+  AZURE_CONTAINER: "my-azure-container"
+  # ... Azure config
+```
 
 ## 🤝 Contributing
 
-Contributions are welcome! Please read our [Contributing Guidelines](docs/contributing.md) for details on our code of conduct and the process for submitting pull requests.
+Contributions are welcome! Please feel free to submit issues and pull requests.
 
 ## 📝 Changelog
 
-- **2025-06-24**: Added separate cron schedules for Jira and Confluence
+- **2025-06-24**: Added separate cron schedules for Jira and Confluence backups
 - **2025-06-24**: Made cloud storage configuration sections optional
-- **2025-06-24**: Added built-in scheduler with cron expression support
+- **2025-06-24**: Added automated scheduling support for backup tasks
 - **2025-06-23**: Added Google Cloud Storage and Azure Blob Storage support
 - **2020-09-04**: Added Confluence backup support
 - **2019-01-16**: Updated to use API tokens instead of passwords
@@ -251,8 +199,6 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 ## 📞 Support
 
 - **Issues**: [GitHub Issues](https://github.com/yourusername/jira-backup-py/issues)
-- **Discussions**: [GitHub Discussions](https://github.com/yourusername/jira-backup-py/discussions)
-- **Documentation**: [Project Documentation](https://yourusername.github.io/jira-backup-py/)
 
 ---
 

From 7f254f259bc262470eaee5cbd3db0fc37e092346 Mon Sep 17 00:00:00 2001
From: Radek blufor Slavicinsky
Date: Thu, 26 Jun 2025 15:08:27 +0200
Subject: [PATCH 20/20] Create Dockerfile for the tool to run inside K8s using
 CronJob

---
 Dockerfile | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 Dockerfile

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..39fd6c5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,9 @@
+FROM python:3.8
+COPY ./ /backup
+# Create the unprivileged user the job runs as; the base image has no such account
+RUN useradd --create-home backup
+RUN chown -R backup:backup /var/backups
+WORKDIR /backup
+RUN pip install -r requirements.txt
+USER backup
+ENTRYPOINT ["python", "backup.py"]