From 514797c2bf6eb1762801daf05873bb8f6f500f13 Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Mon, 10 Mar 2025 21:02:12 +0000 Subject: [PATCH 01/11] Add test data for LFS Signed-off-by: Melody Ren --- .gitattributes | 1 + assets/datasets/mock_data.json | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 .gitattributes create mode 100644 assets/datasets/mock_data.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..38cc2ce0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +assets/datasets/* filter=lfs diff=lfs merge=lfs -text diff --git a/assets/datasets/mock_data.json b/assets/datasets/mock_data.json new file mode 100644 index 00000000..ef7f1d41 --- /dev/null +++ b/assets/datasets/mock_data.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec2bb92217d74a252b94a7cb70617a8fcb7d4939dd7c0307a8f5b9fb6bf3028 +size 69 From a63730d9f6a1c2574851eb2130bf84b329da47fb Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Mon, 10 Mar 2025 23:25:05 +0000 Subject: [PATCH 02/11] Add another mock data for lfs Signed-off-by: Melody Ren --- .gitattributes | 1 + assets/datasets/mock_data2.bin | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 assets/datasets/mock_data2.bin diff --git a/.gitattributes b/.gitattributes index 38cc2ce0..4620fa17 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ assets/datasets/* filter=lfs diff=lfs merge=lfs -text +assets/datasets/mock_data2.bin filter=lfs diff=lfs merge=lfs -text diff --git a/assets/datasets/mock_data2.bin b/assets/datasets/mock_data2.bin new file mode 100644 index 00000000..f0659cfc --- /dev/null +++ b/assets/datasets/mock_data2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0f5efbef0fe98aa90619444250b1a5eb23158d6686f0b190838f3d544ec85b9 +size 10 From a088fabad2a4b31458945c1efac7f66b28c7d5a7 Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Mon, 10 Mar 2025 23:45:24 +0000 Subject: [PATCH 03/11] remove a lfs file --- 
assets/datasets/mock_data2.bin | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 assets/datasets/mock_data2.bin diff --git a/assets/datasets/mock_data2.bin b/assets/datasets/mock_data2.bin deleted file mode 100644 index f0659cfc..00000000 --- a/assets/datasets/mock_data2.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0f5efbef0fe98aa90619444250b1a5eb23158d6686f0b190838f3d544ec85b9 -size 10 From 8a5cbbbb25dc8acb88c18421d5979c18b8767ae7 Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Mon, 10 Mar 2025 23:49:42 +0000 Subject: [PATCH 04/11] Add instructions on how to set up Git LFS Signed-off-by: Melody Ren --- assets/README.md | 68 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 assets/README.md diff --git a/assets/README.md b/assets/README.md new file mode 100644 index 00000000..ebc07021 --- /dev/null +++ b/assets/README.md @@ -0,0 +1,68 @@ +# Git LFS Setup and Usage Guide + +## Installation + +Git LFS must be installed before using it in a repository. Follow the installation steps based on your operating system. + +### Ubuntu (Debian-based distributions) +```sh +sudo apt update +sudo apt install git-lfs +git lfs install +``` + +### AlmaLinux and ManyLinux +```sh +sudo dnf install git-lfs +git lfs install +``` + +### macOS +```sh +brew install git-lfs +git lfs install + +## Tracking and committing large files + +1. Initialize Git LFS in your repository: + +`git lfs install` + +2. Track specific file types or individual files using the following command: + +`git lfs track "assets/*"`, where `assets` is a directory containing large files. + +3. Commit the changes to `.gitattributes`: + +`git add .gitattributes && git commit -m "Track large files with Git LFS"` + +4. Add and commit the large files: + +`git add assets/largefile.zip && git commit -m "Add large file"` + +5. Push to remote: + +`git push origin branch_name` + +## Cloning and fetching large files + +1. 
Clone a repository that uses Git LFS: + +`git clone https://github.com/username/repository.git`. By default, cloning only retrieves the pointer files to the large file. To fetch the actual large files, use `git lfs pull`. + +2. Fetch large files for an existing repository: + +`git lfs pull` + +## Check Git LFS status + +To check which files are tracked by Git LFS: + +`git lfs ls-files` + +## Removing a file from LFS + +Use the following steps to remove a file from LFS: + +`git rm --cached assets/largefile.zip`, then commit and push. + From b6158b73aae1c2599dc9a680d1537ab135d5a1ba Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Mon, 10 Mar 2025 23:54:20 +0000 Subject: [PATCH 05/11] Format Signed-off-by: Melody Ren --- assets/README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/assets/README.md b/assets/README.md index ebc07021..b93f4b3b 100644 --- a/assets/README.md +++ b/assets/README.md @@ -21,48 +21,49 @@ git lfs install ```sh brew install git-lfs git lfs install +``` ## Tracking and committing large files 1. Initialize Git LFS in your repository: -`git lfs install` + `git lfs install` 2. Track specific file types or individual files using the following command: -`git lfs track "assets/*"`, where `assets` is a directory containing large files. + `git lfs track "assets/*"`, where `assets` is a directory containing large files. 3. Commit the changes to `.gitattributes`: -`git add .gitattributes && git commit -m "Track large files with Git LFS"` + `git add .gitattributes && git commit -m "Track large files with Git LFS"` 4. Add and commit the large files: -`git add assets/largefile.zip && git commit -m "Add large file"` + `git add assets/largefile.zip && git commit -m "Add large file"` 5. Push to remote: -`git push origin branch_name` + `git push origin branch_name` ## Cloning and fetching large files 1. Clone a repository that uses Git LFS: -`git clone https://github.com/username/repository.git`. 
By default, cloning only retrieves the pointer files to the large file. To fetch the actual large files, use `git lfs pull`. + `git clone https://github.com/username/repository.git`. By default, cloning only retrieves the pointer files to the large file. To fetch the actual large files, use `git lfs pull`. 2. Fetch large files for an existing repository: -`git lfs pull` + `git lfs pull` ## Check Git LFS status To check which files are tracked by Git LFS: -`git lfs ls-files` + `git lfs ls-files` ## Removing a file from LFS Use the following steps to remove a file from LFS: -`git rm --cached assets/largefile.zip`, then commit and push. + `git rm --cached assets/largefile.zip`, then commit and push. From f40f5335aa035027ced3aebee52ba74b0351ac11 Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Thu, 20 Mar 2025 21:04:18 +0000 Subject: [PATCH 06/11] Add file size and type check to workflow Signed-off-by: Melody Ren --- .github/workflows/pr_file_check.yaml | 44 ++++++++++++++++++++++++++++ assets/datasets/mock_data.json | 3 -- 2 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/pr_file_check.yaml delete mode 100644 assets/datasets/mock_data.json diff --git a/.github/workflows/pr_file_check.yaml b/.github/workflows/pr_file_check.yaml new file mode 100644 index 00000000..ab0d876d --- /dev/null +++ b/.github/workflows/pr_file_check.yaml @@ -0,0 +1,44 @@ +name: Check for Large Files and Restricted Extensions + +on: + pull_request: + branches: + - main + types: [opened, synchronize, reopened] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + LLVM_VERSION: 16 + +jobs: + check-files: + name: Check file size and type + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + set-safe-directory: true + + - name: Check for large files + run: | + MAX_SIZE=5M # Max file size limit; number with optional K, M or G suffix (KiB-based) + LARGE_FILES=$(git diff --name-only --diff-filter=A origin/${{ 
github.event.pull_request.base.ref }} | xargs -r -d '\n' du -k | awk -F'\t' -v max="$MAX_SIZE" 'BEGIN { limit = max + 0; if (max ~ /[Gg]$/) limit *= 1048576; else if (max ~ /[Mm]$/) limit *= 1024 } $1 + 0 > limit { print $2 }') # du -k reports KiB so sizes compare numerically; xargs -r skips du when nothing was added + if [[ ! -z "$LARGE_FILES" ]]; then + echo "❌ The following files exceed the allowed size of $MAX_SIZE:" + echo "$LARGE_FILES" + exit 1 + fi + + - name: Check for restricted file types + run: | + BLOCKED_EXTENSIONS="exe|zip|tar\.gz|bz2" # Forbidden extensions as an ERE alternation; escape literal dots + BAD_FILES=$(git diff --name-only --diff-filter=A origin/${{ github.event.pull_request.base.ref }} | grep -E "\.($BLOCKED_EXTENSIONS)$" || true) + if [[ ! -z "$BAD_FILES" ]]; then + echo "❌ The following files have restricted extensions and need to be removed from the PR:" + echo "$BAD_FILES" + exit 1 + fi diff --git a/assets/datasets/mock_data.json b/assets/datasets/mock_data.json deleted file mode 100644 index ef7f1d41..00000000 --- a/assets/datasets/mock_data.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ec2bb92217d74a252b94a7cb70617a8fcb7d4939dd7c0307a8f5b9fb6bf3028 -size 69 From 4d21881086c14e498cbd60932383207b6d571519 Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Thu, 20 Mar 2025 21:20:11 +0000 Subject: [PATCH 07/11] Fetch base branch for comparison Signed-off-by: Melody Ren --- .github/workflows/pr_file_check.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pr_file_check.yaml b/.github/workflows/pr_file_check.yaml index ab0d876d..93807503 100644 --- a/.github/workflows/pr_file_check.yaml +++ b/.github/workflows/pr_file_check.yaml @@ -22,11 +22,16 @@ jobs: uses: actions/checkout@v4 with: set-safe-directory: true + fetch-depth: 1 + + - name: Fetch base branch + run: git fetch origin ${{ github.event.pull_request.base.ref }} --depth=1 - name: Check for large files run: | MAX_SIZE=5M # Max file size limit; number with optional K, M or G suffix (KiB-based) LARGE_FILES=$(git diff --name-only --diff-filter=A origin/${{ github.event.pull_request.base.ref }} | xargs -r -d '\n' du -k | awk -F'\t' -v max="$MAX_SIZE" 'BEGIN { limit = max + 0; if (max ~ /[Gg]$/) limit *= 1048576; else if (max ~ /[Mm]$/) limit *= 1024 } $1 + 0 > limit { print $2 }') # du -k reports KiB so sizes compare numerically; xargs -r skips du when nothing was added + if [[ ! 
-z "$LARGE_FILES" ]]; then echo "❌ The following files exceed the allowed size of $MAX_SIZE:" echo "$LARGE_FILES" From bca99db17eaffc112298a2cfde26c8e5b350758d Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Thu, 20 Mar 2025 21:33:18 +0000 Subject: [PATCH 08/11] DCO Remediation Commit for Melody Ren I, Melody Ren , hereby add my Signed-off-by to this commit: a088fabad2a4b31458945c1efac7f66b28c7d5a7 Signed-off-by: Melody Ren --- .github/workflows/pr_file_check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr_file_check.yaml b/.github/workflows/pr_file_check.yaml index 93807503..c84625b2 100644 --- a/.github/workflows/pr_file_check.yaml +++ b/.github/workflows/pr_file_check.yaml @@ -43,7 +43,7 @@ jobs: BLOCKED_EXTENSIONS="exe|zip|tar\.gz|bz2" # Forbidden extensions as an ERE alternation; escape literal dots BAD_FILES=$(git diff --name-only --diff-filter=A origin/${{ github.event.pull_request.base.ref }} | grep -E "\.($BLOCKED_EXTENSIONS)$" || true) if [[ ! -z "$BAD_FILES" ]]; then - echo "❌ The following files have restricted extensions and need to be removed from the PR:" + echo "❌ The following files have restricted extensions:" echo "$BAD_FILES" exit 1 fi From 3f93e3206e6e94de41062a769a7f2330fdc49422 Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Thu, 20 Mar 2025 21:45:30 +0000 Subject: [PATCH 09/11] [TEST CI] expect pipeline to fail Signed-off-by: Melody Ren --- assets/datasets/testfile.bz2 | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 assets/datasets/testfile.bz2 diff --git a/assets/datasets/testfile.bz2 b/assets/datasets/testfile.bz2 new file mode 100644 index 00000000..e69de29b From bfbc30b41a50f70e12a5e7124e1502ad92a7af5d Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Thu, 20 Mar 2025 21:50:50 +0000 Subject: [PATCH 10/11] [TEST CI] expect pipeline to pass Signed-off-by: Melody Ren --- assets/datasets/testfile.bz2 | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 assets/datasets/testfile.bz2 diff --git 
a/assets/datasets/testfile.bz2 b/assets/datasets/testfile.bz2 deleted file mode 100644 index e69de29b..00000000 From e418aba2bb65207ee0a0db3be3ad5a446d640bce Mon Sep 17 00:00:00 2001 From: Melody Ren Date: Thu, 20 Mar 2025 21:55:53 +0000 Subject: [PATCH 11/11] Remove untracked file from .gitattributes Signed-off-by: Melody Ren --- .gitattributes | 1 - assets/README.md | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 4620fa17..38cc2ce0 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1 @@ assets/datasets/* filter=lfs diff=lfs merge=lfs -text -assets/datasets/mock_data2.bin filter=lfs diff=lfs merge=lfs -text diff --git a/assets/README.md b/assets/README.md index b93f4b3b..a11fdbed 100644 --- a/assets/README.md +++ b/assets/README.md @@ -63,3 +63,5 @@ Use the following steps to remove a file from LFS: `git rm --cached assets/largefile.zip`, then commit and push. + Once the file is removed, remember to delete the tracking information in `.gitattributes`. +