Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions .github/workflows/gw_comment_trigger.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
outputs:
repo_url: ${{ steps.parse.outputs.repo_url }}
limit: ${{ steps.parse.outputs.limit }}
branch: ${{ steps.parse.outputs.branch }}

steps:
- name: Parse comment
Expand All @@ -24,6 +25,7 @@ jobs:
run: |
echo "Comment body: $COMMENT_BODY"

# Extract repository URL
# Extract repository URL using environment variable
repo_url=$(echo "$COMMENT_BODY" | grep -oP 'gw --repo \K[^\s]+' || echo "")
if [ -z "$repo_url" ]; then
Expand All @@ -37,7 +39,7 @@ jobs:
exit 1
fi

# Extract limit with improved regex and validation
# Extract limit
limit_raw=$(echo "$COMMENT_BODY" | grep -oP -- '--limit\s+\K\d+' || echo "")

# Validate and set limit (default to 10 if not specified or invalid)
Expand All @@ -50,9 +52,18 @@ jobs:
fi
fi

# Extract branch (optional; empty means "use the repository's default branch").
# grep -o prints one line per match, so only the first `--branch <name>` is
# kept. Without this, a comment containing several `--branch` tokens would
# yield a multi-line value, and writing that to $GITHUB_OUTPUT would let the
# extra lines (e.g. `--branch repo_url=...`) inject or override step outputs.
# `|| true` keeps a no-match from aborting the step if pipefail is enabled.
branch_raw=$(printf '%s\n' "$COMMENT_BODY" | grep -oP -- '--branch\s+\K[^\s]+' | head -n 1 || true)
if [ -n "$branch_raw" ]; then
  branch="$branch_raw"
else
  branch=""
fi

echo "repo_url=$repo_url" >> $GITHUB_OUTPUT
echo "limit=$limit" >> $GITHUB_OUTPUT
echo "Parsed repo: $repo_url, limit: $limit"
echo "branch=$branch" >> $GITHUB_OUTPUT
echo "Parsed repo: $repo_url, limit: $limit, branch: $branch"

run-bugspots:
needs: parse-comment
Expand All @@ -79,15 +90,16 @@ jobs:
env:
REPO_URL: ${{ needs.parse-comment.outputs.repo_url }}
LIMIT: ${{ needs.parse-comment.outputs.limit }}
BRANCH: ${{ needs.parse-comment.outputs.branch }}
run: |
chmod +x BugPredict/gw.sh
# Run the script with environment variables to prevent injection
BugPredict/gw.sh "$REPO_URL" "$LIMIT"
BugPredict/gw.sh "$REPO_URL" "$LIMIT" "$BRANCH"

- name: Check for repository errors
id: check-errors
env:
REPO_URL: ${{ needs.parse-comment.outputs.repo_url }}
BRANCH: ${{ needs.parse-comment.outputs.branch }}
run: |
repo_name=$(basename "$REPO_URL" .git)

Expand All @@ -104,6 +116,8 @@ jobs:
echo "error_type=invalid_repo" >> $GITHUB_OUTPUT
elif [[ "$error_content" == *"No commits found"* ]]; then
echo "error_type=no_commits" >> $GITHUB_OUTPUT
elif [[ "$error_content" == *"no such branch"* ]]; then
echo "error_type=invalid_branch" >> $GITHUB_OUTPUT
else
echo "error_type=unknown" >> $GITHUB_OUTPUT
fi
Expand Down Expand Up @@ -150,6 +164,13 @@ jobs:
echo "" >> comment.md
echo "Bugspots analysis requires commit history to analyze bug patterns." >> comment.md
;;
"invalid_branch")
echo "### 🚫 Invalid Branch" >> comment.md
echo "" >> comment.md
echo "**Error:** The specified branch does not exist in the repository." >> comment.md
echo "" >> comment.md
echo "Please specify a valid branch or omit the --branch parameter to use the default branch." >> comment.md
;;
*)
echo "### ⚠️ Analysis Error" >> comment.md
echo "" >> comment.md
Expand All @@ -170,6 +191,7 @@ jobs:
echo '```' >> comment.md
echo "gw --repo https://github.com/username/repository" >> comment.md
echo "gw --repo https://github.com/username/repository --limit 15" >> comment.md
echo "gw --repo https://github.com/username/repository --branch main" >> comment.md
echo '```' >> comment.md

- name: Prepare success comment
Expand Down
111 changes: 77 additions & 34 deletions BugPredict/gw.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ set -e
# Parse command line arguments
REPO_URL="$1"
LIMIT="${2:-10}"
BRANCH="$3"

if [ -z "$REPO_URL" ]; then
echo "Usage: $0 <repository_url> [limit]"
echo "Example: $0 https://github.com/user/repo.git 15"
echo "Usage: $0 <repository_url> [limit] [branch]"
echo "Example: $0 https://github.com/user/repo.git 15 main"
exit 1
fi

Expand All @@ -19,9 +20,10 @@ OUTPUT_DIR="bugspots-results"
echo "🚀 Bugspots Comment Analyzer starting at $(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "Repository: $REPO_URL"
echo "File limit: $LIMIT"
[ -n "$BRANCH" ] && echo "Branch: $BRANCH"

# Check if bugspots is installed
if ! gem list bugspots -i > /dev/null; then
# Check if bugspots gem is installed
if ! gem list bugspots -i > /dev/null 2>&1; then
echo "Installing bugspots gem..."
gem install bugspots
fi
Expand All @@ -32,32 +34,50 @@ mkdir -p "$OUTPUT_DIR"
echo "🔄 Cloning $REPO_URL ..."
repo_name=$(basename "$REPO_URL" .git)

# Try different branch names in order of preference
branches=("main" "master" "develop" "dev")
clone_success=false
# Initialize selected_branch
selected_branch=""

for branch in "${branches[@]}"; do
echo "Trying to clone branch: $branch"
if git clone --branch "$branch" --depth 1000 "$REPO_URL" "$WORKDIR/$repo_name" 2>/dev/null; then
clone_success=true
echo "✅ Successfully cloned branch: $branch"
break
# If branch is specified, try cloning it first
if [ -n "$BRANCH" ]; then
echo "Trying to clone specified branch: $BRANCH"
if git clone --branch "$BRANCH" --depth 1000 "$REPO_URL" "$WORKDIR/$repo_name" 2>/dev/null; then
selected_branch="$BRANCH"
echo "✅ Successfully cloned branch: $BRANCH"
else
echo "Error: Failed to clone specified branch: $BRANCH" >&2
echo "Clone failed for $repo_name branch $BRANCH at $(date '+%Y-%m-%d %H:%M:%S %Z')" > "$OUTPUT_DIR/bugspots-${repo_name}.err"
exit 1
fi
done

# If named branches fail, try default clone
if [ "$clone_success" = false ]; then
echo "Named branches failed, trying default clone..."
if git clone --depth 1000 "$REPO_URL" "$WORKDIR/$repo_name"; then
clone_success=true
echo "✅ Successfully cloned with default branch"
else
# Try default branches in order of preference (master first to match bugspots default)
branches=("master" "main")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to guess user branches at all. If the branch is set, use it in git clone. If the branch is not specified, simply run git clone without the branch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to guess user branches at all. If the branch is set, use it in git clone. If the branch is not specified, simply run git clone without the branch.

@llxia This handles the scenario where a Git branch is not explicitly specified. In such cases, the Ruby script defaults to using master. Since most GitHub repositories default to either main or master, the assumption is that one of these branches should be available. Therefore, when a branch is not provided via the comment, the script should attempt to resolve to main or master. Without this fallback logic, the script will fail for repositories that only use main, if master is assumed by default. The core idea is to make the script resilient by defaulting to main or master when no branch is specified.

This approach prevents failures when analyzing repositories like https://github.com/OpenElements/hiero-enterprise-java, which uses 'main', without requiring users to always specify a branch. By prioritizing 'main' and falling back to 'master', we cover the most common default branch names while maintaining flexibility for user-specified branches.

Copy link
Contributor

@llxia llxia Jun 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the overall logic should be

if the user provides userBranch
    git clone userRepo userBranch
else
    git clone userRepo
    set userBranch via git cmd (i.e., git rev-parse --abbrev-ref HEAD)

bugspots -b userBranch userRepoDir

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An example of using branch in bugspots is:

bugspots -b main /root/hiero-enterprise-java
Scanning /root/hiero-enterprise-java repo
	Found 38 bugfix commits, with 109 hotspots

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

An example of using branch in bugspots is:

bugspots -b main /root/hiero-enterprise-java
Scanning /root/hiero-enterprise-java repo
	Found 38 bugfix commits, with 109 hotspots

@llxia @LongyuZhang this is what I mean. The Ruby script defaults to master as the branch. Imagine a scenario where we have not specified the branch: repos whose default branch is main will fail. I will provide examples from the Ruby script below.

Note: My assumption is that every newly created GitHub repo defaults to either main or master, but the Ruby script only defaults to master, which is wrong for repos that have main as their default branch.

Note: ONLY IMAGINE SCENARIOS WHERE WE DON'T SPECIFY THE BRANCH

This error happens because hiero-enterprise-java defaults to main

bugspots /Users/mac/Hiero/hiero-enterprise-java              
Scanning /Users/mac/Hiero/hiero-enterprise-java repo
/Users/mac/.rvm/gems/ruby-3.0.0/gems/bugspots-0.2.2/lib/bugspots/scanner.rb:13:in `scan': no such branch in the repo: master (ArgumentError)
	from /Users/mac/.rvm/gems/ruby-3.0.0/gems/bugspots-0.2.2/bin/bugspots:49:in `<top (required)>'
	from /Users/mac/.rvm/gems/ruby-3.0.0/bin/bugspots:23:in `load'
	from /Users/mac/.rvm/gems/ruby-3.0.0/bin/bugspots:23:in `<main>'

This one passes because the project defaults to master

bugspots /Users/mac/maven/maven-dependency-plugin                        
Scanning /Users/mac/maven/maven-dependency-plugin repo
	Found 186 bugfix commits, with 418 hotspots:

	Fixes:
		- fix: remove duplicate maven-resolver-api and maven-resolver-util dependencies in pom.xml (#526)
		- Fix broken link on analyze-exclusions-mojo (#521)
		- Bump org.apache.maven.plugins:maven-plugins from 43 to 44 (#516)
		- Fix broken link for dependency:collect
		- [MDEP-689] Fixes ignored dependency filtering in go-offline goal (#417)
		- [MDEP-960] Repair silent logging (#447)
		- [MNG-2961] Remove workaround for fixed bug (#441)
		- Fix SCM tag
		- Delete obsolete commented code for issue that was won't fixed 10 years ago (#446)
		- [MDEP-903] Upgrade to Doxia 2.0.0 Milestone Stack
		

	Hotspots:
		3.3144 - pom.xml
		1.4030 - src/site/apt/index.apt.vm
		1.2976 - src/main/java/org/apache/maven/plugins/dependency/resolvers/ResolveDependencySourcesMojo.java
		1.1938 - src/test/java/org/apache/maven/plugins/dependency/TestCollectMojo.java
		1.1415 - src/test/java/org/apache/maven/plugins/dependency/fromDependencies/TestUnpackDependenciesMojo.java
		1.0123 - src/test/java/org/apache/maven/plugins/dependency/fromDependencies/TestCopyDependenciesMojo.java
		0.9551 - src/test/java/org/apache/maven/plugins/dependency/fromConfiguration/TestIncludeExcludeUnpackMojo.java
		0.9485 - src/it/projects/sources/pom.xml
		0.8771 - src/test/java/org/apache/maven/plugins/dependency/resolvers/GoOfflineMojoTest.java
		0.8608 - src/main/java/org/apache/maven/plugins/dependency/analyze/AbstractAnalyzeMojo.java
		0.7905 - src/test/java/org/apache/maven/plugins/dependency/fromDependencies/TestIncludeExcludeUnpackDependenciesMojo.java

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Branch should be set in bugspots. We should not rely on the default in bugspots. See #1053 (comment)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the userBranch is not specified, its value can be obtained with git command, e.g.

cd hiero-enterprise-java; git rev-parse --abbrev-ref HEAD
main

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Branch should be set in bugspots. We should not rely on the default in bugspots. See #1053 (comment)

@llxia @LongyuZhang my bad. It took me a while to notice that we are resolving the branch using git rev-parse --abbrev-ref HEAD. I will push a fix.

clone_success=false

for branch in "${branches[@]}"; do
echo "Trying to clone branch: $branch"
if git clone --branch "$branch" --depth 1000 "$REPO_URL" "$WORKDIR/$repo_name" 2>/dev/null; then
clone_success=true
selected_branch="$branch"
echo "✅ Successfully cloned branch: $branch"
break
fi
done

# If named branches fail, try default clone
if [ "$clone_success" = false ]; then
echo "Named branches failed, trying default clone..."
if git clone --depth 1000 "$REPO_URL" "$WORKDIR/$repo_name"; then
clone_success=true
selected_branch=$(git -C "$WORKDIR/$repo_name" rev-parse --abbrev-ref HEAD)
echo "✅ Successfully cloned with default branch: $selected_branch"
fi
fi
fi

if [ "$clone_success" = false ]; then
echo "❌ Error: Failed to clone $REPO_URL" >&2
echo "Clone failed for $repo_name at $(date '+%Y-%m-%d %H:%M:%S %Z')" > "$OUTPUT_DIR/bugspots-${repo_name}.err"
exit 1
if [ "$clone_success" = false ]; then
echo "Error: Failed to clone $REPO_URL" >&2
echo "Clone failed for $repo_name at $(date '+%Y-%m-%d %H:%M:%S %Z')" > "$OUTPUT_DIR/bugspots-${repo_name}.err"
exit 1
fi
fi

# Verify repository
Expand All @@ -84,9 +104,23 @@ total_commits=$(git rev-list --count HEAD 2>/dev/null || echo "unknown")
echo "📈 Repository has $total_commits commits in current branch"

# Run bugspots with simplified word pattern
echo "📊 Running Bugspots for $repo_name ..."
echo "Executing: git bugspots -w fix" >&2
if ! git bugspots -w fix > "../../$OUTPUT_DIR/bugspots-${repo_name}.log" 2> "../../$OUTPUT_DIR/bugspots-${repo_name}.err"; then
echo "📊 Running Bugspots for $repo_name on branch $selected_branch..."

# Build the bugspots command as an argv array so that every argument stays a
# single word. The branch name can originate from an untrusted issue comment;
# concatenating it into a string and running it through `eval` would execute
# any shell metacharacters embedded in it (e.g. `main; rm -rf ~`).
bugspots_cmd=(bugspots .)

# Add branch parameter if specified or detected
if [ -n "$selected_branch" ]; then
  bugspots_cmd+=(--branch "$selected_branch")
fi

# Add regex pattern for bug-fix commits
bugspots_cmd+=(--regex 'fix(es|ed)?|close(s|d)?')

# Log a shell-quoted rendering of the exact argv being executed
printf 'Executing:' >&2
printf ' %q' "${bugspots_cmd[@]}" >&2
printf '\n' >&2

# Execute bugspots directly (no eval); stdout -> .log, stderr -> .err
if ! "${bugspots_cmd[@]}" > "../../$OUTPUT_DIR/bugspots-${repo_name}.log" 2> "../../$OUTPUT_DIR/bugspots-${repo_name}.err"; then
echo "❌ Error: Bugspots failed for $repo_name. Check $OUTPUT_DIR/bugspots-${repo_name}.err" >&2
if [ -s "../../$OUTPUT_DIR/bugspots-${repo_name}.err" ]; then
echo "Error details:"
Expand All @@ -103,24 +137,33 @@ else
if [ -s "../../$OUTPUT_DIR/bugspots-${repo_name}.log" ]; then
if grep -q "Hotspots:" "../../$OUTPUT_DIR/bugspots-${repo_name}.log"; then
# Extract and count hotspots
hotspot_lines=$(sed -n '/Hotspots:/,$p' "../../$OUTPUT_DIR/bugspots-${repo_name}.log" | tail -n +2 | grep -E '^\s*[0-9]+\.[0-9]+.*' | wc -l)
hotspot_lines=$(sed -n '/Hotspots:/,$p' "../../$OUTPUT_DIR/bugspots-${repo_name}.log" | tail -n +2 | grep -E '^\s*[0-9]+\.[0-9]+.*' 2>/dev/null | wc -l)
echo "📋 Found $hotspot_lines hotspot files"

# Show summary if available - look for the bug-fix commit count.
# grep -E uses POSIX ERE, which has no \d class; digits must be written as
# [0-9], otherwise the "Found N bugfix commits" line is never matched.
# `|| true` keeps a missing summary line from being treated as an error.
bugfix_info=$(grep -oE "Found [0-9]+ (bugfix|fix) commits" "../../$OUTPUT_DIR/bugspots-${repo_name}.log" | head -1) || true
if [ -n "$bugfix_info" ]; then
  echo "🐛 $bugfix_info"
fi

# Extract top N hotspots after "Hotspots:" line
echo ""
echo "🎯 Top $LIMIT hotspots found:"
echo "================================"
sed -n '/Hotspots:/,$p' "../../$OUTPUT_DIR/bugspots-${repo_name}.log" | \
tail -n +2 | \
grep -E '^\s*[0-9]+\.[0-9]+.*' | \
grep -E '^\s*[0-9]+\.[0-9]+.*' 2>/dev/null | \
head -n "$LIMIT" | \
sed 's/^\s*//'
sed 's/^\s*//' | \
while IFS= read -r line; do
echo " $line"
done
echo "================================"

# Create a clean output file with only the top N hotspots for the GitHub Actions
sed -n '/Hotspots:/,$p' "../../$OUTPUT_DIR/bugspots-${repo_name}.log" | \
tail -n +2 | \
grep -E '^\s*[0-9]+\.[0-9]+.*' | \
grep -E '^\s*[0-9]+\.[0-9]+.*' 2>/dev/null | \
head -n "$LIMIT" | \
sed 's/^\s*//' > "../../$OUTPUT_DIR/bugspots-${repo_name}-top.log"

Expand All @@ -139,4 +182,4 @@ cd - > /dev/null
# Clean up temporary directories
rm -rf "$WORKDIR"

echo "🏁 Bugspots Comment Analyzer completed at $(date '+%Y-%m-%d %H:%M:%S %Z')"
echo "🏁 Bugspots Comment Analyzer completed at $(date '+%Y-%m-%d %H:%M:%S %Z')"
Loading