Bump js-yaml in /test-result-summary-client #58

Workflow file for this run

name: GlitchWitcher - Bug Prediction Analysis
on:
issue_comment:
types: [created]
permissions:
pull-requests: write
issues: write
contents: read
jobs:
glitch-witcher:
runs-on: ubuntu-latest
if: contains(github.event.comment.body, 'GlitchWitcher')
steps:
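# Trigger examples (the command must appear in an issue/PR comment), derived
# from the two patterns matched by the parse step below:
#   GlitchWitcher                                          -> analyzes the PR the comment is on
#   GlitchWitcher https://github.com/owner/repo/pull/123   -> analyzes the linked PR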
- name: Parse GlitchWitcher Command
id: parse-command
uses: actions/github-script@v6
with:
script: |
const body = context.payload.comment?.body ?? '';
core.info(`Full comment: ${body}`);
// Look for an explicit PR link in the comment
const linkMatch = body.match(/https:\/\/github\.com\/[^/]+\/[^/]+\/pull\/\d+/);
let prLink = null;
if (linkMatch) {
prLink = linkMatch[0];
core.info(`PR link provided: ${prLink}`);
} else {
// Allow "GlitchWitcher" alone when the comment is on a PR
const hasCmdOnly = /(^|\s)GlitchWitcher\s*$/.test(body);
if (hasCmdOnly && context.payload.issue?.pull_request) {
const { owner, repo } = context.repo;
const prNumber = context.issue.number;
prLink = `https://github.com/${owner}/${repo}/pull/${prNumber}`;
core.info(`Using current PR: ${prLink}`);
} else {
core.setFailed('ERROR: Invalid GlitchWitcher command format or missing PR link');
return;
}
}
// Extract repo owner/name/number from the PR link
const m = prLink.match(/^https:\/\/github\.com\/([^/]+)\/([^/]+)\/pull\/(\d+)$/);
if (!m) {
core.setFailed(`ERROR: Could not parse repository info from PR link: ${prLink}`);
return;
}
const [, repoOwner, repoName, prNumber] = m;
const fullRepoName = `${repoOwner}-${repoName}`;
const repoUrl = `https://github.com/${repoOwner}/${repoName}.git`;
core.setOutput('repo_owner', repoOwner);
core.setOutput('repo_name', repoName);
core.setOutput('pr_number', prNumber);
core.setOutput('full_repo_name', fullRepoName);
core.setOutput('pr_link', prLink);
core.setOutput('repo_url', repoUrl);
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.10"
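# TensorFlow 2.12.0 supports Python 3.8-3.11, hence the pinned 3.10 above.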
- name: Install dependencies
run: |
pip install tensorflow==2.12.0 pandas joblib scipy numpy urllib3 scikit-learn
sudo apt-get update
sudo apt-get install -y cloc git
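# cloc and git are presumably required by the feature-extraction scripts
# downloaded in the next step.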
- name: Download scripts
run: |
echo "Downloading scripts from aqa-test-tools repository..."
curl -L -o ExtractTraditionalFeatures.sh "https://raw.githubusercontent.com/adoptium/aqa-triage-data/refs/heads/main/GlitchWitcher/Traditional%20Dataset/ExtractTraditionalFeatures.sh"
curl -L -o extract_traditional_features.sh "https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/BugPredict/GlitchWitcher/extract_traditional_features.sh"
curl -L -o predict.py "https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/BugPredict/GlitchWitcher/predict.py"
curl -L -o save_trained_model.py "https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/BugPredict/GlitchWitcher/save_trained_model.py"
curl -L -o REPD_Impl.py "https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/BugPredict/GlitchWitcher/REPD_Impl.py"
curl -L -o autoencoder.py "https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/BugPredict/GlitchWitcher/autoencoder.py"
curl -L -o stat_util.py "https://raw.githubusercontent.com/adoptium/aqa-test-tools/refs/heads/master/BugPredict/GlitchWitcher/stat_util.py"
chmod +x ExtractTraditionalFeatures.sh
chmod +x extract_traditional_features.sh
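# The next step probes raw.githubusercontent.com with HTTP HEAD requests, e.g.:
#   curl -sI --fail "$dataset_url"   # exits 0 only on a 2xx/3xx response
# so a missing artifact (404) flips the corresponding flag to "false".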
- name: Check Dataset Availability
id: check-dataset
run: |
dataset_url="https://raw.githubusercontent.com/adoptium/aqa-triage-data/refs/heads/main/GlitchWitcher/Traditional%20Dataset/${{ steps.parse-command.outputs.full_repo_name }}/${{ steps.parse-command.outputs.full_repo_name }}.csv"
base_model_url="https://raw.githubusercontent.com/adoptium/aqa-triage-data/refs/heads/main/GlitchWitcher/Traditional%20Dataset/${{ steps.parse-command.outputs.full_repo_name }}/trained_model"
echo "Checking dataset availability..."
echo "Dataset URL: $dataset_url"
echo "Model base URL: $base_model_url"
dataset_exists="false"
model_exists="false"
# Check if dataset CSV exists
if curl -sI --fail "$dataset_url" > /dev/null 2>&1; then
echo "Dataset CSV found for ${{ steps.parse-command.outputs.full_repo_name }}"
dataset_exists="true"
else
echo "Dataset CSV not found for ${{ steps.parse-command.outputs.full_repo_name }}"
fi
# Check if TF model artifacts exist
if \
curl -sI --fail "$base_model_url/metadata.json" > /dev/null 2>&1 && \
curl -sI --fail "$base_model_url/classifier_params.json" > /dev/null 2>&1 && \
curl -sI --fail "$base_model_url/autoencoder.index" > /dev/null 2>&1 && \
curl -sI --fail "$base_model_url/autoencoder.meta" > /dev/null 2>&1 && \
curl -sI --fail "$base_model_url/autoencoder.data-00000-of-00001" > /dev/null 2>&1 && \
curl -sI --fail "$base_model_url/checkpoint" > /dev/null 2>&1
then
echo "TensorFlow model artifacts found"
model_exists="true"
else
echo "TensorFlow model artifacts not found or incomplete"
fi
echo "dataset_exists=$dataset_exists" >> $GITHUB_OUTPUT
echo "model_exists=$model_exists" >> $GITHUB_OUTPUT
echo "dataset_url=$dataset_url" >> $GITHUB_OUTPUT
echo "base_model_url=$base_model_url" >> $GITHUB_OUTPUT
- name: Generate Dataset if Missing
if: steps.check-dataset.outputs.dataset_exists == 'false'
run: |
echo "Generating dataset for ${{ steps.parse-command.outputs.full_repo_name }}..."
./ExtractTraditionalFeatures.sh "${{ steps.parse-command.outputs.repo_url }}"
# Find the generated CSV file
csv_file=$(find . -name "*.csv" -path "./metrics_output_*" | head -1)
if [ ! -f "$csv_file" ]; then
echo "ERROR: Failed to generate CSV file"
exit 1
fi
echo "Generated CSV file: $csv_file"
echo "csv_file_path=$csv_file" >> $GITHUB_ENV
- name: Train Model if Missing
if: steps.check-dataset.outputs.dataset_exists == 'false' || steps.check-dataset.outputs.model_exists == 'false'
run: |
echo "Training model for ${{ steps.parse-command.outputs.full_repo_name }}..."
# Determine dataset path
if [ "${{ steps.check-dataset.outputs.dataset_exists }}" == "true" ]; then
dataset_path="${{ steps.check-dataset.outputs.dataset_url }}"
else
dataset_path="$csv_file_path"
fi
echo "Using dataset: $dataset_path"
python3 save_trained_model.py "$dataset_path"
if [ ! -d "trained_model" ]; then
echo "ERROR: Failed to generate trained model"
exit 1
fi
echo "Model training completed successfully"
- name: Checkout adoptium/aqa-triage-data
if: steps.check-dataset.outputs.dataset_exists == 'false' || steps.check-dataset.outputs.model_exists == 'false'
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
with:
repository: adoptium/aqa-triage-data
path: aqa-triage-data
ref: main
- name: Prepare PR content
id: prep-pr
if: steps.check-dataset.outputs.dataset_exists == 'false' || steps.check-dataset.outputs.model_exists == 'false'
run: |
echo "Preparing PR contents for adoptium/aqa-triage-data..."
# Compute variables
branch_name="add-dataset-${{ steps.parse-command.outputs.full_repo_name }}-$(date +%Y%m%d-%H%M%S)"
target_dir="GlitchWitcher/Traditional Dataset/${{ steps.parse-command.outputs.full_repo_name }}"
# Create target directory and copy artifacts
mkdir -p "aqa-triage-data/$target_dir"
if [ "${{ steps.check-dataset.outputs.dataset_exists }}" == "false" ]; then
echo "Copying CSV file..."
cp "$csv_file_path" "aqa-triage-data/$target_dir/${{ steps.parse-command.outputs.full_repo_name }}.csv"
fi
if [ "${{ steps.check-dataset.outputs.model_exists }}" == "false" ]; then
echo "Copying trained model..."
cp -r "trained_model" "aqa-triage-data/$target_dir/"
fi
# Create PR body in target repo workspace
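# Note: ${{ }} expressions are substituted by the runner before the shell
# executes, so they still expand inside the quoted 'EOF' heredoc below even
# though shell variable expansion is disabled there.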
cat > "aqa-triage-data/pr_body.md" << 'EOF'
# GlitchWitcher Dataset Addition
This PR adds the dataset and trained model for repository: **${{ steps.parse-command.outputs.repo_owner }}/${{ steps.parse-command.outputs.repo_name }}**
## Contents:
- Dataset CSV file: `${{ steps.parse-command.outputs.full_repo_name }}.csv`
- Trained model directory: `trained_model/`
## Triggered by:
- Comment in: ${{ github.event.issue.html_url }}
- Target PR: ${{ steps.parse-command.outputs.pr_link }}
This PR was automatically generated by the GlitchWitcher workflow.
EOF
# Outputs for subsequent steps
echo "branch_name=$branch_name" >> "$GITHUB_OUTPUT"
echo "target_dir=$target_dir" >> "$GITHUB_OUTPUT"
echo "pr_body_path=aqa-triage-data/pr_body.md" >> "$GITHUB_OUTPUT"
- name: Create Pull Request to aqa-triage-data
id: cpr
if: steps.check-dataset.outputs.dataset_exists == 'false' || steps.check-dataset.outputs.model_exists == 'false'
uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
with:
token: ${{ secrets.ADOPTIUM_AQAVIT_BOT_TOKEN }}
path: aqa-triage-data
commit-message: "Add dataset and trained model for ${{ steps.parse-command.outputs.full_repo_name }}"
committer: GlitchWitcher Bot <glitchwitcher-bot@adoptium.net>
author: GlitchWitcher Bot <glitchwitcher-bot@adoptium.net>
title: "Add GlitchWitcher dataset for ${{ steps.parse-command.outputs.full_repo_name }}"
body-path: ${{ steps.prep-pr.outputs.pr_body_path }}
branch: ${{ steps.prep-pr.outputs.branch_name }}
base: main
draft: false
add-paths: |
${{ steps.prep-pr.outputs.target_dir }}/**
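# The next step merges the newly created PR immediately via the REST API
# (PUT /repos/{owner}/{repo}/pulls/{number}/merge) using the bot token;
# this is a direct squash merge, not GitHub's auto-merge feature.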
- name: Merge PR to aqa-triage-data
if: steps.cpr.outputs.pull-request-number != ''
run: |
PR_NUMBER="${{ steps.cpr.outputs.pull-request-number }}"
PR_URL="${{ steps.cpr.outputs.pull-request-url }}"
echo "✅ Successfully created PR #$PR_NUMBER"
echo "PR URL: $PR_URL"
echo "Enabling auto-merge for PR #$PR_NUMBER..."
MERGE_RESPONSE=$(curl -sS -X PUT \
-H "Authorization: token ${{ secrets.ADOPTIUM_AQAVIT_BOT_TOKEN }}" \
-H "Accept: application/vnd.github.v3+json" \
"https://api.github.com/repos/adoptium/aqa-triage-data/pulls/$PR_NUMBER/merge" \
-d "{
\"commit_title\": \"Auto-merge: Add GlitchWitcher dataset for ${{ steps.parse-command.outputs.full_repo_name }}\",
\"commit_message\": \"Automatically merged by GlitchWitcher workflow\",
\"merge_method\": \"squash\"
}")
echo "Auto-merge response: $MERGE_RESPONSE"
- name: Run Analysis on Target PR
id: analysis
run: |
set -e
echo "Running GlitchWitcher analysis on ${{ steps.parse-command.outputs.pr_link }}..."
# Get PR details using GitHub API
pr_api_url="https://api.github.com/repos/${{ steps.parse-command.outputs.repo_owner }}/${{ steps.parse-command.outputs.repo_name }}/pulls/${{ steps.parse-command.outputs.pr_number }}"
pr_info=$(curl -s -H "Accept: application/vnd.github.v3+json" "$pr_api_url")
# Parse base/head SHAs using Python (more robust than grep)
base_sha=$(printf "%s" "$pr_info" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d['base']['sha'])")
head_sha=$(printf "%s" "$pr_info" | python3 -c "import sys, json; d=json.load(sys.stdin); print(d['head']['sha'])")
echo "Base SHA: $base_sha"
echo "Head SHA: $head_sha"
# Clone the target repository
git clone "${{ steps.parse-command.outputs.repo_url }}" target_repo
# Ensure we have the PR head commit (works for forks)
git -C target_repo fetch origin "pull/${{ steps.parse-command.outputs.pr_number }}/head:prhead" || true
git -C target_repo fetch --all --tags --prune
# Get changed files between base..head (C/C++ only)
merge_base=$(git -C target_repo merge-base "$base_sha" "$head_sha")
echo "Merge base: $merge_base"
changed_files=$(git -C target_repo diff --name-only "$merge_base" "$head_sha" | grep -E "\.(c|cpp|cxx|cc|h|hpp|hxx)$" || true)
if [ -z "$changed_files" ]; then
echo "No C/C++ files changed in this PR"
echo "comment=No C/C++ files found in the PR changes." >> $GITHUB_OUTPUT
exit 0
fi
echo "Changed files:"
printf "%s\n" "$changed_files"
# Extract features for base commit
git -C target_repo checkout "$base_sha"
mkdir -p metrics_output_base
echo "File,loc,v(g),ev(g),iv(g),n,v,l,d,i,e,b,t,lOComment,lOBlank,LOCodeAndComment,uniq_Op,Uniq_Opnd,total_Op,total_Opnd,branchCount" > metrics_output_base/summary_metrics.csv
for file in $changed_files; do
fpath="target_repo/$file"
if [ -f "$fpath" ]; then
echo "Processing $file (base)..."
./extract_traditional_features.sh "$fpath"
# Exclude the base/head accumulator dirs so a failed extraction can't select (and delete) them
generated_dir=$(ls -td metrics_output_* 2>/dev/null | grep -vE '^metrics_output_(base|head)$' | head -n 1 || true)
if [ -n "$generated_dir" ] && [ -d "$generated_dir" ] && [ -f "$generated_dir/summary_metrics.csv" ]; then
tail -n +2 "$generated_dir/summary_metrics.csv" >> metrics_output_base/summary_metrics.csv
rm -rf "$generated_dir"
else
echo "Warning: No metrics generated for $file (base)."
fi
else
echo "Warning: File not found at $fpath (base)."
fi
done
# Extract features for head commit
git -C target_repo checkout "$head_sha"
mkdir -p metrics_output_head
echo "File,loc,v(g),ev(g),iv(g),n,v,l,d,i,e,b,t,lOComment,lOBlank,LOCodeAndComment,uniq_Op,Uniq_Opnd,total_Op,total_Opnd,branchCount" > metrics_output_head/summary_metrics.csv
for file in $changed_files; do
fpath="target_repo/$file"
if [ -f "$fpath" ]; then
echo "Processing $file (head)..."
./extract_traditional_features.sh "$fpath"
# Exclude the base/head accumulator dirs so a failed extraction can't select (and delete) them
generated_dir=$(ls -td metrics_output_* 2>/dev/null | grep -vE '^metrics_output_(base|head)$' | head -n 1 || true)
if [ -n "$generated_dir" ] && [ -d "$generated_dir" ] && [ -f "$generated_dir/summary_metrics.csv" ]; then
tail -n +2 "$generated_dir/summary_metrics.csv" >> metrics_output_head/summary_metrics.csv
rm -rf "$generated_dir"
else
echo "Warning: No metrics generated for $file (head)."
fi
else
echo "Warning: File not found at $fpath (head)."
fi
done
echo "=== Row counts ==="
echo "Base rows: $(wc -l < metrics_output_base/summary_metrics.csv || echo 0)"
echo "Head rows: $(wc -l < metrics_output_head/summary_metrics.csv || echo 0)"
# Prepare trained model: Prefer remote artifacts; otherwise use locally trained model
have_model="false"
if [ "${{ steps.check-dataset.outputs.model_exists }}" == "true" ]; then
echo "Preparing TensorFlow model artifacts from remote..."
mkdir -p trained_model
base_url="${{ steps.check-dataset.outputs.base_model_url }}"
curl -fSL -o trained_model/metadata.json "$base_url/metadata.json"
curl -fSL -o trained_model/classifier_params.json "$base_url/classifier_params.json"
curl -fSL -o trained_model/autoencoder.index "$base_url/autoencoder.index"
curl -fSL -o trained_model/autoencoder.meta "$base_url/autoencoder.meta"
curl -fSL -o trained_model/autoencoder.data-00000-of-00001 "$base_url/autoencoder.data-00000-of-00001"
curl -fSL -o trained_model/checkpoint "$base_url/checkpoint"
have_model="true"
elif [ -d "trained_model" ]; then
echo "Using locally trained model artifacts..."
have_model="true"
fi
if [ "$have_model" != "true" ]; then
echo "comment=No model found for predictions." >> $GITHUB_OUTPUT
exit 0
fi
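# The comparison script below assumes predict.py exposes predict(csv_path),
# returning one dict per file with 'file', 'p_defective' and 'p_non_defective'
# keys, plus format_results_for_comparison(names, base, head); these names are
# taken from the downloaded scripts and are not verified here.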
# Create the comparison script
cat > compare_predictions.py << 'EOF'
import sys, os
from predict import predict, format_results_for_comparison
base_results = predict('metrics_output_base/summary_metrics.csv')
head_results = predict('metrics_output_head/summary_metrics.csv')
file_names = [os.path.basename(str(r['file']).replace('\\','/')) for r in base_results if 'file' in r]
base_data = [{'p_defective': r['p_defective'], 'p_non_defective': r['p_non_defective']} for r in base_results if 'p_defective' in r]
head_data = [{'p_defective': r['p_defective'], 'p_non_defective': r['p_non_defective']} for r in head_results if 'p_defective' in r]
comparison_output = format_results_for_comparison(file_names, base_data, head_data)
print(comparison_output)
EOF
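# Multiline step outputs use GitHub's delimiter syntax (name<<DELIM ... DELIM);
# the delimiter must not occur in the payload, so this relies on the
# comparison output never containing a bare "EOF" line.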
echo "Running comparison predictions..."
if [ -f "metrics_output_base/summary_metrics.csv" ] && [ -f "metrics_output_head/summary_metrics.csv" ]; then
comparison_result=$(python3 compare_predictions.py)
if [ -n "$comparison_result" ]; then
{
echo "comment<<EOF"
echo "$comparison_result"
echo ""
echo "### 📋 Interpretation Note:"
echo "> The values shown are Probability Densities (PDFs), not probabilities. They represent the model's assessment of how likely a file's characteristics are to be 'defective' vs. 'non-defective'. A higher value indicates a better fit for that category. Very small values are expected and normal."
echo ""
echo "EOF"
} >> $GITHUB_OUTPUT
else
echo "comment=Comparison prediction produced no output." >> $GITHUB_OUTPUT
fi
else
echo "comment=Missing base or head metrics files for comparison." >> $GITHUB_OUTPUT
fi
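# The analysis result is passed to the script through env rather than being
# interpolated directly, which avoids breaking (or injecting into) the JS
# template literal if the comment contains backticks or ${...} sequences.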
- name: Comment on PR
if: steps.analysis.outputs.comment != ''
uses: actions/github-script@v6
env:
COMMENT_BODY: "${{ steps.analysis.outputs.comment }}"
PR_LINK: "${{ steps.parse-command.outputs.pr_link }}"
REPO_NAME: "${{ steps.parse-command.outputs.full_repo_name }}"
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const commentBody = `## 🔮 GlitchWitcher Analysis Results
**Target PR:** ${process.env.PR_LINK}
**Repository:** ${process.env.REPO_NAME}
${process.env.COMMENT_BODY}
*Analysis performed by GlitchWitcher Bot*`;
await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: commentBody
});