Skip to content

fix: do not break subsequent exclamation points or question marks in nlp/sentencize #15546

fix: do not break subsequent exclamation points or question marks in nlp/sentencize

fix: do not break subsequent exclamation points or question marks in nlp/sentencize #15546

#/
# @license Apache-2.0
#
# Copyright (c) 2023 The Stdlib Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#/
# Workflow name:
name: run_tests_coverage

# Workflow triggers:
on:
  # Pushes to the development branch:
  push:
    branches: [develop]
    paths:
      # List paths for which changes should trigger this workflow:
      - 'lib/**/bin/**'
      - 'lib/**/data/**'
      - 'lib/**/etc/**'
      - 'lib/**/include/**'
      - 'lib/**/lib/**'
      - 'lib/**/src/**'
      - 'lib/**/test/**'
      - 'lib/**/*.gyp'
      - 'lib/**/*.gypi'
      - 'lib/**/manifest.json'
      - 'package.json'
      # List paths for which changes should *not* trigger this workflow:
      - '!lib/**/_tools/**'

  # Pull request activity:
  pull_request_target:
    types: [opened, synchronize, reopened]
    paths:
      # List paths for which changes should trigger this workflow:
      - 'lib/**/bin/**'
      - 'lib/**/data/**'
      - 'lib/**/etc/**'
      - 'lib/**/include/**'
      - 'lib/**/lib/**'
      - 'lib/**/src/**'
      - 'lib/**/test/**'
      - 'lib/**/*.gyp'
      - 'lib/**/*.gypi'
      - 'lib/**/manifest.json'
      - 'package.json'
      # List paths for which changes should *not* trigger this workflow:
      - '!lib/**/_tools/**'

  # Manual runs, with the directories to test supplied by the user:
  workflow_dispatch:
    inputs:
      directories:
        description: 'List of changed directories for which to run tests (space separated):'

# Global permissions:
permissions:
  # Allow read-only access to the repository contents:
  contents: read
# Workflow jobs:
jobs:
  # Job which calculates test coverage for changed files:
  coverage:
    # Display name of the job:
    name: Calculate test coverage for packages
    # Type of virtual host machine on which the job runs:
    runs-on: ubuntu-latest
    # Sequence of job steps:
    steps:
# Checkout the repository:
- name: 'Checkout repository'
  # Pin action to full length commit SHA
  uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
  with:
    # Specify whether to remove untracked files before checking out the repository:
    clean: true
    # Limit clone depth to the last 1000 commits:
    fetch-depth: 1000
    # Specify whether to download Git-LFS files:
    lfs: false
    # Avoid storing the GitHub token in the local Git configuration, as later steps execute code from the checked out revision (which, for pull requests, is the PR head):
    persist-credentials: false
    # Checkout PR branch if triggered by a pull request:
    ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.sha || '' }}
    # Checkout PR repository (to properly work with forks) if triggered by a pull request:
    repository: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.head.repo.full_name || github.repository }}
  timeout-minutes: 10
# Install Node.js:
- name: 'Install Node.js'
  timeout-minutes: 5
  # Pin action to full length commit SHA
  uses: actions/setup-node@1d0ff469b7ec7b3cb9d8673fde0c81c44821de2a # v4.2.0
  with:
    # Quoted so the version is always read as a string:
    node-version: '20' # 'lts/*'
# Cache dependencies:
- name: 'Cache dependencies'
  # Identifier used by later steps to read this step's `cache-hit` output:
  id: cache
  # Pin action to full length commit SHA
  uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
  with:
    # Cache key built from the runner OS and a hash of the `package.json` files:
    key: ${{ runner.os }}-node-${{ hashFiles('**/package.json') }}
    # Key prefix(es) to fall back on:
    restore-keys: |
      ${{ runner.os }}-node-
    # Directories to cache:
    path: |
      ${{ github.workspace }}/node_modules
# Install dependencies, making up to three attempts in order to tolerate transient failures (e.g., network errors) when installing node module dependencies:
- name: 'Install dependencies'
  # Only install when the dependency cache did not report an exact hit:
  if: steps.cache.outputs.cache-hit != 'true'
  timeout-minutes: 15
  run: |
    make install-node-modules \
      || make install-node-modules \
      || make install-node-modules
# Initialize development environment by running the project's `init` make target:
- name: 'Initialize development environment'
  timeout-minutes: 5
  run: make init
# Get list of changed directories from PR and push events:
- name: 'Get list of changed directories'
  if: github.event_name != 'workflow_dispatch'
  id: changed-directories
  # A failure to determine the changed directories does not fail the job:
  continue-on-error: true
  env:
    STDLIB_BOT_GITHUB_TOKEN: ${{ secrets.STDLIB_BOT_FGPAT_REPO_READ }}
    # Pass event data via the environment rather than substituting it into the script text:
    PR_NUMBER: ${{ github.event.pull_request.number }}
    COMMIT_BEFORE: ${{ github.event.before }}
    COMMIT_AFTER: ${{ github.event.after }}
  run: |
    if [ -n "$PR_NUMBER" ]; then
      # Get the list of changed files in pull request, requesting successive pages until one yields no file names:
      page=1
      files=""
      while true; do
        changed_files=$(curl -s -H "Accept: application/vnd.github.v3+json" -H "Authorization: Bearer $STDLIB_BOT_GITHUB_TOKEN" "https://api.github.com/repos/stdlib-js/stdlib/pulls/${PR_NUMBER}/files?page=$page&per_page=100" | jq -r '.[] | .filename')
        if [ -z "$changed_files" ]; then
          break
        fi
        files="$files $changed_files"
        page=$((page+1))
      done
      # Collapse the newline-separated file names into a single space-separated list:
      files=$(echo "$files" | tr '\n' ' ' | sed 's/ $//')
    else
      # Get changed files by comparing the current commit to the commit before the push event or with its parent:
      if [ "$COMMIT_BEFORE" == "0000000000000000000000000000000000000000" ]; then
        files=$(git diff --diff-filter=AM --name-only HEAD~ "$COMMIT_AFTER")
      else
        files=$(git diff --diff-filter=AM --name-only "$COMMIT_BEFORE" "$COMMIT_AFTER")
      fi
    fi
    # Reduce the files to their parent directories; `sort -u` drops duplicates even when they are not adjacent (which `uniq` alone would miss):
    directories=$(for file in $files; do dirname "$file"; done | sort -u | tr '\n' ' ' | sed 's/ $//')
    echo "directories=${directories}" >> "$GITHUB_OUTPUT"
# Get list of changed directories from workflow dispatch event:
- name: 'Get list of changed directories (from user input)'
  if: github.event_name == 'workflow_dispatch'
  id: changed-directories-user-input
  env:
    # Expose the user-provided value as data so that the shell never parses it as part of the script:
    INPUT_DIRECTORIES: ${{ github.event.inputs.directories }}
  run: |
    echo "directories=${INPUT_DIRECTORIES}" >> "$GITHUB_OUTPUT"
  timeout-minutes: 5
# Exit early if non-package directories are changed:
- name: 'Exit early if non-package directories are changed'
  id: check-changed
  if: github.event_name != 'workflow_dispatch'
  env:
    # The directory list is derived from changed file paths, so pass it as data instead of substituting it into the script text:
    CHANGED_DIRECTORIES: ${{ steps.changed-directories.outputs.directories }}
  run: |
    skip="false"
    # The list is space separated, so rely on word splitting to iterate over it:
    for directory in $CHANGED_DIRECTORIES; do
      if [[ "$directory" != "lib/node_modules/@stdlib"* ]]; then
        echo "Warning: changed directory '$directory' is not in 'lib/node_modules/@stdlib'. Skipping the rest of the workflow."
        skip="true"
        break
      fi
    done
    # Later steps read this output to decide whether to run:
    echo "skip=${skip}" >> "$GITHUB_OUTPUT"
# Run JavaScript tests:
- name: 'Run JavaScript tests'
  id: extract-coverage
  # Run unless the changed-directory check explicitly requested a skip; that check does not run for manual dispatches, in which case its `skip` output is empty:
  if: steps.check-changed.outputs.skip != 'true'
  env:
    GITHUB_REPO: ${{ github.repository }}
    GITHUB_REF: ${{ github.ref }}
    # Pass event data and step outputs via the environment rather than substituting them into the script text:
    EVENT_NAME: ${{ github.event_name }}
    PR_NUMBER: ${{ github.event.pull_request.number }}
    DIRECTORIES_USER_INPUT: ${{ steps.changed-directories-user-input.outputs.directories }}
    DIRECTORIES_CHANGED: ${{ steps.changed-directories.outputs.directories }}
  run: |
    # Select the directory list matching the event which triggered the workflow:
    if [ "$EVENT_NAME" == "workflow_dispatch" ]; then
      directories="$DIRECTORIES_USER_INPUT"
    else
      directories="$DIRECTORIES_CHANGED"
    fi
    # Append suffix to coverage base URL for PRs:
    if [ -n "$PR_NUMBER" ]; then
      export COVERAGE_BASE_URL="https://coverage.stdlib.io/pr-${PR_NUMBER}"
    else
      export COVERAGE_BASE_URL="https://coverage.stdlib.io"
    fi
    # Source the coverage script so that it runs in the current shell:
    . "$GITHUB_WORKSPACE/.github/workflows/scripts/run_tests_coverage" "$directories"
  timeout-minutes: 30
# Create final coverage report:
- name: 'Create final coverage report'
  id: create-report
  # Run unless the changed-directory check explicitly requested a skip; that check does not run for manual dispatches, in which case its `skip` output is empty:
  if: steps.check-changed.outputs.skip != 'true'
  run: |
    # NOTE(review): the table is substituted into the script text before the shell runs, so its content is subject to shell quoting rules; presumably the producing script escapes it accordingly - confirm before changing how it is passed.
    table="${{ steps.extract-coverage.outputs.table }}"
    if [ -z "$table" ]; then
      report="## Coverage Report\n\nNo coverage information available."
    else
      # Link to the compared revisions for pull request and push events; other events get no link:
      if [ "${{ github.event_name }}" == "pull_request_target" ]; then
        compare_url="https://github.com/stdlib-js/stdlib/compare/${{ github.event.pull_request.base.sha }}...${{ github.event.pull_request.head.sha }}"
        compare_txt="The above coverage report was generated for the [changes in this PR]($compare_url)."
      elif [ "${{ github.event_name }}" == "push" ]; then
        compare_url="https://github.com/stdlib-js/stdlib/compare/${{ github.event.before }}...${{ github.event.after }}"
        compare_txt="The above coverage report was generated for the [changes in this push]($compare_url)."
      else
        compare_txt=""
      fi
      report="## Coverage Report\n\n${table}\n\n${compare_txt}"
    fi
    # The report is written on a single line; the `\n` sequences above stay as literal backslash-n text:
    echo "report=$report" >> "$GITHUB_OUTPUT"
# Post report as comment to PR:
- name: 'Post report as comment to PR'
  if: github.event_name == 'pull_request_target' && steps.check-changed.outputs.skip == 'false'
  # Pin action to full length commit SHA
  uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
  with:
    github-token: ${{ secrets.STDLIB_BOT_PAT_REPO_WRITE }}
    script: |
      // Retrieve all comments on the pull request (the list endpoint is paginated, so a single request may not include a previously posted report):
      const comments = await github.paginate( github.rest.issues.listComments, {
        'issue_number': context.issue.number,
        'owner': context.repo.owner,
        'repo': context.repo.repo,
        'per_page': 100
      });
      // Look for a report previously posted by the bot:
      const botComment = comments.find( comment => comment.user.login === 'stdlib-bot' && comment.body.includes( '## Coverage Report' ) );
      if ( botComment ) {
        // Update the existing report in place:
        await github.rest.issues.updateComment({
          'owner': context.repo.owner,
          'repo': context.repo.repo,
          'comment_id': botComment.id,
          'body': `${{ steps.create-report.outputs.report }}`
        });
      } else {
        // Post a new report:
        await github.rest.issues.createComment({
          'issue_number': context.issue.number,
          'owner': context.repo.owner,
          'repo': context.repo.repo,
          'body': `${{ steps.create-report.outputs.report }}`
        });
      }
# Post report as comment to commit:
- name: 'Post report as comment to commit'
  if: github.event_name == 'push' && steps.check-changed.outputs.skip == 'false'
  # Pin action to full length commit SHA
  uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
  with:
    github-token: ${{ secrets.STDLIB_BOT_PAT_REPO_WRITE }}
    script: |
      // Await the request so that a failed API call is reported as a failure of this step:
      await github.rest.repos.createCommitComment({
        'commit_sha': context.sha,
        'owner': context.repo.owner,
        'repo': context.repo.repo,
        'body': '${{ steps.create-report.outputs.report }}'
      });
# Checkout coverage repository:
- name: 'Checkout coverage repository'
  # Pin action to full length commit SHA
  uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
  if: steps.check-changed.outputs.skip == 'false'
  with:
    # Code coverage repository to clone:
    repository: stdlib-js/www-test-code-coverage
    # Directory into which to clone the repository:
    path: ./www-test-code-coverage
    # Token for accessing the repository:
    token: ${{ secrets.STDLIB_BOT_FGPAT_REPO_READ }}
    # Avoid storing GitHub token in local Git configuration:
    persist-credentials: false
    # Limit clone depth to the most recent commit:
    fetch-depth: 1
    # Do not remove untracked files before checking out the repository:
    clean: false
# Checkout coverage repository branch:
- name: 'Checkout coverage repository branch'
  if: steps.check-changed.outputs.skip == 'false'
  run: |
    cd ./www-test-code-coverage
    if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
      # Pull requests get a dedicated branch, which is created when it does not exist yet:
      BRANCH_NAME="pr-${{ github.event.pull_request.number }}"
      git fetch origin "$BRANCH_NAME" || true
      git checkout "$BRANCH_NAME" || git checkout -b "$BRANCH_NAME"
      # Remove all directories except .github and .git from branch:
      find . -mindepth 1 -maxdepth 1 -type d -not -name '.github' -not -name '.git' -exec git rm -rf {} +
    else
      BRANCH_NAME="main"
    fi
    # Make the branch name available to subsequent steps, as shell variables do not persist from one step to the next:
    echo "BRANCH_NAME=${BRANCH_NAME}" >> "$GITHUB_ENV"
# Copy artifacts to the repository:
- name: 'Copy artifacts to the repository'
  if: steps.check-changed.outputs.skip == 'false'
  run: |
    if [ -d "./artifacts" ]; then
      cp -R ./artifacts/* ./www-test-code-coverage
      # Get commit SHA and timestamp:
      commit_sha=$(git rev-parse HEAD)
      commit_timestamp=$(git show -s --format=%ci "$commit_sha")
      # Append coverage to ndjson files (one record per copied `index.html` report):
      while IFS= read -r report; do
        # Map the artifact path onto its copy in the coverage repository, rewriting only the leading `./artifacts` directory:
        target="./www-test-code-coverage${report#./artifacts}"
        # Build a JSON array containing, for each `covered/total` fraction in the report, the two counts and the percentage (100 when the total is zero), followed by the commit SHA and timestamp:
        coverage=$(echo -n '['; grep -oP "(?<=class='fraction'>)[0-9]+/[0-9]+" "$target" | awk -F/ '{ if ($2 != 0) print $1 "," $2 "," ($1/$2)*100; else print $1 "," $2 ",100" }' | tr '\n' ',' | sed 's/,$//'; echo -n ",\"$commit_sha\",\"$commit_timestamp\"]")
        echo "$coverage" >> "$(dirname "$target")/coverage.ndjson"
      done < <(find ./artifacts -name 'index.html')
    else
      echo "The artifacts directory does not exist."
    fi
# Import GPG key to sign commits:
- name: 'Import GPG key to sign commits'
  # Pin action to full length commit SHA
  uses: crazy-max/ghaction-import-gpg@cb9bde2e2525e640591a934b1fd28eef1dcaf5e5 # v6.2.0
  if: steps.check-changed.outputs.skip == 'false'
  with:
    # Git signing options passed to the action:
    git_commit_gpgsign: true
    git_user_signingkey: true
    # Private key and passphrase, both read from repository secrets:
    gpg_private_key: ${{ secrets.STDLIB_BOT_GPG_PRIVATE_KEY }}
    passphrase: ${{ secrets.STDLIB_BOT_GPG_PASSPHRASE }}
# Commit and push changes:
- name: 'Commit and push changes'
  if: steps.check-changed.outputs.skip == 'false'
  env:
    REPO_GITHUB_TOKEN: ${{ secrets.STDLIB_BOT_PAT_REPO_WRITE }}
    USER_NAME: stdlib-bot
  run: |
    cd ./www-test-code-coverage
    # NOTE(review): the e-mail address was obfuscated in the reviewed copy of this file; confirm that the value below matches the address configured for the bot account.
    git config --local user.email "noreply@stdlib.io"
    git config --local user.name "stdlib-bot"
    git add .
    # Nothing to push when there are no changes to commit:
    git commit -m "Update artifacts" || exit 0
    # Authenticate with the bot's token; when no branch name has been provided via the environment, push the currently checked out branch:
    git push "https://$USER_NAME:$REPO_GITHUB_TOKEN@github.com/stdlib-js/www-test-code-coverage.git" "${BRANCH_NAME:-HEAD}"