diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000..b44a9e5 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,6 @@ +[codespell] +# Ref: https://github.com/codespell-project/codespell#using-a-config-file +skip = .git*,.codespellrc,./examples/split_process/input.txt +check-hidden = true +# ignore-regex = +ignore-words-list = checkin diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 0000000..c59e047 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,25 @@ +# Codespell configuration is within .codespellrc +--- +name: Codespell + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Annotate locations with typos + uses: codespell-project/codespell-problem-matcher@v1 + - name: Codespell + uses: codespell-project/actions-codespell@v2 diff --git a/README.md b/README.md index a0f9c2e..8973c6d 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ To deactivate the virtual environment in your shell, run the command: deactivate Alternatively, a set of convenience scripts are provided that activate the -virutalenv before calling `dsub`, `dstat`, and `ddel`. They are in the +virtualenv before calling `dsub`, `dstat`, and `ddel`. They are in the [bin](https://github.com/DataBiosphere/dsub/tree/main/bin) directory. You can use these scripts if you don't want to activate the virtualenv explicitly in your shell. @@ -472,7 +472,7 @@ using the environment variable. Please read and [Semantics](https://github.com/GoogleCloudPlatform/gcsfuse/blob/master/docs/semantics.md) before using Cloud Storage FUSE. 
-##### Mounting an existing peristent disk +##### Mounting an existing persistent disk To have the `google-cls-v2` or `google-batch` provider mount a persistent disk that you have pre-created and populated, use the `--mount` command line flag and the diff --git a/docs/compute_quotas.md b/docs/compute_quotas.md index 7e013fa..5ba8f32 100644 --- a/docs/compute_quotas.md +++ b/docs/compute_quotas.md @@ -16,7 +16,7 @@ jobs. When you submit a `dsub` job using one of the Google providers, the single implicit task (for jobs that do not use a `--tasks` file) or the set of tasks -submited (for jobs that do use a `--tasks` file) are submitted to the +submitted (for jobs that do use a `--tasks` file) are submitted to the [Cloud Life Sciences pipelines.run() API](https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/projects.locations.pipelines/run). The API maintains a queue of [operations](https://cloud.google.com/life-sciences/docs/reference/rest/v2beta/projects.locations.operations) @@ -39,7 +39,7 @@ If the lack of sufficient quota is not transient (the VM requires more resources than your quota maximum), then the Life Sciences API will mark the operation as failed and provide an informative message. -## Handling insufficent quota +## Handling insufficient quota When you have insufficient quota to run your job tasks, you have a few options: diff --git a/docs/providers/README.md b/docs/providers/README.md index 0d55b5d..18beee6 100644 --- a/docs/providers/README.md +++ b/docs/providers/README.md @@ -31,7 +31,7 @@ documentation. ### Environment variables point to where to write `--output` files -When you write your commands that run in your Docker container, you shoud +When you write your commands that run in your Docker container, you should always write output files to the locations specified by the environment variables that are set for them. 
You may observe that providers consistently expect output files to be @@ -198,7 +198,7 @@ The `local` provider does not support resource-related flags such as The `google-cls-v2` and `google-batch` providers share a significant amount of their implementation. The `google-cls-v2` provider utilizes the Google Cloud Life Sciences -Piplines API [v2beta](https://cloud.google.com/life-sciences/docs/apis) +Pipelines API [v2beta](https://cloud.google.com/life-sciences/docs/apis) while the `google-batch` provider utilizes the Google Cloud [Batch API](https://cloud.google.com/batch/docs/reference/rest) to queue a request for the following sequence of events: diff --git a/dsub/commands/ddel.py b/dsub/commands/ddel.py index 748e91f..551e9dd 100755 --- a/dsub/commands/ddel.py +++ b/dsub/commands/ddel.py @@ -113,7 +113,7 @@ def _emit_search_criteria(user_ids, job_ids, task_ids, labels): if task_ids: print(' task-id:') print(' %s\n' % task_ids) - # Labels are in a LabelParam namedtuple and must be reformated for printing. + # Labels are in a LabelParam namedtuple and must be reformatted for printing. if labels: print(' labels:') print(' %s\n' % repr(labels)) diff --git a/dsub/commands/dsub.py b/dsub/commands/dsub.py index 73d1973..961beef 100644 --- a/dsub/commands/dsub.py +++ b/dsub/commands/dsub.py @@ -693,7 +693,7 @@ def _generate_unique_job_id() -> str: """Generates a unique job identifier. Uses uuid4() to generate a Universally Unique IDentifier and performs a - small transformation to accomodate the Google Batch API. + small transformation to accommodate the Google Batch API. Google Batch requires a client-provided job identifier and requires that the first character be a non-digit. diff --git a/dsub/lib/param_util.py b/dsub/lib/param_util.py index c5b9dba..8557982 100644 --- a/dsub/lib/param_util.py +++ b/dsub/lib/param_util.py @@ -32,7 +32,7 @@ class ListParamAction(argparse.Action): """Append each value as a separate element to the parser destination. 
- This class satisifes the action interface of argparse.ArgumentParser and + This class satisfies the action interface of argparse.ArgumentParser and refines the 'append' action for arguments with `nargs='*'`. For the parameters: diff --git a/dsub/lib/providers_util.py b/dsub/lib/providers_util.py index 5700ea5..8bd7268 100644 --- a/dsub/lib/providers_util.py +++ b/dsub/lib/providers_util.py @@ -29,7 +29,7 @@ # Requirements can be found in the docs/providers/README.md. # # This module defines some utility names and functions such that new providers -# can follow the patterns of exising providers. +# can follow the patterns of existing providers. # # Unless providers have a compelling reason not to, they should just provide # a single disk for everything that needs to be written by the dsub diff --git a/dsub/providers/DEVELOPERS.md b/dsub/providers/DEVELOPERS.md index ca01b06..9e3b731 100644 --- a/dsub/providers/DEVELOPERS.md +++ b/dsub/providers/DEVELOPERS.md @@ -35,7 +35,7 @@ including: - The folder for inputs is expected to be writeable. A historical pattern for some scripts has been to use the directory where inputs are as a scratch - working diretory. If your provider must make the input directories read-only + working directory. If your provider must make the input directories read-only it may limit portability of existing scripts. - The environment variable `TMPDIR` should be set explicitly to a directory diff --git a/dsub/providers/google_batch.py b/dsub/providers/google_batch.py index 654539b..f983035 100644 --- a/dsub/providers/google_batch.py +++ b/dsub/providers/google_batch.py @@ -462,7 +462,7 @@ def _format_batch_job_id(self, task_metadata, job_metadata) -> str: # append the dsub task-id and task-attempt to the job-id for the # batch job ID. # For single-task dsub jobs, there is no task-id, so use 0. 
- # Use a '-' character as the delimeter because Batch API job ID + # Use a '-' character as the delimiter because Batch API job ID # must match regex ^[a-z]([a-z0-9-]{0,61}[a-z0-9])?$ task_id = task_metadata.get('task-id') or 0 task_attempt = task_metadata.get('task-attempt') or 0 diff --git a/dsub/providers/google_v2_base.py b/dsub/providers/google_v2_base.py index 4147653..5b21733 100644 --- a/dsub/providers/google_v2_base.py +++ b/dsub/providers/google_v2_base.py @@ -145,7 +145,7 @@ class GoogleV2EventMap(object): - """Helper for extracing a set of normalized, filtered operation events.""" + """Helper for extracting a set of normalized, filtered operation events.""" def __init__(self, op): self._op = op @@ -271,7 +271,7 @@ def _pipelines_run_api(self, request): raise NotImplementedError('Derived class must implement this function') def _operations_list_api(self, ops_filter, page_token, page_size): - """Executes the provider-specific operaitons.list() API.""" + """Executes the provider-specific operations.list() API.""" raise NotImplementedError('Derived class must implement this function') def _operations_cancel_api_def(self): @@ -797,7 +797,7 @@ def lookup_job_tasks(self, create_time_max: a timezone-aware datetime value for the most recent create time of a task, inclusive. max_tasks: the maximum number of job tasks to return or 0 for no limit. - page_size: the page size to use for each query to the pipelins API. + page_size: the page size to use for each query to the pipelines API. Raises: ValueError: if both a job id list and a job name list are provided @@ -1027,7 +1027,7 @@ def error_message(self): """Returns an error message if the operation failed for any reason. Failure as defined here means ended for any reason other than 'success'. - This means that a successful cancelation will also return an error message. + This means that a successful cancellation will also return an error message. Returns: string, string will be empty if job did not error. 
diff --git a/dsub/providers/google_v2_operations.py b/dsub/providers/google_v2_operations.py index 66fa2ae..a68b187 100644 --- a/dsub/providers/google_v2_operations.py +++ b/dsub/providers/google_v2_operations.py @@ -138,7 +138,7 @@ def get_last_event(op): def external_network_blocked(op): - """Retun True if the blockExternalNetwork flag is set for the user action.""" + """Return True if the blockExternalNetwork flag is set for the user action.""" user_action = get_action_by_name(op, 'user-command') if user_action: if _API_VERSION == google_v2_versions.V2BETA: @@ -149,7 +149,7 @@ def external_network_blocked(op): def is_unexpected_exit_status_event(e): - """Retun True if the event is for an unexpected exit status.""" + """Return True if the event is for an unexpected exit status.""" if _API_VERSION == google_v2_versions.V2BETA: return 'unexpectedExitStatus' in e @@ -159,7 +159,7 @@ def is_unexpected_exit_status_event(e): def is_failed_event(e): - """Retun True if the event is an operation failed event.""" + """Return True if the event is an operation failed event.""" if _API_VERSION == google_v2_versions.V2BETA: return 'failed' in e @@ -169,7 +169,7 @@ def is_failed_event(e): def is_container_stopped_event(e): - """Retun True if the event is a container stopped event.""" + """Return True if the event is a container stopped event.""" if _API_VERSION == google_v2_versions.V2BETA: return 'containerStopped' in e diff --git a/examples/custom_scripts/README.md b/examples/custom_scripts/README.md index fd1d965..eeb91d9 100644 --- a/examples/custom_scripts/README.md +++ b/examples/custom_scripts/README.md @@ -32,7 +32,7 @@ All of the source VCF files are stored in a public bucket at ## Setup -* Follow the [dsub geting started](../../README.md#getting-started) +* Follow the [dsub getting started](../../README.md#getting-started) instructions. 
## Process one file with a Bash shell script diff --git a/examples/decompress/README.md b/examples/decompress/README.md index 7fa2f81..abccb12 100644 --- a/examples/decompress/README.md +++ b/examples/decompress/README.md @@ -18,7 +18,7 @@ All of the source VCF files are stored in a public bucket at ## Setup -* Follow the [dsub geting started](../../README.md#getting-started) +* Follow the [dsub getting started](../../README.md#getting-started) instructions. ## Decompress one file @@ -85,7 +85,7 @@ Output should look like: ``` ##fileformat=VCFv4.1 ##FILTER= -##FILTER= +##FILTER= ##FORMAT= ##FORMAT= ``` diff --git a/examples/fastqc/README.md b/examples/fastqc/README.md index ef02fb7..a558104 100644 --- a/examples/fastqc/README.md +++ b/examples/fastqc/README.md @@ -21,7 +21,7 @@ All of the source BAM files are stored in a public bucket at ## Setup -* Follow the [dsub geting started](../../README.md#getting-started) +* Follow the [dsub getting started](../../README.md#getting-started) instructions. * (Optional) [Enable](https://console.cloud.google.com/flows/enableapi?apiid=cloudbuild.googleapis.com) diff --git a/examples/samtools/README.md b/examples/samtools/README.md index 5f968ad..681d9d5 100644 --- a/examples/samtools/README.md +++ b/examples/samtools/README.md @@ -19,7 +19,7 @@ All of the source BAM files are stored in a public bucket at ## Setup -* Follow the [dsub geting started](../../README.md#getting-started) +* Follow the [dsub getting started](../../README.md#getting-started) instructions. ## Index one BAM file diff --git a/examples/split_process/README.md b/examples/split_process/README.md index ad71d35..03c5e66 100644 --- a/examples/split_process/README.md +++ b/examples/split_process/README.md @@ -17,7 +17,7 @@ run on Google Cloud with minimal change (delete the --provider line). ## Setup -* Follow the [dsub geting started](../../README.md#getting-started) +* Follow the [dsub getting started](../../README.md#getting-started) instructions. 
Since this script uses the `local` backend provider, you will need diff --git a/examples/split_process/input.txt b/examples/split_process/input.txt index fee181e..91dad74 100644 --- a/examples/split_process/input.txt +++ b/examples/split_process/input.txt @@ -9,9 +9,9 @@ l'homme comme l'idéal commun à atteindre par tous les peuples et toutes les nations afin que tous les individus et tous les organes de la société, ayant cette Déclaration constamment à l'esprit, s'efforcent, par l'enseignement et l'éducation, de développer le respect de ces droits et libertés et d'en assurer, -par des mesures progressives d'ordre national et international, la +par des mesures progressives d'ordre national et international, la reconnaissance et l'application universelles et effectives, tant parmi les -populations des Etats Membres eux-mêmes que parmi celles des territoires placés +populations des Etats Membres eux-mêmes que parmi celles des territoires placés sous leur juridiction. Article premier @@ -35,4 +35,4 @@ autonome ou soumis à une limitation quelconque de souveraineté. Article 3 -Tout individu a droit à la vie, à la liberté et à la sûreté de sa personne. +Tout individu a droit à la vie, à la liberté et à la sûreté de sa personne. diff --git a/test/integration/e2e_after.py b/test/integration/e2e_after.py index 719ede7..5a67c8d 100644 --- a/test/integration/e2e_after.py +++ b/test/integration/e2e_after.py @@ -22,7 +22,7 @@ import sys # Because this may be invoked from another directory (treated as a library) or -# invoked localy (treated as a binary) both import styles need to be supported. +# invoked locally (treated as a binary) both import styles need to be supported. # pylint: disable=g-import-not-at-top try: from . 
import test_setup_e2e as test diff --git a/test/integration/e2e_after_fail.py b/test/integration/e2e_after_fail.py index 9963496..9fe3d9b 100644 --- a/test/integration/e2e_after_fail.py +++ b/test/integration/e2e_after_fail.py @@ -24,7 +24,7 @@ from dsub.lib import dsub_errors # Because this may be invoked from another directory (treated as a library) or -# invoked localy (treated as a binary) both import styles need to be supported. +# invoked locally (treated as a binary) both import styles need to be supported. # pylint: disable=g-import-not-at-top try: from . import test_setup_e2e as test diff --git a/test/integration/e2e_env_list.py b/test/integration/e2e_env_list.py index 3099526..774937a 100644 --- a/test/integration/e2e_env_list.py +++ b/test/integration/e2e_env_list.py @@ -22,7 +22,7 @@ import sys # Because this may be invoked from another directory (treated as a library) or -# invoked localy (treated as a binary) both import styles need to be supported. +# invoked locally (treated as a binary) both import styles need to be supported. # pylint: disable=g-import-not-at-top try: from . import test_setup_e2e as test diff --git a/test/integration/e2e_io_tasks.py b/test/integration/e2e_io_tasks.py index 85e422b..efe3fba 100644 --- a/test/integration/e2e_io_tasks.py +++ b/test/integration/e2e_io_tasks.py @@ -23,7 +23,7 @@ import sys # Because this may be invoked from another directory (treated as a library) or -# invoked localy (treated as a binary) both import styles need to be supported. +# invoked locally (treated as a binary) both import styles need to be supported. # pylint: disable=g-import-not-at-top try: from . 
import test_setup_e2e as test diff --git a/test/integration/e2e_python_api.py b/test/integration/e2e_python_api.py index 63841ea..00ce531 100644 --- a/test/integration/e2e_python_api.py +++ b/test/integration/e2e_python_api.py @@ -28,7 +28,7 @@ from dsub.providers import local # Because this may be invoked from another directory (treated as a library) or -# invoked localy (treated as a binary) both import styles need to be supported. +# invoked locally (treated as a binary) both import styles need to be supported. # pylint: disable=g-import-not-at-top try: from . import test_setup diff --git a/test/integration/e2e_requester_pays_buckets.sh b/test/integration/e2e_requester_pays_buckets.sh index 47b9356..236ac9a 100755 --- a/test/integration/e2e_requester_pays_buckets.sh +++ b/test/integration/e2e_requester_pays_buckets.sh @@ -18,7 +18,7 @@ set -o errexit set -o nounset # This test is designed to verify that accessing a Requester Pays bucket -# by specifiying a user-project to bill works. All input files used in this test +# by specifying a user-project to bill works. All input files used in this test # are inside a requester-pays bucket. # # Note that we do not include a test for writing and logging to the requester diff --git a/test/integration/get_data_value.py b/test/integration/get_data_value.py index 4fefefe..736de09 100644 --- a/test/integration/get_data_value.py +++ b/test/integration/get_data_value.py @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Utililty for helping shell scripts extract values from JSON or YAML. +"""Utility for helping shell scripts extract values from JSON or YAML. 
Usage: diff --git a/test/integration/test_setup_e2e.py b/test/integration/test_setup_e2e.py index d2534a5..27e62a2 100644 --- a/test/integration/test_setup_e2e.py +++ b/test/integration/test_setup_e2e.py @@ -42,7 +42,7 @@ from dsub.commands import dsub as dsub_command # Because this may be invoked from another directory (treated as a library) or -# invoked localy (treated as a binary) both import styles need to be supported. +# invoked locally (treated as a binary) both import styles need to be supported. # pylint: disable=g-import-not-at-top try: from . import test_setup @@ -58,7 +58,7 @@ def _environ(): - """Merge the current enviornment and test variables into a dictionary.""" + """Merge the current environment and test variables into a dictionary.""" e = dict(os.environ) for var in TEST_VARS + TEST_E2E_VARS: e[var] = globals()[var] diff --git a/test/run_tests.sh b/test/run_tests.sh index bba73a8..873af0a 100755 --- a/test/run_tests.sh +++ b/test/run_tests.sh @@ -207,7 +207,7 @@ function get_test_providers() { local providers="$(echo -n "${test_file}" | awk -F . '{ print $(NF-1) }')" # Special case the google-batch tests - don't run them when this flag is set - # To be renabled once batch client library is available in G3 + # To be re-enabled once batch client library is available in G3 if [[ "${providers}" == "google-batch" ]] && [[ "${NO_GOOGLE_BATCH_TESTS:-0}" -eq 1 ]]; then echo -n "" else diff --git a/test/unit/job_model_test.py b/test/unit/job_model_test.py index f676491..3e3c808 100644 --- a/test/unit/job_model_test.py +++ b/test/unit/job_model_test.py @@ -186,7 +186,7 @@ def testScriptCreation(self): # TASK_3 gs://bucket/path/NA06986.chrom18...bam gs://bucket/path/3/*.md5 # pylint: disable=common_typos_disable -# pilot3_exon_targetted_GRCh37_bams raises a "common typos" warning: "targetted" +# pilot3_exon_targetted_GRCh37_bams raises a "common typos" warning: "targetted" _IO_TASKS_META = textwrap.dedent(""" create-time: {}