Skip to content

Commit 42bbdc8

Browse files
committed
fix: Use utf-8 in read_text calls. Merge branch 'fix/unicode-defect'
2 parents df643c3 + 23e8f1e commit 42bbdc8

File tree

7 files changed

+991
-590
lines changed

7 files changed

+991
-590
lines changed

.github/workflows/main.yml

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,31 +15,29 @@ jobs:
1515
steps:
1616
- name: Checkout repository
1717
uses: actions/checkout@v2
18-
- name: Setup Python 3.8
18+
- name: Setup Python 3.12
1919
uses: actions/setup-python@v2
2020
with:
21-
python-version: "3.8"
21+
python-version: "3.12"
2222
- name: Install Poetry
2323
uses: abatilo/actions-poetry@<version>  # version tag lost to e-mail obfuscation in the page capture
2424
with:
25-
poetry-version: 1.1.13
25+
poetry-version: 1.8.5
2626
- name: Setup Poetry
2727
run: |
2828
poetry config virtualenvs.create true
2929
poetry config virtualenvs.in-project true
3030
poetry env info
3131
- name: Install dependencies
3232
run: poetry install --no-interaction --no-ansi
33-
- name: Lint code
34-
run: poetry run task lint --check
33+
# - name: Lint code
34+
# run: poetry run task lint --check
3535
- name: Lint writing
3636
uses: actionshub/markdownlint@main
3737

3838
test:
3939
# Test uses a strategy matrix to ensure that sufficient platform test
40-
# coverage is reached. For this configuration, we run the latest Ubuntu
41-
# image with Python 3.6 and 3.9, while also including MacOS + Python 3.8 and
42-
# Windows + Python 3.7. With this spread, we achieve testing of four
40+
# coverage is reached. With the matrix below, we achieve testing of four
4341
# different Python versions and three operating systems without running the
4442
# full twelve possible combinations, greatly reducing load and usage.
4543
name: Test
@@ -51,12 +49,12 @@ jobs:
5149
fail-fast: false
5250
matrix:
5351
os: [ubuntu-latest]
54-
python-version: ["3.7", "3.10"]
52+
python-version: ["3.10", "3.12"]
5553
include:
5654
- os: macos-latest
5755
python-version: "3.9"
5856
- os: windows-latest
59-
python-version: "3.8"
57+
python-version: "3.11"
6058
env:
6159
# These environment variables are passed to CodeCov to identify each build
6260
OS: ${{ matrix.os }}
@@ -71,15 +69,17 @@ jobs:
7169
- name: Install Poetry
7270
uses: abatilo/actions-poetry@<version>  # version tag lost to e-mail obfuscation in the page capture
7371
with:
74-
poetry-version: 1.1.13
72+
poetry-version: 1.8.5
7573
- name: Setup Poetry
7674
run: |
7775
poetry config virtualenvs.create true
7876
poetry config virtualenvs.in-project true
7977
poetry env info
8078
- name: Install dependencies
8179
run: poetry install --no-interaction --no-ansi
82-
- name: Execute tests
80+
- name: Run the test suite
81+
run: poetry run pytest
82+
- name: Collect test coverage
8383
# We need to ensure that the cover-win script is run for Windows, so
8484
# this Action runs different commands based on the runner's operating
8585
# system.
@@ -88,10 +88,10 @@ jobs:
8888
linux: poetry run task cover
8989
macos: poetry run task cover
9090
windows: poetry run task cover-win
91-
- name: Upload coverage
92-
uses: codecov/codecov-action@v1
93-
with:
94-
files: ./coverage.xml
95-
flags: unittests
96-
env_vars: OS,PYTHON
97-
fail_ci_if_error: true
91+
# - name: Upload coverage
92+
# uses: codecov/codecov-action@v1
93+
# with:
94+
# files: ./coverage.xml
95+
# flags: unittests
96+
# env_vars: OS,PYTHON
97+
# fail_ci_if_error: true

.github/workflows/publish.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ jobs:
1111
steps:
1212
- name: Checkout repository
1313
uses: actions/checkout@v2
14-
- name: Set up Python 3.7
14+
- name: Set up Python 3.12
1515
uses: actions/setup-python@v2
1616
with:
17-
python-version: '3.7'
17+
python-version: '3.12'
1818
- name: Install Poetry
1919
uses: Gr1N/setup-poetry@v7
2020
- name: Publish

gator/entities.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,13 @@ def count_entities(given_file, containing_directory, checking_function):
5151
file_contents_count = 0
5252
# create an empty dictionary of the counts
5353
file_contents_count_dictionary = {}
54-
# a valid file exists and thus it is acceptable to perform the checking
55-
# extract the text from the file_for_checking
56-
file_contents = file_for_checking.read_text()
54+
# a valid file exists and thus it is acceptable to perform the checking;
55+
# first extract the text from the file_for_checking; note that this
56+
# explicitly sets the encoding to be UTF-8 to ensure that the input of
57+
# the file will work on operating systems where the default character
58+
# encoding is not UTF-8; this commonly happens on Windows systems where
59+
# the default encoding is usually CP-1252
60+
file_contents = file_for_checking.read_text(encoding='utf-8')
5761
# use the provided checking_function to check the contents of the file
5862
# note this works since Python supports passing a function to a function
5963
file_contents_count, file_contents_count_dictionary = checking_function(

gator/fragments.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -205,10 +205,14 @@ def count_entities(
205205
for file_for_checking in files.create_paths(
206206
file=given_file, home=containing_directory
207207
):
208-
# an actual file is available and command contents are not provided
209-
# the context for this condition is when the function checks file contents
210-
# read the text from the file and then check for the chosen fragment
211-
file_contents = file_for_checking.read_text()
208+
# an actual file is available and command contents are not provided; the
209+
# context for this condition is when the function checks file contents;
210+
# read the text from the file and then check for the chosen fragment;
211+
# note that this explicitly sets the encoding to be UTF-8 to ensure
212+
# that the input of the file will work on operating systems where the
213+
# default character encoding is not UTF-8; this commonly happens on
214+
# Windows systems where the default encoding is usually CP-1252
215+
file_contents = file_for_checking.read_text(encoding='utf-8')
212216
file_contents_count = checking_function(file_contents, chosen_fragment)
213217
file_contents_count_dictionary[file_for_checking.name] = file_contents_count
214218
# return the minimum value and the entire dictionary of counts
@@ -273,10 +277,14 @@ def count_lines(
273277
file=given_file, home=containing_directory
274278
):
275279
file_contents_count = 0
276-
# file is available and the contents are not provided
280+
# file is available and the contents are not provided;
277281
# the context for this condition is when the function checks
278-
# the contents of a specified file that exists on the filesystem
279-
file_contents = file_for_checking.read_text()
282+
# the contents of a specified file that exists on the filesystem;
283+
# note that this explicitly sets the encoding to be UTF-8 to ensure
284+
# that the input of the file will work on operating systems where the
285+
# default character encoding is not UTF-8; this commonly happens on
286+
# Windows systems where the default encoding is usually CP-1252
287+
file_contents = file_for_checking.read_text(encoding='utf-8')
280288
line_list = get_line_list(file_contents)
281289
file_contents_count = len(line_list)
282290
file_contents_count_dictionary[file_for_checking.name] = file_contents_count

gator/markdown.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,12 @@ def specified_tag_greater_than_count(
4040
):
4141
file_tag_count = 0
4242
# since the specified file must be valid and thus suitable for checking,
43-
# read the contents of the file and then check for the chosen tag
44-
file_contents = file_for_checking.read_text()
43+
# read the contents of the file and then check for the chosen tag; note
44+
# that this explicitly sets the encoding to be UTF-8 to ensure that
45+
# the input of the file will work on operating systems where the
46+
# default character encoding is not UTF-8; this commonly happens
47+
# on Windows systems where the default encoding is usually CP-1252
48+
file_contents = file_for_checking.read_text(encoding='utf-8')
4549
file_tag_count = checking_function(file_contents, chosen_tag)
4650
file_tags_count_dictionary[file_for_checking.name] = file_tag_count
4751
# return the minimum value and the entire dictionary of counts

0 commit comments

Comments
 (0)