Skip to content
This repository was archived by the owner on Jul 16, 2025. It is now read-only.

Commit 1c24461

Browse files
authored
Fix handling files with unicode names or contents (#661)
This adds the `-z` flag to `git ls-files`, which instructs git to output filenames separated by `\0` and otherwise verbatim, without any special quoting or encoding. The second fix enforces `utf-8` encoding when reading file contents in order to produce "file fixes".
1 parent 2165dc1 commit 1c24461

File tree

5 files changed

+48
-44
lines changed

5 files changed

+48
-44
lines changed

codecov_cli/helpers/versioning_systems.py

Lines changed: 8 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from itertools import chain
22
import logging
3+
import re
34
import subprocess
45
import typing as t
56
from pathlib import Path
67
from shutil import which
78

89
from codecov_cli.fallbacks import FallbackFieldEnum
10+
from codecov_cli.helpers.folder_searcher import search_files
911
from codecov_cli.helpers.git import parse_git_service, parse_slug
1012
from abc import ABC, abstractmethod
1113

@@ -168,19 +170,11 @@ def list_relevant_files(
168170
if dir_to_use is None:
169171
raise ValueError("Can't determine root folder")
170172

171-
cmd = ["git", "-C", str(dir_to_use), "ls-files"]
173+
cmd = ["git", "-C", str(dir_to_use), "ls-files", "-z"]
172174
if recurse_submodules:
173175
cmd.append("--recurse-submodules")
174176
res = subprocess.run(cmd, capture_output=True)
175-
176-
return [
177-
(
178-
filename[1:-1]
179-
if filename.startswith('"') and filename.endswith('"')
180-
else filename
181-
)
182-
for filename in res.stdout.decode("unicode_escape").strip().split("\n")
183-
]
177+
return res.stdout.decode().split("\0")
184178

185179

186180
class NoVersioningSystem(VersioningSystemInterface):
@@ -201,22 +195,7 @@ def list_relevant_files(
201195
if dir_to_use is None:
202196
raise ValueError("Can't determine root folder")
203197

204-
cmd = [
205-
"find",
206-
str(dir_to_use),
207-
*chain.from_iterable(
208-
["-name", block, "-prune", "-o"] for block in IGNORE_DIRS
209-
),
210-
*chain.from_iterable(
211-
["-path", block, "-prune", "-o"] for block in IGNORE_PATHS
212-
),
213-
"-type",
214-
"f",
215-
"-print",
216-
]
217-
res = subprocess.run(cmd, capture_output=True)
218-
return [
219-
filename
220-
for filename in res.stdout.decode("unicode_escape").strip().split("\n")
221-
if filename
222-
]
198+
files = search_files(
199+
dir_to_use, folders_to_ignore=[], filename_include_regex=re.compile("")
200+
)
201+
return [f.relative_to(dir_to_use).as_posix() for f in files]

codecov_cli/services/upload/upload_collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def _get_file_fixes(
115115
eof = None
116116

117117
try:
118-
with open(filename, "r") as f:
118+
with open(filename, "r", encoding="utf-8") as f:
119119
# If lineno is unset that means that the
120120
# file is empty thus the eof should be 0
121121
# so lineno will be set to -1 here
tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// See <https://github.com/codecov/codecov-action/issues/1550>
2+
// Plus, add a bunch of unicode chars in order to trigger
3+
// <https://github.com/codecov/codecov-action/issues/1539>
4+
5+
// µ, ¹², ‘’“”, őá…–🤮🚀¿ 한글 테스트

tests/helpers/test_versioning_systems.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
import pytest
44

55
from codecov_cli.fallbacks import FallbackFieldEnum
6-
from codecov_cli.helpers.versioning_systems import GitVersioningSystem
6+
from codecov_cli.helpers.versioning_systems import (
7+
GitVersioningSystem,
8+
NoVersioningSystem,
9+
)
710

811

912
class TestGitVersioningSystem(object):
@@ -105,8 +108,7 @@ def test_list_relevant_files_returns_correct_network_files(self, mocker, tmp_pat
105108
"codecov_cli.helpers.versioning_systems.subprocess.run",
106109
return_value=mocked_subprocess,
107110
)
108-
# git ls-files diplays a single \n as \\\\n
109-
mocked_subprocess.stdout = b'a.txt\nb.txt\n"a\\\\nb.txt"\nc.txt\nd.txt\n.circleci/config.yml\nLICENSE\napp/advanced calculations/advanced_calculator.js\n'
111+
mocked_subprocess.stdout = b"a.txt\0b.txt\0a\\nb.txt\0c.txt\0d.txt\0.circleci/config.yml\0LICENSE\0app/advanced calculations/advanced_calculator.js"
110112

111113
vs = GitVersioningSystem()
112114

@@ -138,6 +140,25 @@ def test_list_relevant_files_recurse_submodules(self, mocker, tmp_path):
138140
vs = GitVersioningSystem()
139141
_ = vs.list_relevant_files(tmp_path, recurse_submodules=True)
140142
subproc_run.assert_called_with(
141-
["git", "-C", str(tmp_path), "ls-files", "--recurse-submodules"],
143+
["git", "-C", str(tmp_path), "ls-files", "-z", "--recurse-submodules"],
142144
capture_output=True,
143145
)
146+
147+
148+
def test_exotic_git_filenames():
149+
vs = GitVersioningSystem()
150+
found_repo_files = vs.list_relevant_files()
151+
152+
# See <https://github.com/codecov/codecov-action/issues/1550>
153+
assert (
154+
"tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php" in found_repo_files
155+
)
156+
157+
158+
def test_exotic_fallback_filenames():
159+
vs = NoVersioningSystem()
160+
found_repo_files = vs.list_relevant_files()
161+
162+
assert (
163+
"tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php" in found_repo_files
164+
)

tests/services/upload/test_upload_collector.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
from pathlib import Path
22
from unittest.mock import patch
3-
import pytest
4-
import sys
53

64
from codecov_cli.helpers.versioning_systems import (
75
GitVersioningSystem,
@@ -87,6 +85,15 @@ def test_fix_php_files():
8785
assert fixes_for_php_file.fixed_lines_with_reason == set([])
8886

8987

88+
def test_can_read_unicode_file():
89+
col = UploadCollector(None, None, None, None, True)
90+
91+
php_file = Path("tests/data/Контроллеры/Пользователь/ГлавныйКонтроллер.php")
92+
_fixes = col._produce_file_fixes([php_file])
93+
# we just want to assert that this is not throwing an error related to file encoding,
94+
# see <https://github.com/codecov/codecov-action/issues/1539>
95+
96+
9097
def test_fix_for_cpp_swift_vala(tmp_path):
9198
cpp_file = Path("tests/data/files_to_fix_examples/sample.cpp")
9299

@@ -182,10 +189,6 @@ def test_generate_upload_data(tmp_path):
182189

183190

184191
@patch("codecov_cli.services.upload.upload_collector.logger")
185-
@pytest.mark.skipif(
186-
sys.platform == "win32",
187-
reason="the fallback `list_relevant_files` is currently broken on windows",
188-
)
189192
def test_generate_upload_data_with_none_network(mock_logger, tmp_path):
190193
(tmp_path / "coverage.xml").touch()
191194

@@ -206,10 +209,6 @@ def test_generate_upload_data_with_none_network(mock_logger, tmp_path):
206209
assert len(res.file_fixes) > 1
207210

208211

209-
@pytest.mark.skipif(
210-
sys.platform == "win32",
211-
reason="the fallback `list_relevant_files` is currently broken on windows",
212-
)
213212
def test_generate_network_with_no_versioning_system(tmp_path):
214213
versioning_system = NoVersioningSystem()
215214
found_files = versioning_system.list_relevant_files()

0 commit comments

Comments
 (0)