Skip to content

Commit b20eb6a

Browse files
authored
Merge pull request #771 from GeorgianaElena/fix_requirements_possible_encodings
Handle different file encodings
2 parents 96cfc50 + 75f4a70 commit b20eb6a

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

repo2docker/buildpacks/python/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import os
33

44
from ..conda import CondaBuildPack
5-
from ...utils import is_local_pip_requirement
5+
from ...utils import is_local_pip_requirement, open_guess_encoding
66

77

88
class PythonBuildPack(CondaBuildPack):
@@ -86,7 +86,7 @@ def _should_preassemble_pip(self):
8686
requirements_txt = self.binder_path(name)
8787
if not os.path.exists(requirements_txt):
8888
continue
89-
with open(requirements_txt) as f:
89+
with open_guess_encoding(requirements_txt) as f:
9090
for line in f:
9191
if is_local_pip_requirement(line):
9292
return False

repo2docker/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import re
55
import subprocess
6+
import chardet
67

78
from shutil import copystat, copy2
89

@@ -70,6 +71,27 @@ def chdir(path):
7071
os.chdir(old_dir)
7172

7273

74+
@contextmanager
def open_guess_encoding(path):
    """
    Open a file in text mode, using an encoding guessed with chardet.

    The file is first read in binary mode and fed line by line to a
    chardet ``UniversalDetector`` until the detector reports that it is
    done or the file is exhausted. The file is then reopened in text mode
    with the detected encoding and yielded to the caller; it is closed
    when the ``with`` block using this context manager exits.

    Arguments:
        path: path of the file to open.

    Yields:
        The file object, opened in text mode with the guessed encoding.
    """
    detector = chardet.universaldetector.UniversalDetector()
    with open(path, "rb") as raw:
        # Iterate lazily rather than calling readlines(): detection usually
        # finishes early, so the whole file need not be loaded into memory.
        for line in raw:
            detector.feed(line)
            if detector.done:
                break
    detector.close()

    # NOTE: when chardet cannot determine an encoding (e.g. an empty file)
    # the reported encoding is None and open() uses its default encoding.
    with open(path, encoding=detector.result["encoding"]) as file:
        yield file
93+
94+
7395
def validate_and_generate_port_mapping(port_mappings):
7496
"""
7597
Validate a list of port mappings and return a dictionary of port mappings.

tests/unit/test_utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from repo2docker import utils
77
import pytest
88
import subprocess
9+
import tempfile
910

1011

1112
def test_capture_cmd_no_capture_success():
@@ -112,6 +113,15 @@ def test_normalize_doi():
112113
assert utils.normalize_doi("http://dx.doi.org/10.1234/jshd123") == "10.1234/jshd123"
113114

114115

116+
def test_open_guess_encoding():
    """A UTF-16 encoded file is read back as the original text."""
    expected = "Rică nu știa să zică râu, rățușcă, rămurică."
    encoded = expected.encode("utf-16")
    with tempfile.NamedTemporaryFile(mode="wb") as tmp:
        tmp.write(encoded)
        # Rewind; seeking a buffered writer also pushes pending bytes out,
        # so the content is on disk before the file is reopened by name.
        tmp.seek(0)
        with utils.open_guess_encoding(tmp.name) as reader:
            content = reader.read()
            assert content == expected
123+
124+
115125
@pytest.mark.parametrize(
116126
"req, is_local",
117127
[

0 commit comments

Comments
 (0)