Skip to content
This repository was archived by the owner on Mar 8, 2020. It is now read-only.

Commit 147288f

Browse files
authored
Merge pull request #65 from juanjux/feature/deny_nonUTF8
Check for non UTF8 contents + test. Factorize content loading.
2 parents 79f11b2 + 48d8d15 commit 147288f

File tree

2 files changed

+26
-14
lines changed

2 files changed

+26
-14
lines changed

bblfsh/client.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@
1212
sys.path.insert(0, os.path.dirname(__file__))
1313

1414

15+
class NonUTF8ContentException(Exception):
    """
    Raised when source contents handed to the client cannot be decoded as UTF-8.

    The client validates contents before building a parse request, so this
    error is reported locally instead of sending undecodable bytes.
    """
17+
18+
1519
class BblfshClient(object):
1620
"""
1721
Babelfish gRPC client. Currently it is only capable of fetching UASTs.
@@ -28,6 +32,21 @@ def __init__(self, endpoint):
2832
self._channel = grpc.insecure_channel(endpoint)
2933
self._stub = ProtocolServiceStub(self._channel)
3034

35+
@staticmethod
def _check_utf8(text):
    """
    Verify that byte contents are valid UTF-8.

    Only byte strings are checked. Text (unicode) strings are already
    decoded, and calling ``decode`` on them either fails with
    ``AttributeError`` (Python 3) or triggers an implicit ASCII encode
    (Python 2), so they are passed through untouched.

    :param text: contents to validate; a byte string or a text string.
    :raises NonUTF8ContentException: if ``text`` is a byte string that \
                                     is not valid UTF-8.
    """
    if not isinstance(text, (bytes, bytearray)):
        # Nothing to decode: not raw bytes, so the UTF-8 check does not apply.
        return
    try:
        text.decode("utf-8")
    except UnicodeDecodeError:
        raise NonUTF8ContentException("Content must be UTF-8, ASCII or Base64 encoded")
41+
42+
@staticmethod
def _get_contents(contents, filename):
    """
    Return the contents to send to the server, validated as UTF-8.

    :param contents: the contents supplied by the caller, or None to read \
                     them from ``filename``.
    :param filename: path of the file to read (in binary mode) when \
                     ``contents`` is None.
    :return: the supplied contents, or the bytes read from ``filename``.
    :raises NonUTF8ContentException: propagated from the UTF-8 check.
    """
    payload = contents
    if payload is None:
        # No explicit contents given: fall back to the file on disk.
        with open(filename, "rb") as source:
            payload = source.read()
    BblfshClient._check_utf8(payload)
    return payload
49+
3150
def parse(self, filename, language=None, contents=None, timeout=None):
3251
"""
3352
Queries the Babelfish server and receives the UAST response for the specified
@@ -48,9 +67,7 @@ def parse(self, filename, language=None, contents=None, timeout=None):
4867
:return: UAST object.
4968
"""
5069

51-
if contents is None:
52-
with open(filename, "rb") as fin:
53-
contents = fin.read()
70+
contents = self._get_contents(contents, filename)
5471
request = ParseRequest(filename=os.path.basename(filename),
5572
content=contents,
5673
language=self._scramble_language(language))
@@ -76,9 +93,7 @@ def native_parse(self, filename, language=None, contents=None, timeout=None):
7693
:return: Native AST object.
7794
"""
7895

79-
if contents is None:
80-
with open(filename, "rb") as fin:
81-
contents = fin.read()
96+
contents = self._get_contents(contents, filename)
8297
request = NativeParseRequest(filename=os.path.basename(filename),
8398
content=contents,
8499
language=self._scramble_language(language))

bblfsh/test.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from bblfsh import BblfshClient, filter, role_id, role_name, Node, ParseResponse
77
from bblfsh.launcher import ensure_bblfsh_is_running
8+
from bblfsh.client import NonUTF8ContentException
89

910

1011
class BblfshTests(unittest.TestCase):
@@ -35,6 +36,10 @@ def testNativeParse(self):
3536
reply = self.client.native_parse(__file__)
3637
assert(reply.ast)
3738

39+
def testNonUTF8ParseError(self):
    """Parsing byte contents that are not valid UTF-8 must be rejected."""
    # \x80 is a lone continuation byte, which is never valid UTF-8.
    with self.assertRaises(NonUTF8ContentException):
        self.client.parse("", "Python", b"a = '\x80abc'")
42+
3843
def testUASTDefaultLanguage(self):
3944
self._validate_resp(self.client.parse(__file__))
4045

@@ -117,14 +122,6 @@ def testFilterBadQuery(self):
117122
node = Node()
118123
self.assertRaises(RuntimeError, filter, node, "//*roleModule")
119124

120-
def testIssue60(self):
    """Parse the issue60.py fixture and check no literal-role nodes are found."""
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(here, "fixtures", "issue60.py")
    rep = self.client.parse(fixture_path)
    assert(rep.uast)
    literal_nodes = filter(rep.uast, "//@roleLiteral")
    self.assertFalse(any(literal_nodes))
127-
128125
def testRoleIdName(sedlf):
129126
assert(role_id(role_name(1)) == 1)
130127
assert(role_name(role_id("IDENTIFIER")) == "IDENTIFIER")

0 commit comments

Comments
 (0)