Skip to content

Commit 7ba65eb

Browse files
authored
Bug Fix: Return the parsed path without netloc for HDFS (#675)
1 parent 1b8a4c3 commit 7ba65eb

File tree

2 files changed: 4 additions and 4 deletions.

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ def parse_location(location: str) -> Tuple[str, str, str]:
         if not uri.scheme:
             return "file", uri.netloc, os.path.abspath(location)
         elif uri.scheme == "hdfs":
-            return uri.scheme, uri.netloc, location
+            return uri.scheme, uri.netloc, uri.path
         else:
             return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}"
 

tests/io/test_pyarrow.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,9 +1644,9 @@ def check_results(location: str, expected_schema: str, expected_netloc: str, exp
         assert netloc == expected_netloc
         assert uri == expected_uri
 
-    check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "hdfs://127.0.0.1:9000/root/foo.txt")
-    check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "hdfs://127.0.0.1/root/foo.txt")
-    check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "hdfs://clusterA/root/foo.txt")
+    check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "/root/foo.txt")
+    check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "/root/foo.txt")
+    check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "/root/foo.txt")
 
     check_results("/root/foo.txt", "file", "", "/root/foo.txt")
     check_results("/root/tmp/foo.txt", "file", "", "/root/tmp/foo.txt")

0 commit comments

Comments
 (0)