Skip to content

Commit acd25ef

Browse files
arnavk23 and Copilot authored
Fix : error handling for malformed TOML files (#436)
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 75e9c26 commit acd25ef

File tree

2 files changed

+162 -12 lines changed

2 files changed

+162 -12 lines changed

tagbot/action/repo.py

Lines changed: 44 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,12 @@ def _project(self, k: str) -> str:
168168
pass # Try the next filename
169169
else:
170170
raise InvalidProject("Project file was not found")
171-
self.__project = toml.loads(contents.decoded_content.decode())
171+
try:
172+
self.__project = toml.loads(contents.decoded_content.decode())
173+
except toml.TomlDecodeError as e:
174+
raise InvalidProject(f"Failed to parse Project.toml: {e}")
175+
except UnicodeDecodeError as e:
176+
raise InvalidProject(f"Failed to parse Project.toml (encoding error): {e}")
172177
return str(self.__project[k])
173178

174179
@property
@@ -194,16 +199,35 @@ def _registry_path(self) -> Optional[str]:
194199
uuid = self._project("uuid").lower()
195200
except KeyError:
196201
raise InvalidProject("Project file has no UUID")
197-
if self._clone_registry:
198-
with open(os.path.join(self._registry_clone_dir, "Registry.toml")) as f:
199-
registry = toml.load(f)
200-
else:
201-
contents = self._only(self._registry.get_contents("Registry.toml"))
202-
blob = self._registry.get_git_blob(contents.sha)
203-
b64 = b64decode(blob.content)
204-
string_contents = b64.decode("utf8")
205-
registry = toml.loads(string_contents)
202+
try:
203+
if self._clone_registry:
204+
with open(os.path.join(self._registry_clone_dir, "Registry.toml")) as f:
205+
registry = toml.load(f)
206+
else:
207+
contents = self._only(self._registry.get_contents("Registry.toml"))
208+
blob = self._registry.get_git_blob(contents.sha)
209+
b64 = b64decode(blob.content)
210+
string_contents = b64.decode("utf8")
211+
registry = toml.loads(string_contents)
212+
except toml.TomlDecodeError as e:
213+
logger.warning(
214+
f"Failed to parse Registry.toml (malformed TOML): {e}. "
215+
"This may indicate a structural issue with the registry file."
216+
)
217+
return None
218+
except (UnicodeDecodeError, OSError) as e:
219+
logger.warning(
220+
f"Failed to parse Registry.toml ({type(e).__name__}): {e}. "
221+
"This may indicate a temporary issue with the registry file."
222+
)
223+
return None
206224

225+
if "packages" not in registry:
226+
logger.warning(
227+
"Registry.toml is missing the 'packages' key. "
228+
"This may indicate a structural issue with the registry file."
229+
)
230+
return None
207231
if uuid in registry["packages"]:
208232
self.__registry_path = registry["packages"][uuid]["path"]
209233
return self.__registry_path
@@ -219,8 +243,16 @@ def _registry_url(self) -> Optional[str]:
219243
contents = self._only(self._registry.get_contents(f"{root}/Package.toml"))
220244
except UnknownObjectExceptions:
221245
raise InvalidProject("Package.toml was not found")
222-
package = toml.loads(contents.decoded_content.decode())
223-
self.__registry_url = package["repo"]
246+
try:
247+
package = toml.loads(contents.decoded_content.decode())
248+
except toml.TomlDecodeError as e:
249+
raise InvalidProject(f"Failed to parse Package.toml: {e}")
250+
except UnicodeDecodeError as e:
251+
raise InvalidProject(f"Failed to parse Package.toml (encoding error): {e}")
252+
try:
253+
self.__registry_url = package["repo"]
254+
except KeyError:
255+
raise InvalidProject("Package.toml is missing the 'repo' key")
224256
return self.__registry_url
225257

226258
@property

test/action/test_repo.py

Lines changed: 118 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,26 @@ def test_project():
9898
r._project("name")
9999

100100

101+
def test_project_malformed_toml():
102+
"""Test that malformed Project.toml raises InvalidProject."""
103+
r = _repo()
104+
r._repo.get_contents = Mock(
105+
return_value=Mock(decoded_content=b"""name = "FooBar"\nuuid""")
106+
)
107+
r._Repo__project = None
108+
with pytest.raises(InvalidProject, match="Failed to parse Project.toml"):
109+
r._project("name")
110+
111+
112+
def test_project_invalid_encoding():
113+
"""Invalid UTF-8 in Project.toml raises InvalidProject."""
114+
r = _repo()
115+
r._repo.get_contents = Mock(return_value=Mock(decoded_content=b"name = \xff\xfe"))
116+
r._Repo__project = None
117+
with pytest.raises(InvalidProject, match="Failed to parse Project.toml"):
118+
r._project("name")
119+
120+
101121
def test_project_subdir():
102122
r = _repo(subdir="path/to/FooBar.jl")
103123
r._repo.get_contents = Mock(
@@ -146,6 +166,71 @@ def test_registry_path_with_uppercase_uuid():
146166
assert r._registry_path == "B/Bar"
147167

148168

169+
@patch("tagbot.action.repo.logger")
170+
def test_registry_path_malformed_toml(logger):
171+
"""Test that malformed Registry.toml returns None and logs warning."""
172+
r = _repo()
173+
logger.reset_mock() # Clear any warnings from _repo() initialization
174+
r._registry = Mock()
175+
r._registry.get_contents.return_value.sha = "123"
176+
# Malformed TOML content (missing closing bracket)
177+
r._registry.get_git_blob.return_value.content = b64encode(b"[packages\nkey")
178+
r._project = lambda _k: "abc-def"
179+
result = r._registry_path
180+
assert result is None
181+
logger.warning.assert_called_once()
182+
assert "Failed to parse Registry.toml" in logger.warning.call_args[0][0]
183+
assert "malformed TOML" in logger.warning.call_args[0][0]
184+
185+
186+
@patch("tagbot.action.repo.logger")
187+
def test_registry_path_invalid_encoding(logger):
188+
"""Invalid UTF-8 in Registry.toml returns None and logs warning."""
189+
r = _repo()
190+
logger.reset_mock() # Clear any warnings from _repo() initialization
191+
r._registry = Mock()
192+
r._registry.get_contents.return_value.sha = "123"
193+
# Mock get_git_blob to return content with invalid UTF-8 bytes
194+
r._registry.get_git_blob.return_value.content = b64encode(b"\x80\x81[packages]")
195+
r._project = lambda _k: "abc-def"
196+
result = r._registry_path
197+
assert result is None
198+
logger.warning.assert_called_once()
199+
assert "Failed to parse Registry.toml" in logger.warning.call_args[0][0]
200+
assert "UnicodeDecodeError" in logger.warning.call_args[0][0]
201+
202+
203+
@patch("tagbot.action.repo.logger")
204+
def test_registry_path_file_not_found(logger):
205+
"""Test that missing Registry.toml file returns None and logs warning."""
206+
r = _repo(registry_ssh="key") # Use SSH to trigger clone path
207+
logger.reset_mock() # Clear any warnings from _repo() initialization
208+
r._clone_registry = True
209+
r._Repo__registry_clone_dir = "/nonexistent/path"
210+
r._project = lambda _k: "abc-def"
211+
result = r._registry_path
212+
assert result is None
213+
logger.warning.assert_called_once()
214+
assert "Failed to parse Registry.toml" in logger.warning.call_args[0][0]
215+
assert "FileNotFoundError" in logger.warning.call_args[0][0]
216+
217+
218+
@patch("tagbot.action.repo.logger")
219+
def test_registry_path_missing_packages_key(logger):
220+
"""Missing 'packages' key returns None and logs warning."""
221+
r = _repo()
222+
logger.reset_mock() # Clear any warnings from _repo() initialization
223+
r._registry = Mock()
224+
r._registry.get_contents.return_value.sha = "123"
225+
# Valid TOML but missing required 'packages' section
226+
r._registry.get_git_blob.return_value.content = b64encode(b"[foo]\nbar=1")
227+
r._project = lambda _k: "abc-def"
228+
result = r._registry_path
229+
assert result is None
230+
logger.warning.assert_called_once()
231+
assert "missing the 'packages' key" in logger.warning.call_args[0][0]
232+
233+
149234
def test_registry_url():
150235
r = _repo()
151236
r._Repo__registry_path = "E/Example"
@@ -160,6 +245,39 @@ def test_registry_url():
160245
assert r._registry.get_contents.call_count == 1
161246

162247

248+
def test_registry_url_malformed_toml():
249+
"""Test that malformed Package.toml raises InvalidProject."""
250+
r = _repo()
251+
r._Repo__registry_path = "E/Example"
252+
r._registry = Mock()
253+
# Malformed TOML content
254+
r._registry.get_contents.return_value.decoded_content = b"name = \n[incomplete"
255+
with pytest.raises(InvalidProject, match="Failed to parse Package.toml"):
256+
_ = r._registry_url
257+
258+
259+
def test_registry_url_invalid_encoding():
260+
"""Test that invalid UTF-8 encoding in Package.toml raises InvalidProject."""
261+
r = _repo()
262+
r._Repo__registry_path = "E/Example"
263+
r._registry = Mock()
264+
# Invalid UTF-8 bytes (0x80 and 0x81 are not valid UTF-8 start bytes)
265+
r._registry.get_contents.return_value.decoded_content = b"\x80\x81"
266+
with pytest.raises(InvalidProject, match="Failed to parse Package.toml"):
267+
_ = r._registry_url
268+
269+
270+
def test_registry_url_missing_repo_key():
271+
"""Missing 'repo' key in Package.toml raises InvalidProject."""
272+
r = _repo()
273+
r._Repo__registry_path = "E/Example"
274+
r._registry = Mock()
275+
# Valid TOML but missing required 'repo' field
276+
r._registry.get_contents.return_value.decoded_content = b"name = 'Example'\n"
277+
with pytest.raises(InvalidProject, match="missing the 'repo' key"):
278+
_ = r._registry_url
279+
280+
163281
def test_release_branch():
164282
r = _repo()
165283
r._repo = Mock(default_branch="a")

0 commit comments

Comments
 (0)