Skip to content

Commit a7a99d1

Browse files
committed
rules: skip rules with utf8 decoding errors
If a rule can't be decoded as UTF-8, it can't be parsed any further, so skip it with a warning. The previous behavior was to raise an exception and abort. Ticket: #7812
1 parent ee8da57 commit a7a99d1

File tree

3 files changed

+48
-5
lines changed

3 files changed

+48
-5
lines changed

suricata/update/rule.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -321,11 +321,16 @@ def parse_fileobj(fileobj, group=None):
321321
rules = []
322322
buf = ""
323323
for line in fileobj:
324-
try:
325-
if type(line) == type(b""):
326-
line = line.decode()
327-
except:
328-
pass
324+
if type(line) == type(b""):
325+
try:
326+
line = line.decode("utf-8", "strict")
327+
except UnicodeDecodeError:
328+
logger.warning("Skipping rule due to encoding issue: %s", repr(line))
329+
# Skip this line and reset buffer if we were accumulating a multi-line rule
330+
if buf:
331+
logger.warning("Discarding incomplete multi-line rule due to encoding issue")
332+
buf = ""
333+
continue
329334
if line.rstrip().endswith("\\"):
330335
buf = "%s%s " % (buf, line.rstrip()[0:-1])
331336
continue
tests/rules-with-encoding-issues.rules

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
alert tcp any any -> any any (msg:"Valid rule 1"; sid:1001; rev:1;)
2+
alert tcp any any -> any any (msg:"Bad encoding ��"; sid:1002; rev:1;)
3+
alert tcp any any -> any any (msg:"Latin1 ��"; sid:1003; rev:1;)
4+
alert tcp any any -> any any (msg:"Valid rule 2"; sid:1004; rev:1;)
5+
alert tcp any any -> any any (msg:"Valid multiline"; \
6+
content:"test"; \
7+
sid:2001; rev:1;)
8+
alert tcp any any -> any any (msg:"Bad multiline"; \
9+
content:"bad��"; \
10+
sid:2002; rev:1;)
11+
alert tcp any any -> any any (msg:"Valid after bad"; sid:2003; rev:1;)

tests/test_rule.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,30 @@ def test_parse_var_lists(self):
262262
self.assertEqual(rule["source_port"], "any")
263263
self.assertEqual(rule["dest_addr"], "[any,![$EXTERNAL_IP,$REVERSE_PROXY_HOSTS,$ODD_HTTP_HOSTS]]")
264264
self.assertEqual(rule["dest_port"], "80")
265+
266+
def test_rule_with_encoding_issues(self):
267+
"""Test that rules with encoding issues are skipped (Issue #7812)"""
268+
# Parse the file with encoding issues
269+
with open('tests/rules-with-encoding-issues.rules', 'rb') as fileobj:
270+
rules = suricata.update.rule.parse_fileobj(fileobj)
271+
272+
# Should have parsed:
273+
# - Valid single-line rules (1001 and 1004)
274+
# - Valid multiline rule (2001)
275+
# - Valid rule after bad multiline (2003)
276+
# Should have skipped:
277+
# - Rules with bad encoding (1002, 1003)
278+
# - Bad multiline rule (2002)
279+
self.assertEqual(len(rules), 4)
280+
281+
# Check single-line rules
282+
self.assertEqual(rules[0].sid, 1001)
283+
self.assertEqual(rules[0].msg, "Valid rule 1")
284+
self.assertEqual(rules[1].sid, 1004)
285+
self.assertEqual(rules[1].msg, "Valid rule 2")
286+
287+
# Check multiline rules
288+
self.assertEqual(rules[2].sid, 2001)
289+
self.assertEqual(rules[2].msg, "Valid multiline")
290+
self.assertEqual(rules[3].sid, 2003)
291+
self.assertEqual(rules[3].msg, "Valid after bad")

0 commit comments

Comments
 (0)