Skip to content

Commit f399369

Browse files
authored
Replace fussy parser processing; Return message text verbatim (#312)
* Add logging to parser
* Fix parser to not munge ":" (#310) or whitespace (#311) in message text
  Do not use lstrip() to remove single protocol characters from the raw data, since lstrip() removes every leading occurrence of the given characters and will therefore overzealously strip actual parsed-data content. E.g. lstrip(":") strips all ":" characters from the start of a message whose text really DOES begin with a ":" character (#310). Note also that EVERY lstrip() call in the parser is likewise inappropriate.
  Do not split() and then re-join() the message text, since that compresses each run of consecutive whitespace characters into a single space and makes it impossible to pass a tab character at all. E.g. the split/join method munges "foo    bar" (several consecutive spaces) to just "foo bar" (one space) (#311).
  Replace the fussy parser processing with a re.search() instead. Eliminate all the overzealous lstrip() calls. Always return the message text verbatim. Remove the unused global variable 'TMI'.
* Black formatting
1 parent 6d5fd64 commit f399369

File tree

1 file changed

+31
-28
lines changed

1 file changed

+31
-28
lines changed

twitchio/parse.py

Lines changed: 31 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626

2727
import re
2828
import typing
29+
import logging
2930

3031
if typing.TYPE_CHECKING:
3132
from .websocket import WSConnection
@@ -44,7 +45,9 @@
4445
)
4546
ACTIONS2 = ("USERSTATE", "ROOMSTATE", "PRIVMSG", "USERNOTICE", "WHISPER")
4647
USER_SUB = re.compile(r":(?P<user>.*)!")
47-
TMI = "tmi.twitch.tv"
48+
MESSAGE_RE = re.compile(r":(?P<useraddr>\S+) (?P<action>\S+) #(?P<channel>\S+)( :(?P<message>.*))?$")
49+
50+
logger = logging.getLogger("twitchio.parser")
4851

4952

5053
def parser(data: str, nick: str):
@@ -55,33 +58,22 @@ def parser(data: str, nick: str):
5558
user = None
5659
badges = None
5760

61+
logger.debug(f"---DATA--- {data}")
62+
5863
if action == "PING":
5964
return dict(action="PING")
6065

61-
elif groups[2] in {"PRIVMSG", "PRIVMSG(ECHO)"}:
62-
action = groups[2]
63-
channel = groups[3].lstrip("#")
64-
message = " ".join(groups[4:]).lstrip(":")
65-
user = re.search(USER_SUB, groups[1]).group("user")
66-
67-
elif groups[2] == "WHISPER":
68-
action = groups[2]
69-
message = " ".join(groups[4:]).lstrip(":")
70-
user = re.search(USER_SUB, groups[1]).group("user")
71-
72-
elif groups[2] == "USERNOTICE":
73-
action = groups[2]
74-
channel = groups[3].lstrip("#")
75-
message = " ".join(groups[4:]).lstrip(":")
76-
77-
elif action in ACTIONS:
78-
channel = groups[-1].lstrip("#")
79-
80-
elif groups[3] in {"PRIVMSG", "PRIVMSG(ECHO)"}:
81-
action = groups[3]
82-
channel = groups[4].lstrip("#")
83-
message = " ".join(groups[5:]).lstrip(":")
84-
user = re.search(USER_SUB, groups[2]).group("user")
66+
elif (
67+
groups[1] in ACTIONS or groups[2] in ACTIONS or (len(groups) > 3 and groups[3] in {"PRIVMSG", "PRIVMSG(ECHO)"})
68+
):
69+
result = re.search(MESSAGE_RE, data)
70+
if not result:
71+
logger.error(f" ****** MESSAGE_RE Failed! ******")
72+
return None # raise exception?
73+
user = result.group("useraddr").split("!")[0]
74+
action = result.group("action")
75+
channel = result.group("channel")
76+
message = result.group("message")
8577

8678
if action in ACTIONS2:
8779
prebadge = groups[0].split(";")
@@ -111,18 +103,29 @@ def parser(data: str, nick: str):
111103

112104
batches = []
113105
if code == 353:
114-
if not channel:
115-
channel = groups[4].lstrip("#")
106+
channel = groups[4]
107+
if channel[0] == "#":
108+
channel = channel[1:]
109+
else:
110+
logger.warning(f" (353) parse failed? ||{channel}||")
116111

117112
for b in groups[5:-1]:
118-
b = b.lstrip(":")
113+
if b[0] == ":":
114+
b = b[1:]
119115

120116
if "\r\n:" in b:
121117
batches.append(b.split("\r\n:")[0])
122118
break
123119
else:
124120
batches.append(b)
125121

122+
actcode = action or code
123+
if actcode:
124+
level = logging.DEBUG
125+
if actcode not in ["JOIN", "PART"]:
126+
level = logging.INFO
127+
logger.log(level, f" parsed <{actcode}><{channel}><{user}><{message}>")
128+
126129
return dict(
127130
data=data,
128131
nick=nick,

0 commit comments

Comments
 (0)