Commit c6486ea

Use regex for tokenizing words
1 parent d9dbb60 commit c6486ea

File tree

1 file changed: 13 additions, 3 deletions


Python/ini_converting/ini_tokenizer.py

Lines changed: 13 additions & 3 deletions
@@ -1,3 +1,6 @@
+import re
+
+
 def get_tokens(filepath):
     tokens = []
 
@@ -115,9 +118,16 @@ def tokenize_newline(i, text_len, text, tokens, filepath):
 def tokenize_word(i, text_len, text, tokens, filepath):
     token = ""
 
-    while i < text_len and text[i] not in ("\t =\n") and not (text[i] == "/" and i + 1 < text_len and text[i + 1] == "/"):
-        token += text[i]
-        i += 1
+    subtext = text[i:]
+    token = re.match("(\S+([\t\f\v ]*\S+)*)", subtext).group(0)
+
+    token = token.split("//", maxsplit=1)[0]
+    token = token.split("/*", maxsplit=1)[0]
+    token = token.split("=", maxsplit=1)[0]
+
+    token = token.rstrip()
+
+    i += len(token)
 
     tokens.append(get_token("WORD", token, i, filepath))
 
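The helpers around tokenize_word (notably get_token) are not part of this diff, so the following is a minimal runnable sketch of the new regex path under assumed signatures: the get_token stand-in is hypothetical, returning the index from tokenize_word is an assumption, and "example.ini" is an illustrative filepath. One note: the committed code passes the pattern as a plain string, where \S is an invalid string escape that newer Pythons flag with a warning; the sketch uses a raw string for the same pattern.

import re

def get_token(kind, value, i, filepath):
    # Hypothetical stand-in for the repo's get_token() helper.
    return {"type": kind, "value": value, "index": i, "filepath": filepath}

def tokenize_word(i, text_len, text, tokens, filepath):
    # text_len is unused by the regex version; kept for signature parity.
    subtext = text[i:]

    # Match a run of non-whitespace chunks separated by horizontal
    # whitespace only; \n and \r are excluded by [\t\f\v ], so the
    # match never crosses a line boundary.
    token = re.match(r"\S+([\t\f\v ]*\S+)*", subtext).group(0)

    # Cut the word off at a // or /* comment opener, or at an equals
    # sign, then drop whatever whitespace is left before the cut.
    token = token.split("//", maxsplit=1)[0]
    token = token.split("/*", maxsplit=1)[0]
    token = token.split("=", maxsplit=1)[0]
    token = token.rstrip()

    # Advance only past the characters actually consumed as the word,
    # so the next tokenizer step resumes at the comment or '='.
    i += len(token)

    tokens.append(get_token("WORD", token, i, filepath))
    return i  # assumption: the caller continues from the new index

tokens = []
text = "foo bar = baz"
tokenize_word(0, len(text), text, tokens, "example.ini")
print(tokens)  # [{'type': 'WORD', 'value': 'foo bar', 'index': 7, 'filepath': 'example.ini'}]

With "foo bar = baz" the regex first grabs the whole line, the "=" split trims it to "foo bar ", and rstrip() leaves "foo bar", so the index advances by 7 and lands on the space just before the "=".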

0 commit comments