22# See https://llvm.org/LICENSE.txt for license information.
33# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
44
5- from pygments .lexer import RegexLexer , bygroups , include , combined
5+ from pygments .lexer import RegexLexer , bygroups , include , using
66from pygments .token import *
77import re
88
@@ -27,22 +27,40 @@ class MlirLexer(RegexLexer):
2727
2828 flags = re .MULTILINE
2929
30+ class VariableList (RegexLexer ):
31+ """Lexer for lists of SSA variables separated by commas."""
32+
33+ tokens = {
34+ "root" : [
35+ (r"\s+" , Text ),
36+ (r"," , Punctuation ),
37+ (r"%[_A-Za-z0-9\.\$\-:#]+" , Name .Variable ),
38+ ]
39+ }
40+
3041 tokens = {
3142 "root" : [
3243 # Comments
3344 (r"//.*?$" , Comment .Single ),
45+ # operation name with assignment: %... = op.name
46+ (
47+ r"^(\s*)(%[\%_A-Za-z0-9\:#\,\s]+)(=)(\s*)([A-Za-z0-9_\.\$\-]+)\b" ,
48+ bygroups (Text , using (VariableList ), Operator , Text , Name .Builtin ),
49+ ),
50+ # operation name without result
51+ (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])" , bygroups (Text , Name .Builtin )),
3452 # Attribute alias definition: #name =
3553 (
36- r"^\s*(#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)" ,
37- bygroups (Name .Constant , Text , Operator ),
54+ r"^( \s*) (#[_A-Za-z0-9\$\-\.]+)(\b)(\s*=)" ,
55+ bygroups (Text , Name .Constant , Text , Operator ),
3856 ),
3957 # Type alias definition: !name =
4058 (
41- r"^\s*(![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)" ,
42- bygroups (Keyword .Type , Text , Operator ),
59+ r"^( \s*) (![_A-Za-z0-9\$\-\.]+)(\b)(\s*=)" ,
60+ bygroups (Text , Keyword .Type , Text , Operator ),
4361 ),
44- # SSA values (results, uses) - allow many characters MLIR uses
45- (r"%[% _A-Za-z0-9\.\$:\- ]+" , Name .Variable ),
62+ # SSA values (results, uses)
63+ (r"%[_A-Za-z0-9\.\$\-:# ]+" , Name .Variable ),
4664 # attribute refs, constants and named attributes
4765 (r"#[_A-Za-z0-9\$\-\.]+\b" , Name .Constant ),
4866 # symbol refs / function-like names
@@ -61,14 +79,7 @@ class MlirLexer(RegexLexer):
6179 # affine constructs
6280 (r"\b(affine_map|affine_set)\b" , Keyword .Reserved ),
6381 # common builtin operators / functions inside affine_map
64- (r"\b(ceildiv|floordiv|mod|symbol)\b" , Name .Builtin ),
65- # operation definitions with assignment: %... = op.name
66- (
67- r"^(\s*)(%[\%_A-Za-z0-9\:\,\s]+)(\s*=\s*)([A-Za-z0-9_\.\$\-]+)\b" ,
68- bygroups (Text , Name .Variable , Operator , Name .Function ),
69- ),
70- # operation name without result
71- (r"^(\s*)([A-Za-z0-9_\.\$\-]+)\b(?=[^<:])" , bygroups (Text , Name .Function )),
82+ (r"\b(ceildiv|floordiv|mod|symbol)\b" , Name .Other ),
7283 # identifiers / bare words
7384 (r"\b[_A-Za-z][_A-Za-z0-9\.-]*\b" , Name .Other ),
7485 # numbers: hex, float (with exponent), integer
@@ -96,13 +107,13 @@ class MlirLexer(RegexLexer):
96107 (r">" , Punctuation , "#pop" ),
97108 # dimensions like 3x or 3x3x... and standalone numbers:
98109 # - match numbers that are followed by an 'x' (dimension separator)
99- (r"([0-9]+)(?=(?:[xX] ))" , Number .Integer ),
110+ (r"([0-9]+)(?=(?:x ))" , Number .Integer ),
100111 # - match bare numbers (sizes)
101112 (r"[0-9]+" , Number .Integer ),
102113 # dynamic dimension '?'
103- (r"\?" , Name .Constant ),
114+ (r"\?" , Name .Integer ),
104115 # the 'x' dimension separator (treat as punctuation)
105- (r"[xX] " , Punctuation ),
116+ (r"x " , Punctuation ),
106117 # element / builtin types inside angle brackets (no word-boundary)
107118 (
108119 r"(?:bf16|f16|f32|f64|f80|f128|index|none|(?:[us]?i[0-9]+))" ,
0 commit comments