Skip to content

Commit c8432df

Browse files
Jan200101 authored and jpakkane committed
replace regex implementation for cmake with a parser
cmake does preprocessing in a linear pass through the entire file. This allows recursive variable lookups, which cannot be supported by a basic regex implementation.
1 parent 07cd9bc commit c8432df

File tree

3 files changed

+107
-65
lines changed

3 files changed

+107
-65
lines changed

mesonbuild/utils/universal.py

Lines changed: 93 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,7 +1240,7 @@ def do_replacement(regex: T.Pattern[str], line: str,
12401240
if variable_format == 'meson':
12411241
return do_replacement_meson(regex, line, confdata)
12421242
elif variable_format in {'cmake', 'cmake@'}:
1243-
return do_replacement_cmake(regex, line, variable_format == 'cmake@', confdata)
1243+
return do_replacement_cmake(line, variable_format == 'cmake@', confdata)
12441244
else:
12451245
raise MesonException('Invalid variable format')
12461246

@@ -1275,44 +1275,92 @@ def variable_replace(match: T.Match[str]) -> str:
12751275
return var_str
12761276
return re.sub(regex, variable_replace, line), missing_variables
12771277

1278-
def do_replacement_cmake(regex: T.Pattern[str], line: str, at_only: bool,
1278+
def do_replacement_cmake(line: str, at_only: bool,
12791279
confdata: T.Union[T.Dict[str, T.Tuple[str, T.Optional[str]]], 'ConfigurationData']) -> T.Tuple[str, T.Set[str]]:
12801280
missing_variables: T.Set[str] = set()
12811281

1282-
def variable_replace(match: T.Match[str]) -> str:
1283-
# Pairs of escape characters before '@', '\@', '${' or '\${'
1284-
if match.group(0).endswith('\\'):
1285-
num_escapes = match.end(0) - match.start(0)
1286-
return '\\' * (num_escapes // 2)
1287-
# Handle cmake escaped \${} tags
1288-
elif not at_only and match.group(0) == '\\${':
1289-
return '${'
1290-
# \@escaped\@ variables
1291-
elif match.groupdict().get('escaped') is not None:
1292-
return match.group('escaped')[1:-2]+'@'
1282+
character_regex = re.compile(r'''
1283+
[^a-zA-Z0-9_/.+\-]
1284+
''', re.VERBOSE)
1285+
1286+
def variable_get(varname: str) -> str:
1287+
var_str = ''
1288+
if varname in confdata:
1289+
var, _ = confdata.get(varname)
1290+
if isinstance(var, str):
1291+
var_str = var
1292+
elif isinstance(var, bool):
1293+
var_str = str(int(var))
1294+
elif isinstance(var, int):
1295+
var_str = str(var)
1296+
else:
1297+
msg = f'Tried to replace variable {varname!r} value with ' \
1298+
f'something other than a string or int: {var!r}'
1299+
raise MesonException(msg)
12931300
else:
1294-
# Template variable to be replaced
1295-
varname = match.group('variable')
1296-
if not varname:
1297-
varname = match.group('cmake_variable')
1298-
1299-
var_str = ''
1300-
if varname in confdata:
1301-
var, _ = confdata.get(varname)
1302-
if isinstance(var, str):
1303-
var_str = var
1304-
elif isinstance(var, bool):
1305-
var_str = str(int(var))
1306-
elif isinstance(var, int):
1307-
var_str = str(var)
1308-
else:
1309-
msg = f'Tried to replace variable {varname!r} value with ' \
1310-
f'something other than a string or int: {var!r}'
1301+
missing_variables.add(varname)
1302+
return var_str
1303+
1304+
def parse_line(line: str) -> str:
1305+
index = 0
1306+
while len(line) > index:
1307+
if line[index] == '@':
1308+
next_at = line.find("@", index+1)
1309+
if next_at > index+1:
1310+
varname = line[index+1:next_at]
1311+
match = character_regex.search(varname)
1312+
1313+
# at substitution doesn't occur if the key isn't valid
1314+
# however it also doesn't raise an error
1315+
if not match:
1316+
value = variable_get(varname)
1317+
line = line[:index] + value + line[next_at+1:]
1318+
1319+
elif not at_only and line[index:index+2] == '${':
1320+
bracket_count = 1
1321+
end_bracket = index + 2
1322+
try:
1323+
while bracket_count > 0:
1324+
if line[end_bracket:end_bracket+2] == "${":
1325+
end_bracket += 2
1326+
bracket_count += 1
1327+
elif line[end_bracket] == "}":
1328+
end_bracket += 1
1329+
bracket_count -= 1
1330+
elif line[end_bracket] in {"@", "\n"}:
1331+
# these aren't valid variable characters
1332+
# but they are inconsequential at this point
1333+
end_bracket += 1
1334+
elif character_regex.search(line[end_bracket]):
1335+
invalid_character = line[end_bracket]
1336+
variable = line[index+2:end_bracket]
1337+
msg = f'Found invalid character {invalid_character!r}' \
1338+
f' in variable {variable!r}'
1339+
raise MesonException(msg)
1340+
else:
1341+
end_bracket += 1
1342+
except IndexError:
1343+
msg = f'Found incomplete variable {line[index:-1]!r}'
13111344
raise MesonException(msg)
1312-
else:
1313-
missing_variables.add(varname)
1314-
return var_str
1315-
return re.sub(regex, variable_replace, line), missing_variables
1345+
1346+
if bracket_count == 0:
1347+
varname = parse_line(line[index+2:end_bracket-1])
1348+
match = character_regex.search(varname)
1349+
if match:
1350+
invalid_character = line[end_bracket-2]
1351+
variable = line[index+2:end_bracket-3]
1352+
msg = f'Found invalid character {invalid_character!r}' \
1353+
f' in variable {variable!r}'
1354+
raise MesonException(msg)
1355+
1356+
value = variable_get(varname)
1357+
line = line[:index] + value + line[end_bracket:]
1358+
1359+
index += 1
1360+
1361+
return line
1362+
1363+
return parse_line(line), missing_variables
13161364

13171365
def do_define_meson(regex: T.Pattern[str], line: str, confdata: 'ConfigurationData',
13181366
subproject: T.Optional[SubProject] = None) -> str:
@@ -1341,7 +1389,7 @@ def do_define_meson(regex: T.Pattern[str], line: str, confdata: 'ConfigurationDa
13411389
else:
13421390
raise MesonException('#mesondefine argument "%s" is of unknown type.' % varname)
13431391

1344-
def do_define_cmake(regex: T.Pattern[str], line: str, confdata: 'ConfigurationData', at_only: bool,
1392+
def do_define_cmake(line: str, confdata: 'ConfigurationData', at_only: bool,
13451393
subproject: T.Optional[SubProject] = None) -> str:
13461394
cmake_bool_define = 'cmakedefine01' in line
13471395

@@ -1381,12 +1429,12 @@ def get_cmake_define(line: str, confdata: 'ConfigurationData') -> str:
13811429

13821430
result = get_cmake_define(line, confdata)
13831431
result = f'#define {varname} {result}'.strip() + '\n'
1384-
result, _ = do_replacement_cmake(regex, result, at_only, confdata)
1432+
result, _ = do_replacement_cmake(result, at_only, confdata)
13851433
return result
13861434

13871435
def get_variable_regex(variable_format: Literal['meson', 'cmake', 'cmake@'] = 'meson') -> T.Pattern[str]:
13881436
# Only allow (a-z, A-Z, 0-9, _, -) as valid characters for a define
1389-
if variable_format in {'meson', 'cmake@'}:
1437+
if variable_format == 'meson':
13901438
# Also allow escaping pairs of '@' with '\@'
13911439
regex = re.compile(r'''
13921440
(?:\\\\)+(?=\\?@) # Matches multiple backslashes followed by an @ symbol
@@ -1395,17 +1443,13 @@ def get_variable_regex(variable_format: Literal['meson', 'cmake', 'cmake@'] = 'm
13951443
| # OR
13961444
(?P<escaped>\\@[-a-zA-Z0-9_]+\\@) # Match an escaped variable enclosed in @ symbols
13971445
''', re.VERBOSE)
1398-
else:
1446+
elif variable_format == 'cmake@':
13991447
regex = re.compile(r'''
1400-
(?:\\\\)+(?=\\?(\$|@)) # Match multiple backslashes followed by a dollar sign or an @ symbol
1401-
| # OR
1402-
\\\${ # Match a backslash followed by a dollar sign and an opening curly brace
1403-
| # OR
1404-
\${(?P<cmake_variable>[-a-zA-Z0-9_]*)} # Match a variable enclosed in curly braces and capture the variable name
1405-
| # OR
14061448
(?<!\\)@(?P<variable>[-a-zA-Z0-9_]+)@ # Match a variable enclosed in @ symbols and capture the variable name; no matches beginning with '\@'
1407-
| # OR
1408-
(?P<escaped>\\@[-a-zA-Z0-9_]+\\@) # Match an escaped variable enclosed in @ symbols
1449+
''', re.VERBOSE)
1450+
elif variable_format == "cmake":
1451+
regex = re.compile(r'''
1452+
\${(?P<variable>[-a-zA-Z0-9_]*)} # Match a variable enclosed in curly braces and capture the variable name
14091453
''', re.VERBOSE)
14101454
return regex
14111455

@@ -1453,8 +1497,6 @@ def do_conf_str_cmake(src: str, data: T.List[str], confdata: 'ConfigurationData'
14531497
if at_only:
14541498
variable_format = 'cmake@'
14551499

1456-
regex = get_variable_regex(variable_format)
1457-
14581500
search_token = 'cmakedefine'
14591501

14601502
result: T.List[str] = []
@@ -1466,11 +1508,12 @@ def do_conf_str_cmake(src: str, data: T.List[str], confdata: 'ConfigurationData'
14661508
stripped_line = line.lstrip()
14671509
if len(stripped_line) >= 2 and stripped_line[0] == '#' and stripped_line[1:].lstrip().startswith(search_token):
14681510
confdata_useless = False
1469-
line = do_define_cmake(regex, line, confdata, at_only, subproject)
1511+
1512+
line = do_define_cmake(line, confdata, at_only, subproject)
14701513
else:
14711514
if '#mesondefine' in line:
14721515
raise MesonException(f'Format error in {src}: saw "{line.strip()}" when format set to "{variable_format}"')
1473-
line, missing = do_replacement_cmake(regex, line, at_only, confdata)
1516+
line, missing = do_replacement_cmake(line, at_only, confdata)
14741517
missing_variables.update(missing)
14751518
if missing:
14761519
confdata_useless = False
Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,11 @@
1-
/* No escape */
1+
/* cmake substitutions cannot be escaped */
22
#define MESSAGE1 "${var1}"
3-
4-
/* Single escape means no replace */
53
#define MESSAGE2 "\${var1}"
6-
7-
/* Replace pairs of escapes before '@' or '\@' with escape characters
8-
* (note we have to double number of pairs due to C string escaping)
9-
*/
104
#define MESSAGE3 "\\\\${var1}"
11-
12-
/* Pairs of escapes and then single escape to avoid replace */
135
#define MESSAGE4 "\\\\\${var1}"
6+
#define MESSAGE5 "@var1@"
7+
#define MESSAGE6 "\\@var1@"
8+
#define MESSAGE7 "\\\\@var1@"
149

15-
/* Check escape character outside variables */
16-
#define MESSAGE5 "\\ ${ \${ \\\\${ \\\\\${"
10+
/* backslash is an invalid variable character */
11+
#define MESSAGE8 "@var1\@"

test cases/common/14 configure file/prog7.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,12 @@
33

44
int main(void) {
55
return strcmp(MESSAGE1, "foo")
6-
|| strcmp(MESSAGE2, "${var1}")
7-
|| strcmp(MESSAGE3, "\\foo")
8-
|| strcmp(MESSAGE4, "\\${var1}")
9-
|| strcmp(MESSAGE5, "\\ ${ ${ \\${ \\${");
6+
|| strcmp(MESSAGE2, "\foo")
7+
|| strcmp(MESSAGE3, "\\\\foo")
8+
|| strcmp(MESSAGE4, "\\\\\foo")
9+
|| strcmp(MESSAGE5, "foo")
10+
|| strcmp(MESSAGE6, "\\foo")
11+
|| strcmp(MESSAGE7, "\\\\foo")
12+
|| strcmp(MESSAGE8, "@var1\@")
13+
|| 0;
1014
}

0 commit comments

Comments
 (0)