@@ -1244,6 +1244,252 @@ class BibliographyEntry:
12441244 return authors
12451245
12461246
def update_via_simpasm(
    infile_full,
    outdir,
    outfile=None,
    cflags=None,
    preserve_header=True,
    dry_run=False,
    force_cross=False,
):
    """Run ``./scripts/simpasm`` on an assembly source and store its output.

    The source architecture is inferred from the substrings ``aarch64`` /
    ``x86_64`` in ``infile_full``. If it differs from the host architecture
    (``platform.machine()``; anything other than arm64/aarch64 is treated as
    x86_64), the ``<arch>-unknown-linux-gnu-`` cross toolchain is passed to
    simpasm via ``--cc`` and ``--nm``.

    Args:
        infile_full: Path of the assembly file to process.
        outdir: Directory receiving the processed file.
        outfile: Name of the output file inside ``outdir``; defaults to the
            basename of ``infile_full``.
        cflags: Optional compiler flags, forwarded as ``--cflags="..."``.
        preserve_header: When True, ``-p`` is passed to simpasm.
        dry_run: Forwarded unchanged to ``update_file``.
        force_cross: When a cross toolchain is needed but missing, raise
            instead of silently skipping the file.

    Raises:
        Exception: If the architecture cannot be detected, if the cross
            toolchain is missing and ``force_cross`` is set, or if simpasm
            exits with a non-zero status.
    """
    status_update("simpasm", infile_full)

    if outfile is None:
        outfile = os.path.basename(infile_full)
    outfile_full = os.path.join(outdir, outfile)

    # Check if we need to use a cross-compiler
    if "aarch64" in infile_full:
        source_arch = "aarch64"
    elif "x86_64" in infile_full:
        source_arch = "x86_64"
    else:
        raise Exception(f"Could not detect architecture of source file {infile_full}.")

    # Check native architecture
    if platform.machine().lower() in ["arm64", "aarch64"]:
        native_arch = "aarch64"
    else:
        native_arch = "x86_64"

    cross_prefix = None
    if native_arch != source_arch:
        cross_prefix = f"{source_arch}-unknown-linux-gnu-"
        # Check if cross-compiler is present
        if shutil.which(cross_prefix + "gcc") is None:
            if force_cross is False:
                # Best effort: without a cross toolchain the file is skipped
                return
            raise Exception(f"Could not find cross toolchain {cross_prefix}")

    with tempfile.NamedTemporaryFile(suffix=".S") as tmp:
        # TODO: "--cfify" is temporarily removed; add it back once the CFI
        # script has been added.
        cmd = [
            "./scripts/simpasm",
            "--objdump=llvm-objdump",
            # "--cfify",
            # Reuse the architecture detected above rather than re-deriving it
            "--arch=" + source_arch,
            "-i",
            infile_full,
            "-o",
            tmp.name,
        ]
        if cross_prefix is not None:
            # Stick with llvm-objdump for disassembly
            cmd += ["--cc", cross_prefix + "gcc"]
            cmd += ["--nm", cross_prefix + "nm"]
        if cflags is not None:
            cmd += [f'--cflags="{cflags}"']
        if preserve_header is True:
            cmd += ["-p"]

        # Only the subprocess call can raise CalledProcessError
        try:
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                check=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Command failed: {' '.join(cmd)}")
            print(f"Exit code: {e.returncode}")
            print(f"stderr: {e.stderr}")
            raise Exception("Failed to run simpasm") from e

        # simpasm wrote to the temporary file by name; read it back through
        # our own handle before the file is deleted on context exit.
        tmp.seek(0)
        new_contents = tmp.read().decode()

    update_file(outfile_full, new_contents, dry_run=dry_run)
1327+
1328+
def update_via_copy(infile_full, outfile_full, dry_run=False, transform=None):
    """Copy a text file to ``outfile_full``, optionally transforming it.

    Args:
        infile_full: Path of the file to read.
        outfile_full: Destination path handed to ``update_file``.
        dry_run: Forwarded unchanged to ``update_file``.
        transform: Optional callable applied to the file's text before it is
            passed on; ``None`` copies the text unchanged.
    """
    status_update("copy", f"{infile_full} -> {outfile_full}")

    with open(infile_full, "r") as src:
        text = src.read()

    update_file(
        outfile_full,
        text if transform is None else transform(text),
        dry_run=dry_run,
    )
1339+
1340+
def update_via_remove(filename, dry_run=False):
    """Delete a stale autogenerated file, or fail in dry-run mode.

    Args:
        filename: Path of the file to remove.
        dry_run: When True, nothing is deleted; a message is written to
            stderr and the process exits with status 1.

    Raises:
        SystemExit: With code 1 when ``dry_run`` is True.
        OSError: Propagated from ``os.remove`` if the file cannot be removed.
    """
    if dry_run is True:
        print(
            f"Autogenerated file {filename} needs removing. Have you called scripts/autogen?",
            file=sys.stderr,
        )
        # sys.exit rather than the site-provided exit(), which is intended
        # for the interactive shell and may be unavailable (e.g. python -S).
        sys.exit(1)

    # Remove the file
    os.remove(filename)
1351+
1352+
def synchronize_file(
    f, in_dir, out_dir, dry_run=False, delete=False, no_simplify=False, **kwargs
):
    """Synchronize a single source file into ``out_dir``.

    Args:
        f: Path of the candidate file.
        in_dir: Source directory (accepted for call-site symmetry; not used
            by this function).
        out_dir: Directory receiving the synchronized file.
        dry_run: Forwarded to the update helpers.
        delete: When True, nothing is written; only the basename is reported.
        no_simplify: When True, ``.S`` files are copied instead of being
            passed to ``update_via_simpasm``.
        **kwargs: Extra options forwarded to ``update_via_simpasm`` only.

    Returns:
        The basename of ``f`` if it is a recognised source file, else None.
    """
    # Only synchronize sources, but not README.md, Makefile and so on
    if not f.endswith((".c", ".h", ".i", ".inc", ".S")):
        return None

    name = os.path.basename(f)

    if delete is True:
        return name

    if f.endswith(".S") and no_simplify is False:
        update_via_simpasm(f, out_dir, dry_run=dry_run, **kwargs)
        return name

    # Update via copy
    target = os.path.join(out_dir, name)

    def fix_guard(text):
        return adjust_header_guard_for_filename(text, target)

    # The header guards will also be checked later, but if we
    # don't do it here, the dry-run would fail because of a
    # mismatching intermediate file
    update_via_copy(
        f,
        target,
        dry_run=dry_run,
        transform=fix_guard if f.endswith(".h") else None,
    )
    return name
1384+
1385+
def synchronize_backend(
    in_dir, out_dir, dry_run=False, delete=False, no_simplify=False, **kwargs
):
    """Synchronize every file directly inside ``in_dir`` into ``out_dir``.

    Each file returned by ``get_files(in_dir/*)`` is handed to
    ``synchronize_file`` on a thread pool. When ``delete`` is True, files in
    ``out_dir`` whose basename was not reported by ``synchronize_file`` are
    afterwards passed to ``update_via_remove``.

    Args:
        in_dir: Directory holding the source files.
        out_dir: Directory to synchronize into.
        dry_run: Forwarded to ``synchronize_file`` and ``update_via_remove``.
        delete: Enables the removal pass described above.
        no_simplify: Forwarded to ``synchronize_file``.
        **kwargs: Extra options forwarded to ``synchronize_file``.
    """
    sync_one = partial(
        synchronize_file,
        in_dir=in_dir,
        out_dir=out_dir,
        dry_run=dry_run,
        delete=delete,
        no_simplify=no_simplify,
        **kwargs,
    )

    with ThreadPoolExecutor() as executor:
        # Materialize the iterator so that an exception raised in a worker
        # is re-raised here instead of being dropped.
        pool_results = list(
            executor.map(sync_one, get_files(os.path.join(in_dir, "*")))
        )

    if delete is False:
        return

    # A set gives constant-time membership tests in the loop below
    copied = {name for name in pool_results if name is not None}

    # Check for files in the target directory that have not been copied
    for f in get_files(os.path.join(out_dir, "*")):
        if os.path.basename(f) not in copied:
            update_via_remove(f, dry_run=dry_run)
1418+
1419+
def synchronize_backends(
    *, dry_run=False, force_cross=False, clean=False, delete=False, no_simplify=False
):
    """Synchronize all native backends from ``dev/`` into ``mldsa/``.

    Args:
        dry_run: Forwarded to every copy/synchronization helper.
        force_cross: Forwarded to ``synchronize_backend``.
        clean: Selects ``dev/aarch64_clean`` instead of ``dev/aarch64_opt``
            as the AArch64 arithmetic backend source.
        delete: Forwarded to ``synchronize_backend``; when True the
            ``meta.h`` files are not copied.
        no_simplify: Forwarded to ``synchronize_backend``.
    """
    ty = "opt" if clean is False else "clean"

    if delete is False:
        # We may switch the AArch64 arithmetic backend, so adjust the
        # metadata file. Each destination is named once and reused for the
        # header-guard adjustment.
        meta_headers = (
            (f"dev/aarch64_{ty}/meta.h", "mldsa/native/aarch64/meta.h"),
            ("dev/x86_64/meta.h", "mldsa/native/x86_64/meta.h"),
        )
        for src, dst in meta_headers:
            update_via_copy(
                src,
                dst,
                dry_run=dry_run,
                # Bind dst now so each closure keeps its own destination
                transform=lambda c, dst=dst: adjust_header_guard_for_filename(
                    c, dst
                ),
            )

    # (source directory, target directory, cflags), processed in this order
    backends = (
        (
            f"dev/aarch64_{ty}/src",
            "mldsa/native/aarch64/src",
            "-Imldsa/native/aarch64/src",
        ),
        (
            "dev/fips202/aarch64/src",
            "mldsa/fips202/native/aarch64/src",
            "-Imldsa/fips202/native/aarch64/src -march=armv8.4-a+sha3",
        ),
        (
            "dev/fips202/aarch64",
            "mldsa/fips202/native/aarch64",
            "-Imldsa/fips202/native/aarch64 -march=armv8.4-a+sha3",
        ),
        (
            "dev/x86_64/src",
            "mldsa/native/x86_64/src",
            # Turn off control-flow protection (CET) explicitly. Newer
            # versions of clang turn it on by default and insert endbr64
            # instructions at every global symbol.
            # We insert endbr64 instruction manually via the
            # MLD_ASM_FN_SYMBOL macro.
            # This leads to duplicate endbr64 instructions causing a failure
            # when comparing the object code before and after simplification.
            "-Imldsa/native/x86_64/src/ -mavx2 -mbmi2 -msse4 -fcf-protection=none",
        ),
    )
    for src_dir, dst_dir, cflags in backends:
        synchronize_backend(
            src_dir,
            dst_dir,
            dry_run=dry_run,
            delete=delete,
            force_cross=force_cross,
            no_simplify=no_simplify,
            cflags=cflags,
        )
1491+
1492+
12471493def gen_markdown_citations_for (filename , bibliography , dry_run = False ):
12481494
12491495 # Skip BIBLIOGRAPHY.md
@@ -1473,6 +1719,9 @@ def _main():
14731719 formatter_class = argparse .ArgumentDefaultsHelpFormatter
14741720 )
14751721 parser .add_argument ("--dry-run" , default = False , action = "store_true" )
1722+ parser .add_argument ("--aarch64-clean" , default = True , action = "store_true" )
1723+ parser .add_argument ("--no-simplify" , default = False , action = "store_true" )
1724+ parser .add_argument ("--force-cross" , default = False , action = "store_true" )
14761725
14771726 args = parser .parse_args ()
14781727
@@ -1489,11 +1738,29 @@ def _main():
14891738 gen_avx2_zeta_file (args .dry_run )
14901739 gen_avx2_rej_uniform_table (args .dry_run )
14911740 high_level_status ("Generated zeta and lookup tables" )
1741+
1742+ synchronize_backends (
1743+ dry_run = args .dry_run ,
1744+ clean = args .aarch64_clean ,
1745+ no_simplify = args .no_simplify ,
1746+ force_cross = args .force_cross ,
1747+ )
1748+ high_level_status ("Synchronized backends" )
1749+
14921750 gen_header_guards (args .dry_run )
14931751 high_level_status ("Generated header guards" )
14941752 gen_preprocessor_comments (args .dry_run )
14951753 high_level_status ("Generated preprocessor comments" )
14961754
1755+ synchronize_backends (
1756+ dry_run = args .dry_run ,
1757+ clean = args .aarch64_clean ,
1758+ delete = True ,
1759+ force_cross = args .force_cross ,
1760+ no_simplify = args .no_simplify ,
1761+ )
1762+ high_level_status ("Completed final backend synchronization" )
1763+
14971764
# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    _main()
0 commit comments