Skip to content

Commit 21223b2

Browse files
committed
Port simpasm from mlkem-native to mldsa-native
- This commit ports the `simpasm` script and related functions in `autogen` from `mlkem-native` to `mldsa-native`. - Added the `simpasm` job in `base.yml` and changed the `nix-shell` from `ci` to `ci-cross` in `ci.yml` for ASM simplification. - Since the CFI scripts have not yet been ported to `mldsa-native`, the `--cfify` argument has been removed temporarily. A TODO comment has been added, and the argument will be restored once the CFI scripts are ported. Signed-off-by: willieyz <[email protected]>
1 parent 03a184c commit 21223b2

File tree

4 files changed

+744
-2
lines changed

4 files changed

+744
-2
lines changed

.github/workflows/base.yml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,34 @@ jobs:
218218
- name: make lib
219219
run: |
220220
make lib
221+
simpasm:
  strategy:
    fail-fast: false
    matrix:
      backend:
        - arg: '--aarch64-clean'
          name: Clean
        # TODO: add backend option after we have optimized/clean separation
        # - arg: ''
        #   name: Optimized
      simplify:
        - arg: ''
          name: Simplified
        - arg: '--no-simplify'
          name: Unmodified
  runs-on: pqcp-arm64
  name: AArch64 dev backend (${{ matrix.simplify.name }})
  steps:
    - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
    - name: Reinstate and test backend
      uses: ./.github/actions/setup-shell
      with:
        nix-shell: 'ci'
        gh_token: ${{ secrets.GITHUB_TOKEN }}
        # Forward the backend selection as well as the simplification mode,
        # so that the `backend` matrix dimension actually reaches autogen.
        script: |
          ./scripts/autogen ${{ matrix.backend.arg }} ${{ matrix.simplify.arg }}
          make clean
          OPT=1 make quickcheck
221249
scan-build:
222250
strategy:
223251
fail-fast: false

.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,8 @@ jobs:
461461
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
462462
- uses: ./.github/actions/setup-shell
463463
with:
464-
nix-shell: 'ci'
464+
nix-shell: 'ci-cross' # Need cross-compiler for ASM simplification
465+
nix-cache: 'true'
465466
gh_token: ${{ secrets.GITHUB_TOKEN }}
466467
script: |
467-
python3 ./scripts/autogen --dry-run
468+
python3 ./scripts/autogen --dry-run --force-cross

scripts/autogen

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,252 @@ class BibliographyEntry:
12441244
return authors
12451245

12461246

1247+
def update_via_simpasm(
    infile_full,
    outdir,
    outfile=None,
    cflags=None,
    preserve_header=True,
    dry_run=False,
    force_cross=False,
):
    """Run ``scripts/simpasm`` on an assembly source and store its output.

    The output is written to a temporary file first and then handed to
    ``update_file`` together with ``dry_run``.

    Args:
        infile_full: Path of the assembly source. It must contain either
            ``aarch64`` or ``x86_64``; the target architecture is inferred
            from that substring.
        outdir: Directory of the output file.
        outfile: Name of the output file inside ``outdir``. Defaults to the
            base name of ``infile_full``.
        cflags: Optional compiler flags forwarded to simpasm via ``--cflags``.
        preserve_header: If ``True``, pass ``-p`` to simpasm.
        dry_run: Forwarded to ``update_file``.
        force_cross: If the source architecture differs from the host and no
            matching cross compiler is on ``PATH``, raise when this is set;
            otherwise skip the file silently.

    Raises:
        Exception: If the architecture cannot be inferred from the path, if a
            required cross toolchain is missing while ``force_cross`` is set,
            or if simpasm exits with a non-zero status.
    """
    status_update("simpasm", infile_full)

    if outfile is None:
        outfile = os.path.basename(infile_full)
    outfile_full = os.path.join(outdir, outfile)

    # The target architecture is inferred from the source path
    if "aarch64" in infile_full:
        source_arch = "aarch64"
    elif "x86_64" in infile_full:
        source_arch = "x86_64"
    else:
        raise Exception(f"Could not detect architecture of source file {infile_full}.")

    # Any host that does not report itself as AArch64 is treated as x86_64
    if platform.machine().lower() in ["arm64", "aarch64"]:
        native_arch = "aarch64"
    else:
        native_arch = "x86_64"

    # Check if we need to use a cross-compiler
    cross_prefix = None
    if native_arch != source_arch:
        cross_prefix = f"{source_arch}-unknown-linux-gnu-"
        if shutil.which(cross_prefix + "gcc") is None:
            if force_cross is False:
                # Best effort: without a cross toolchain the file is left
                # untouched unless the caller insists via force_cross.
                return
            raise Exception(f"Could not find cross toolchain {cross_prefix}")

    with tempfile.NamedTemporaryFile(suffix=".S") as tmp:
        # TODO: "--cfify" is temporarily removed; add it back once the CFI
        # script has been ported.
        cmd = [
            "./scripts/simpasm",
            "--objdump=llvm-objdump",
            # "--cfify",
            "--arch=" + source_arch,
            "-i",
            infile_full,
            "-o",
            tmp.name,
        ]
        if cross_prefix is not None:
            # Stick with llvm-objdump for disassembly
            cmd += ["--cc", cross_prefix + "gcc"]
            cmd += ["--nm", cross_prefix + "nm"]
        if cflags is not None:
            # NOTE(review): no shell is involved, so the double quotes reach
            # simpasm literally -- presumably it strips them; confirm against
            # scripts/simpasm before changing this.
            cmd += [f'--cflags="{cflags}"']
        if preserve_header is True:
            cmd += ["-p"]

        try:
            subprocess.run(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                check=True,
                text=True,
            )
        except subprocess.CalledProcessError as e:
            print(f"Command failed: {' '.join(cmd)}", file=sys.stderr)
            print(f"Exit code: {e.returncode}", file=sys.stderr)
            print(f"stderr: {e.stderr}", file=sys.stderr)
            raise Exception("Failed to run simpasm") from e

        # simpasm wrote to the file by name; rewind our handle and read it back
        tmp.seek(0)
        new_contents = tmp.read().decode()

    update_file(outfile_full, new_contents, dry_run=dry_run)
1327+
1328+
1329+
def update_via_copy(infile_full, outfile_full, dry_run=False, transform=None):
    """Copy a text file to ``outfile_full``, optionally rewriting its content.

    The source is read in full; if ``transform`` is given it is applied to
    the text, and the result is passed to ``update_file`` along with
    ``dry_run``.
    """
    status_update("copy", f"{infile_full} -> {outfile_full}")

    with open(infile_full, "r") as src:
        text = src.read()

    # Leave the text untouched unless the caller supplied a rewrite hook
    result = text if transform is None else transform(text)

    update_file(outfile_full, result, dry_run=dry_run)
1339+
1340+
1341+
def update_via_remove(filename, dry_run=False):
    """Remove a stale autogenerated file.

    Args:
        filename: Path of the file to delete.
        dry_run: If ``True``, do not delete anything; report the stale file
            on stderr and terminate with exit status 1 instead.

    Raises:
        SystemExit: With code 1 when ``dry_run`` is ``True``.
        OSError: If the file cannot be removed.
    """
    if dry_run is True:
        print(
            f"Autogenerated file {filename} needs removing. Have you called scripts/autogen?",
            file=sys.stderr,
        )
        # sys.exit rather than the bare exit() helper: the latter is injected
        # by the site module and is not guaranteed to exist (e.g. python -S).
        sys.exit(1)

    # Remove the file
    os.remove(filename)
1351+
1352+
1353+
def synchronize_file(
    f, in_dir, out_dir, dry_run=False, delete=False, no_simplify=False, **kwargs
):
    """Synchronize one source file into ``out_dir``.

    Returns the base name of ``f`` when it is a source file handled by the
    synchronization, and ``None`` for anything else. With ``delete`` set,
    nothing is written; the base name is only reported. Otherwise ``.S``
    files go through ``update_via_simpasm`` (unless ``no_simplify`` is set,
    in which case they are copied), and all other sources are copied.
    Extra keyword arguments are forwarded to ``update_via_simpasm`` only.
    """
    # Only synchronize sources, but not README.md, Makefile and so on
    if not f.endswith((".c", ".h", ".i", ".inc", ".S")):
        return None

    name = os.path.basename(f)

    # In delete mode the caller only needs to know which names are expected
    if delete is True:
        return name

    if f.endswith(".S") and no_simplify is False:
        update_via_simpasm(f, out_dir, dry_run=dry_run, **kwargs)
        return name

    # Update via copy
    target = os.path.join(out_dir, name)

    # The header guards will also be checked later, but if we
    # don't do it here, the dry-run would fail because of a
    # mismatching intermediate file
    if f.endswith(".h"):

        def transform(content):
            return adjust_header_guard_for_filename(content, target)

    else:
        transform = None

    update_via_copy(f, target, dry_run=dry_run, transform=transform)
    return name
1384+
1385+
1386+
def synchronize_backend(
    in_dir, out_dir, dry_run=False, delete=False, no_simplify=False, **kwargs
):
    """Synchronize every file directly inside ``in_dir`` into ``out_dir``.

    Each file is handed to ``synchronize_file`` on a thread pool. When
    ``delete`` is set, files in ``out_dir`` whose base name was not reported
    by ``synchronize_file`` are afterwards passed to ``update_via_remove``.
    """
    sources = get_files(os.path.join(in_dir, "*"))

    def sync_one(path):
        return synchronize_file(
            path,
            in_dir=in_dir,
            out_dir=out_dir,
            dry_run=dry_run,
            delete=delete,
            no_simplify=no_simplify,
            **kwargs,
        )

    with ThreadPoolExecutor() as pool:
        outcomes = list(pool.map(sync_one, sources))

    if delete is False:
        return

    # Base names that have a counterpart in the source directory
    expected = {name for name in outcomes if name is not None}

    # Check for files in the target directory that have not been copied,
    # and remove them
    for existing in get_files(os.path.join(out_dir, "*")):
        if os.path.basename(existing) not in expected:
            update_via_remove(existing, dry_run=dry_run)
1418+
1419+
1420+
def synchronize_backends(
    *, dry_run=False, force_cross=False, clean=False, delete=False, no_simplify=False
):
    """Synchronize the ``dev/`` backends into the ``mldsa/`` source tree.

    Unless ``delete`` is set, the AArch64 and x86_64 ``meta.h`` files are
    copied first with their header guards adjusted to the destination path.
    Each backend source directory is then passed to ``synchronize_backend``
    with its own compiler flags. ``clean`` selects ``dev/aarch64_clean``
    over ``dev/aarch64_opt``.
    """
    ty = "opt" if clean is False else "clean"

    def guard_fixer(target):
        # Build the header-guard transform for one destination path
        def fix(content):
            return adjust_header_guard_for_filename(content, target)

        return fix

    if delete is False:
        # We may switch the AArch64 arithmetic backend, so adjust the metadata file
        meta_headers = (
            (f"dev/aarch64_{ty}/meta.h", "mldsa/native/aarch64/meta.h"),
            ("dev/x86_64/meta.h", "mldsa/native/x86_64/meta.h"),
        )
        for src, dst in meta_headers:
            update_via_copy(src, dst, dry_run=dry_run, transform=guard_fixer(dst))

    # (source directory, destination directory, compiler flags), in sync order
    backends = (
        (
            f"dev/aarch64_{ty}/src",
            "mldsa/native/aarch64/src",
            "-Imldsa/native/aarch64/src",
        ),
        (
            "dev/fips202/aarch64/src",
            "mldsa/fips202/native/aarch64/src",
            "-Imldsa/fips202/native/aarch64/src -march=armv8.4-a+sha3",
        ),
        (
            "dev/fips202/aarch64",
            "mldsa/fips202/native/aarch64",
            "-Imldsa/fips202/native/aarch64 -march=armv8.4-a+sha3",
        ),
        (
            "dev/x86_64/src",
            "mldsa/native/x86_64/src",
            # Turn off control-flow protection (CET) explicitly. Newer versions of
            # clang turn it on by default and insert endbr64 instructions at every
            # global symbol.
            # We insert endbr64 instruction manually via the MLD_ASM_FN_SYMBOL
            # macro.
            # This leads to duplicate endbr64 instructions causing a failure when
            # comparing the object code before and after simplification.
            "-Imldsa/native/x86_64/src/ -mavx2 -mbmi2 -msse4 -fcf-protection=none",
        ),
    )
    for src_dir, dst_dir, flags in backends:
        synchronize_backend(
            src_dir,
            dst_dir,
            dry_run=dry_run,
            delete=delete,
            force_cross=force_cross,
            no_simplify=no_simplify,
            cflags=flags,
        )
1491+
1492+
12471493
def gen_markdown_citations_for(filename, bibliography, dry_run=False):
12481494

12491495
# Skip BIBLIOGRAPHY.md
@@ -1473,6 +1719,9 @@ def _main():
14731719
formatter_class=argparse.ArgumentDefaultsHelpFormatter
14741720
)
14751721
parser.add_argument("--dry-run", default=False, action="store_true")
1722+
parser.add_argument("--aarch64-clean", default=True, action="store_true")
1723+
parser.add_argument("--no-simplify", default=False, action="store_true")
1724+
parser.add_argument("--force-cross", default=False, action="store_true")
14761725

14771726
args = parser.parse_args()
14781727

@@ -1489,11 +1738,29 @@ def _main():
14891738
gen_avx2_zeta_file(args.dry_run)
14901739
gen_avx2_rej_uniform_table(args.dry_run)
14911740
high_level_status("Generated zeta and lookup tables")
1741+
1742+
synchronize_backends(
1743+
dry_run=args.dry_run,
1744+
clean=args.aarch64_clean,
1745+
no_simplify=args.no_simplify,
1746+
force_cross=args.force_cross,
1747+
)
1748+
high_level_status("Synchronized backends")
1749+
14921750
gen_header_guards(args.dry_run)
14931751
high_level_status("Generated header guards")
14941752
gen_preprocessor_comments(args.dry_run)
14951753
high_level_status("Generated preprocessor comments")
14961754

1755+
synchronize_backends(
1756+
dry_run=args.dry_run,
1757+
clean=args.aarch64_clean,
1758+
delete=True,
1759+
force_cross=args.force_cross,
1760+
no_simplify=args.no_simplify,
1761+
)
1762+
high_level_status("Completed final backend synchronization")
1763+
14971764

14981765
if __name__ == "__main__":
14991766
_main()

0 commit comments

Comments
 (0)