Skip to content

Commit 5c05903

Browse files
committed
Replace literal assignment with memcpy to avoid undefined behaviour.
Fixes #141
1 parent a815cd0 commit 5c05903

File tree

1 file changed

+104
-95
lines changed

1 file changed

+104
-95
lines changed

htscodecs/rANS_static32x16pr_neon.c

Lines changed: 104 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -1238,153 +1238,162 @@ static inline void transpose_and_copy(uint8_t *out, int iN[32],
12381238
// }
12391239

12401240
for (z = 0; z < NX; z+=4) {
1241-
*(uint64_t *)&out[iN[z]] =
1241+
uint64_t t0[4] = {
12421242
((uint64_t)(t[0][z])<< 0) +
12431243
((uint64_t)(t[1][z])<< 8) +
12441244
((uint64_t)(t[2][z])<<16) +
12451245
((uint64_t)(t[3][z])<<24) +
12461246
((uint64_t)(t[4][z])<<32) +
12471247
((uint64_t)(t[5][z])<<40) +
12481248
((uint64_t)(t[6][z])<<48) +
1249-
((uint64_t)(t[7][z])<<56);
1250-
*(uint64_t *)&out[iN[z+1]] =
1251-
((uint64_t)(t[0][z+1])<< 0) +
1252-
((uint64_t)(t[1][z+1])<< 8) +
1253-
((uint64_t)(t[2][z+1])<<16) +
1254-
((uint64_t)(t[3][z+1])<<24) +
1255-
((uint64_t)(t[4][z+1])<<32) +
1256-
((uint64_t)(t[5][z+1])<<40) +
1257-
((uint64_t)(t[6][z+1])<<48) +
1258-
((uint64_t)(t[7][z+1])<<56);
1259-
*(uint64_t *)&out[iN[z+2]] =
1260-
((uint64_t)(t[0][z+2])<< 0) +
1261-
((uint64_t)(t[1][z+2])<< 8) +
1262-
((uint64_t)(t[2][z+2])<<16) +
1263-
((uint64_t)(t[3][z+2])<<24) +
1264-
((uint64_t)(t[4][z+2])<<32) +
1265-
((uint64_t)(t[5][z+2])<<40) +
1266-
((uint64_t)(t[6][z+2])<<48) +
1267-
((uint64_t)(t[7][z+2])<<56);
1268-
*(uint64_t *)&out[iN[z+3]] =
1269-
((uint64_t)(t[0][z+3])<< 0) +
1270-
((uint64_t)(t[1][z+3])<< 8) +
1271-
((uint64_t)(t[2][z+3])<<16) +
1272-
((uint64_t)(t[3][z+3])<<24) +
1273-
((uint64_t)(t[4][z+3])<<32) +
1274-
((uint64_t)(t[5][z+3])<<40) +
1275-
((uint64_t)(t[6][z+3])<<48) +
1276-
((uint64_t)(t[7][z+3])<<56);
1249+
((uint64_t)(t[7][z])<<56),
12771250

1278-
*(uint64_t *)&out[iN[z]+8] =
12791251
((uint64_t)(t[8+0][z])<< 0) +
12801252
((uint64_t)(t[8+1][z])<< 8) +
12811253
((uint64_t)(t[8+2][z])<<16) +
12821254
((uint64_t)(t[8+3][z])<<24) +
12831255
((uint64_t)(t[8+4][z])<<32) +
12841256
((uint64_t)(t[8+5][z])<<40) +
12851257
((uint64_t)(t[8+6][z])<<48) +
1286-
((uint64_t)(t[8+7][z])<<56);
1287-
*(uint64_t *)&out[iN[z+1]+8] =
1288-
((uint64_t)(t[8+0][z+1])<< 0) +
1289-
((uint64_t)(t[8+1][z+1])<< 8) +
1290-
((uint64_t)(t[8+2][z+1])<<16) +
1291-
((uint64_t)(t[8+3][z+1])<<24) +
1292-
((uint64_t)(t[8+4][z+1])<<32) +
1293-
((uint64_t)(t[8+5][z+1])<<40) +
1294-
((uint64_t)(t[8+6][z+1])<<48) +
1295-
((uint64_t)(t[8+7][z+1])<<56);
1296-
*(uint64_t *)&out[iN[z+2]+8] =
1297-
((uint64_t)(t[8+0][z+2])<< 0) +
1298-
((uint64_t)(t[8+1][z+2])<< 8) +
1299-
((uint64_t)(t[8+2][z+2])<<16) +
1300-
((uint64_t)(t[8+3][z+2])<<24) +
1301-
((uint64_t)(t[8+4][z+2])<<32) +
1302-
((uint64_t)(t[8+5][z+2])<<40) +
1303-
((uint64_t)(t[8+6][z+2])<<48) +
1304-
((uint64_t)(t[8+7][z+2])<<56);
1305-
*(uint64_t *)&out[iN[z+3]+8] =
1306-
((uint64_t)(t[8+0][z+3])<< 0) +
1307-
((uint64_t)(t[8+1][z+3])<< 8) +
1308-
((uint64_t)(t[8+2][z+3])<<16) +
1309-
((uint64_t)(t[8+3][z+3])<<24) +
1310-
((uint64_t)(t[8+4][z+3])<<32) +
1311-
((uint64_t)(t[8+5][z+3])<<40) +
1312-
((uint64_t)(t[8+6][z+3])<<48) +
1313-
((uint64_t)(t[8+7][z+3])<<56);
1258+
((uint64_t)(t[8+7][z])<<56),
13141259

1315-
*(uint64_t *)&out[iN[z]+16] =
13161260
((uint64_t)(t[16+0][z])<< 0) +
13171261
((uint64_t)(t[16+1][z])<< 8) +
13181262
((uint64_t)(t[16+2][z])<<16) +
13191263
((uint64_t)(t[16+3][z])<<24) +
13201264
((uint64_t)(t[16+4][z])<<32) +
13211265
((uint64_t)(t[16+5][z])<<40) +
13221266
((uint64_t)(t[16+6][z])<<48) +
1323-
((uint64_t)(t[16+7][z])<<56);
1324-
*(uint64_t *)&out[iN[z+1]+16] =
1325-
((uint64_t)(t[16+0][z+1])<< 0) +
1326-
((uint64_t)(t[16+1][z+1])<< 8) +
1327-
((uint64_t)(t[16+2][z+1])<<16) +
1328-
((uint64_t)(t[16+3][z+1])<<24) +
1329-
((uint64_t)(t[16+4][z+1])<<32) +
1330-
((uint64_t)(t[16+5][z+1])<<40) +
1331-
((uint64_t)(t[16+6][z+1])<<48) +
1332-
((uint64_t)(t[16+7][z+1])<<56);
1333-
*(uint64_t *)&out[iN[z+2]+16] =
1334-
((uint64_t)(t[16+0][z+2])<< 0) +
1335-
((uint64_t)(t[16+1][z+2])<< 8) +
1336-
((uint64_t)(t[16+2][z+2])<<16) +
1337-
((uint64_t)(t[16+3][z+2])<<24) +
1338-
((uint64_t)(t[16+4][z+2])<<32) +
1339-
((uint64_t)(t[16+5][z+2])<<40) +
1340-
((uint64_t)(t[16+6][z+2])<<48) +
1341-
((uint64_t)(t[16+7][z+2])<<56);
1342-
*(uint64_t *)&out[iN[z+3]+16] =
1343-
((uint64_t)(t[16+0][z+3])<< 0) +
1344-
((uint64_t)(t[16+1][z+3])<< 8) +
1345-
((uint64_t)(t[16+2][z+3])<<16) +
1346-
((uint64_t)(t[16+3][z+3])<<24) +
1347-
((uint64_t)(t[16+4][z+3])<<32) +
1348-
((uint64_t)(t[16+5][z+3])<<40) +
1349-
((uint64_t)(t[16+6][z+3])<<48) +
1350-
((uint64_t)(t[16+7][z+3])<<56);
1267+
((uint64_t)(t[16+7][z])<<56),
13511268

1352-
*(uint64_t *)&out[iN[z]+24] =
13531269
((uint64_t)(t[24+0][z])<< 0) +
13541270
((uint64_t)(t[24+1][z])<< 8) +
13551271
((uint64_t)(t[24+2][z])<<16) +
13561272
((uint64_t)(t[24+3][z])<<24) +
13571273
((uint64_t)(t[24+4][z])<<32) +
13581274
((uint64_t)(t[24+5][z])<<40) +
13591275
((uint64_t)(t[24+6][z])<<48) +
1360-
((uint64_t)(t[24+7][z])<<56);
1361-
*(uint64_t *)&out[iN[z+1]+24] =
1276+
((uint64_t)(t[24+7][z])<<56)
1277+
};
1278+
memcpy(&out[iN[z]], &t0, 32);
1279+
1280+
uint64_t t1[4] = {
1281+
((uint64_t)(t[0][z+1])<< 0) +
1282+
((uint64_t)(t[1][z+1])<< 8) +
1283+
((uint64_t)(t[2][z+1])<<16) +
1284+
((uint64_t)(t[3][z+1])<<24) +
1285+
((uint64_t)(t[4][z+1])<<32) +
1286+
((uint64_t)(t[5][z+1])<<40) +
1287+
((uint64_t)(t[6][z+1])<<48) +
1288+
((uint64_t)(t[7][z+1])<<56),
1289+
1290+
((uint64_t)(t[8+0][z+1])<< 0) +
1291+
((uint64_t)(t[8+1][z+1])<< 8) +
1292+
((uint64_t)(t[8+2][z+1])<<16) +
1293+
((uint64_t)(t[8+3][z+1])<<24) +
1294+
((uint64_t)(t[8+4][z+1])<<32) +
1295+
((uint64_t)(t[8+5][z+1])<<40) +
1296+
((uint64_t)(t[8+6][z+1])<<48) +
1297+
((uint64_t)(t[8+7][z+1])<<56),
1298+
1299+
((uint64_t)(t[16+0][z+1])<< 0) +
1300+
((uint64_t)(t[16+1][z+1])<< 8) +
1301+
((uint64_t)(t[16+2][z+1])<<16) +
1302+
((uint64_t)(t[16+3][z+1])<<24) +
1303+
((uint64_t)(t[16+4][z+1])<<32) +
1304+
((uint64_t)(t[16+5][z+1])<<40) +
1305+
((uint64_t)(t[16+6][z+1])<<48) +
1306+
((uint64_t)(t[16+7][z+1])<<56),
1307+
13621308
((uint64_t)(t[24+0][z+1])<< 0) +
13631309
((uint64_t)(t[24+1][z+1])<< 8) +
13641310
((uint64_t)(t[24+2][z+1])<<16) +
13651311
((uint64_t)(t[24+3][z+1])<<24) +
13661312
((uint64_t)(t[24+4][z+1])<<32) +
13671313
((uint64_t)(t[24+5][z+1])<<40) +
13681314
((uint64_t)(t[24+6][z+1])<<48) +
1369-
((uint64_t)(t[24+7][z+1])<<56);
1370-
*(uint64_t *)&out[iN[z+2]+24] =
1315+
((uint64_t)(t[24+7][z+1])<<56)
1316+
};
1317+
memcpy(&out[iN[z+1]], &t1, 32);
1318+
1319+
uint64_t t2[4] = {
1320+
((uint64_t)(t[0][z+2])<< 0) +
1321+
((uint64_t)(t[1][z+2])<< 8) +
1322+
((uint64_t)(t[2][z+2])<<16) +
1323+
((uint64_t)(t[3][z+2])<<24) +
1324+
((uint64_t)(t[4][z+2])<<32) +
1325+
((uint64_t)(t[5][z+2])<<40) +
1326+
((uint64_t)(t[6][z+2])<<48) +
1327+
((uint64_t)(t[7][z+2])<<56),
1328+
1329+
((uint64_t)(t[8+0][z+2])<< 0) +
1330+
((uint64_t)(t[8+1][z+2])<< 8) +
1331+
((uint64_t)(t[8+2][z+2])<<16) +
1332+
((uint64_t)(t[8+3][z+2])<<24) +
1333+
((uint64_t)(t[8+4][z+2])<<32) +
1334+
((uint64_t)(t[8+5][z+2])<<40) +
1335+
((uint64_t)(t[8+6][z+2])<<48) +
1336+
((uint64_t)(t[8+7][z+2])<<56),
1337+
1338+
((uint64_t)(t[16+0][z+2])<< 0) +
1339+
((uint64_t)(t[16+1][z+2])<< 8) +
1340+
((uint64_t)(t[16+2][z+2])<<16) +
1341+
((uint64_t)(t[16+3][z+2])<<24) +
1342+
((uint64_t)(t[16+4][z+2])<<32) +
1343+
((uint64_t)(t[16+5][z+2])<<40) +
1344+
((uint64_t)(t[16+6][z+2])<<48) +
1345+
((uint64_t)(t[16+7][z+2])<<56),
1346+
13711347
((uint64_t)(t[24+0][z+2])<< 0) +
13721348
((uint64_t)(t[24+1][z+2])<< 8) +
13731349
((uint64_t)(t[24+2][z+2])<<16) +
13741350
((uint64_t)(t[24+3][z+2])<<24) +
13751351
((uint64_t)(t[24+4][z+2])<<32) +
13761352
((uint64_t)(t[24+5][z+2])<<40) +
13771353
((uint64_t)(t[24+6][z+2])<<48) +
1378-
((uint64_t)(t[24+7][z+2])<<56);
1379-
*(uint64_t *)&out[iN[z+3]+24] =
1354+
((uint64_t)(t[24+7][z+2])<<56),
1355+
1356+
};
1357+
memcpy(&out[iN[z+2]], &t2, 32);
1358+
1359+
uint64_t t3[4] = {
1360+
((uint64_t)(t[0][z+3])<< 0) +
1361+
((uint64_t)(t[1][z+3])<< 8) +
1362+
((uint64_t)(t[2][z+3])<<16) +
1363+
((uint64_t)(t[3][z+3])<<24) +
1364+
((uint64_t)(t[4][z+3])<<32) +
1365+
((uint64_t)(t[5][z+3])<<40) +
1366+
((uint64_t)(t[6][z+3])<<48) +
1367+
((uint64_t)(t[7][z+3])<<56),
1368+
1369+
((uint64_t)(t[8+0][z+3])<< 0) +
1370+
((uint64_t)(t[8+1][z+3])<< 8) +
1371+
((uint64_t)(t[8+2][z+3])<<16) +
1372+
((uint64_t)(t[8+3][z+3])<<24) +
1373+
((uint64_t)(t[8+4][z+3])<<32) +
1374+
((uint64_t)(t[8+5][z+3])<<40) +
1375+
((uint64_t)(t[8+6][z+3])<<48) +
1376+
((uint64_t)(t[8+7][z+3])<<56),
1377+
1378+
((uint64_t)(t[16+0][z+3])<< 0) +
1379+
((uint64_t)(t[16+1][z+3])<< 8) +
1380+
((uint64_t)(t[16+2][z+3])<<16) +
1381+
((uint64_t)(t[16+3][z+3])<<24) +
1382+
((uint64_t)(t[16+4][z+3])<<32) +
1383+
((uint64_t)(t[16+5][z+3])<<40) +
1384+
((uint64_t)(t[16+6][z+3])<<48) +
1385+
((uint64_t)(t[16+7][z+3])<<56),
1386+
13801387
((uint64_t)(t[24+0][z+3])<< 0) +
13811388
((uint64_t)(t[24+1][z+3])<< 8) +
13821389
((uint64_t)(t[24+2][z+3])<<16) +
13831390
((uint64_t)(t[24+3][z+3])<<24) +
13841391
((uint64_t)(t[24+4][z+3])<<32) +
13851392
((uint64_t)(t[24+5][z+3])<<40) +
13861393
((uint64_t)(t[24+6][z+3])<<48) +
1387-
((uint64_t)(t[24+7][z+3])<<56);
1394+
((uint64_t)(t[24+7][z+3])<<56)
1395+
};
1396+
memcpy(&out[iN[z+3]], &t3, 32);
13881397

13891398
iN[z+0] += 32;
13901399
iN[z+1] += 32;

0 commit comments

Comments (0)