Skip to content

O2 flag generates segmentation fault in my code #1763

@Joao-Pedro-Cabral

Description

@Joao-Pedro-Cabral

Hello,

I'm using this function from PQClean:

/*
 * Copy `len` bytes from `src` to `dst`, one byte at a time.
 *
 * Used to move the object representation of integer words in and out of
 * the PRNG state through `unsigned char` accesses, instead of
 * dereferencing the state buffer through casted uint32_t / uint64_t
 * pointers.
 */
static void
prng_init_copy_bytes(unsigned char *dst, const unsigned char *src,
    unsigned len) {
    unsigned k;

    for (k = 0; k < len; k ++) {
        dst[k] = src[k];
    }
}

/*
 * Initialize the state of PRNG `p` from the SHAKE256 context `src`.
 *
 * Parameters:
 *   p    PRNG whose state (p->state.d) is filled; it is then passed to
 *        PQCLEAN_FALCON512_CLEAN_prng_refill().
 *   src  context from which 56 bytes are extracted with
 *        inner_shake256_extract().
 *
 * The 56 extracted bytes are decoded as 14 little-endian 32-bit words,
 * each stored as a native uint32_t at byte offset 4*i of the state. The
 * two words at byte offsets 48 and 52 are then merged into a single
 * native uint64_t stored at byte offset 48.
 */
void
PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, inner_shake256_context *src) {
    /*
     * To ensure reproducibility for a given seed, we
     * must enforce little-endian interpretation of
     * the state words.
     */
    uint8_t tmp[56];
    uint32_t lo, hi;
    uint64_t merged;
    int i;

    /*
     * All state accesses go through this byte pointer, so no assumption
     * is made about the declared element type or alignment of
     * p->state.d, and no two differently-typed pointers alias it.
     */
    unsigned char *state = (unsigned char *) p->state.d;

    inner_shake256_extract(src, tmp, 56);
    for (i = 0; i < 14; i ++) {
        uint32_t w;

        w = (uint32_t)tmp[(i << 2) + 0]
            | ((uint32_t)tmp[(i << 2) + 1] << 8)
            | ((uint32_t)tmp[(i << 2) + 2] << 16)
            | ((uint32_t)tmp[(i << 2) + 3] << 24);
        /* Store w in native byte order at word index i. */
        prng_init_copy_bytes(state + (i << 2),
            (const unsigned char *) &w, (unsigned) sizeof w);
    }

    /* Re-read words 12 and 13 (byte offsets 48 and 52) as native uint32_t. */
    prng_init_copy_bytes((unsigned char *) &lo, state + 48,
        (unsigned) sizeof lo);
    prng_init_copy_bytes((unsigned char *) &hi, state + 52,
        (unsigned) sizeof hi);

    /* Combine them into one native 64-bit word: low half lo, high half hi. */
    merged = (uint64_t) lo + ((uint64_t) hi << 32);
    prng_init_copy_bytes(state + 48, (const unsigned char *) &merged,
        (unsigned) sizeof merged);

    PQCLEAN_FALCON512_CLEAN_prng_refill(p);
}

When I run my code on my x86_64 PC (with either the O2 or the O0 flag), everything works fine.

However, when I compile with the O2 flag and run the code in the gem5 simulator, I get a segmentation fault.
First, I noticed that with the O2 flag the loop for (i = 0; i < 14; i ++) is auto-vectorized.
Afterwards, I used remote gdb in gem5 to debug my code, and I discovered that the segmentation fault is caused by this instruction:
22768: 02b70187 vlm.v v3,(a4)
This a4 comes from:
2275c: 8ec18713 addi a4,gp,-1812 # 390ec <__TMC_END__+0x4>

Additionally, I tried another program with the same vlm.v instruction and the same vtype and vl configuration in gem5 (but with a different memory address), and no segmentation fault happened (this was only to verify that it is not an internal gem5 error).

What do you think about this issue? If you need any more information from my setup/code, feel free to ask me (I can share anything from this code).

O0 disassembly code from this function:

000000000002f430 <PQCLEAN_FALCON512_CLEAN_prng_init>:
   2f430:	7175                	addi	sp,sp,-144
   2f432:	e506                	sd	ra,136(sp)
   2f434:	e122                	sd	s0,128(sp)
   2f436:	0900                	addi	s0,sp,144
   2f438:	f6a43c23          	sd	a0,-136(s0)
   2f43c:	f6b43823          	sd	a1,-144(s0)
   2f440:	f7843783          	ld	a5,-136(s0)
   2f444:	20878793          	addi	a5,a5,520
   2f448:	fef43023          	sd	a5,-32(s0)
   2f44c:	f7843783          	ld	a5,-136(s0)
   2f450:	20878793          	addi	a5,a5,520
   2f454:	fcf43c23          	sd	a5,-40(s0)
   2f458:	f8840793          	addi	a5,s0,-120
   2f45c:	f7043603          	ld	a2,-144(s0)
   2f460:	03800593          	li	a1,56
   2f464:	853e                	mv	a0,a5
   2f466:	963e40ef          	jal	13dc8 <shake256_inc_squeeze>
   2f46a:	fe042623          	sw	zero,-20(s0)
   2f46e:	a045                	j	2f50e <PQCLEAN_FALCON512_CLEAN_prng_init+0xde>
   2f470:	fec42783          	lw	a5,-20(s0)
   2f474:	0027979b          	slliw	a5,a5,0x2
   2f478:	2781                	sext.w	a5,a5
   2f47a:	17c1                	addi	a5,a5,-16
   2f47c:	97a2                	add	a5,a5,s0
   2f47e:	f987c783          	lbu	a5,-104(a5)
   2f482:	0007871b          	sext.w	a4,a5
   2f486:	fec42783          	lw	a5,-20(s0)
   2f48a:	0027979b          	slliw	a5,a5,0x2
   2f48e:	2781                	sext.w	a5,a5
   2f490:	2785                	addiw	a5,a5,1
   2f492:	2781                	sext.w	a5,a5
   2f494:	17c1                	addi	a5,a5,-16
   2f496:	97a2                	add	a5,a5,s0
   2f498:	f987c783          	lbu	a5,-104(a5)
   2f49c:	2781                	sext.w	a5,a5
   2f49e:	0087979b          	slliw	a5,a5,0x8
   2f4a2:	2781                	sext.w	a5,a5
   2f4a4:	8fd9                	or	a5,a5,a4
   2f4a6:	0007871b          	sext.w	a4,a5
   2f4aa:	fec42783          	lw	a5,-20(s0)
   2f4ae:	0027979b          	slliw	a5,a5,0x2
   2f4b2:	2781                	sext.w	a5,a5
   2f4b4:	2789                	addiw	a5,a5,2
   2f4b6:	2781                	sext.w	a5,a5
   2f4b8:	17c1                	addi	a5,a5,-16
   2f4ba:	97a2                	add	a5,a5,s0
   2f4bc:	f987c783          	lbu	a5,-104(a5)
   2f4c0:	2781                	sext.w	a5,a5
   2f4c2:	0107979b          	slliw	a5,a5,0x10
   2f4c6:	2781                	sext.w	a5,a5
   2f4c8:	8fd9                	or	a5,a5,a4
   2f4ca:	0007871b          	sext.w	a4,a5
   2f4ce:	fec42783          	lw	a5,-20(s0)
   2f4d2:	0027979b          	slliw	a5,a5,0x2
   2f4d6:	2781                	sext.w	a5,a5
   2f4d8:	278d                	addiw	a5,a5,3
   2f4da:	2781                	sext.w	a5,a5
   2f4dc:	17c1                	addi	a5,a5,-16
   2f4de:	97a2                	add	a5,a5,s0
   2f4e0:	f987c783          	lbu	a5,-104(a5)
   2f4e4:	2781                	sext.w	a5,a5
   2f4e6:	0187979b          	slliw	a5,a5,0x18
   2f4ea:	2781                	sext.w	a5,a5
   2f4ec:	8fd9                	or	a5,a5,a4
   2f4ee:	fcf42223          	sw	a5,-60(s0)
   2f4f2:	fec42783          	lw	a5,-20(s0)
   2f4f6:	078a                	slli	a5,a5,0x2
   2f4f8:	fe043703          	ld	a4,-32(s0)
   2f4fc:	97ba                	add	a5,a5,a4
   2f4fe:	fc442703          	lw	a4,-60(s0)
   2f502:	c398                	sw	a4,0(a5)
   2f504:	fec42783          	lw	a5,-20(s0)
   2f508:	2785                	addiw	a5,a5,1
   2f50a:	fef42623          	sw	a5,-20(s0)
   2f50e:	fec42783          	lw	a5,-20(s0)
   2f512:	0007871b          	sext.w	a4,a5
   2f516:	47b5                	li	a5,13
   2f518:	f4e7dce3          	bge	a5,a4,2f470 <PQCLEAN_FALCON512_CLEAN_prng_init+0x40>
   2f51c:	fe043783          	ld	a5,-32(s0)
   2f520:	03078793          	addi	a5,a5,48
   2f524:	439c                	lw	a5,0(a5)
   2f526:	1782                	slli	a5,a5,0x20
   2f528:	9381                	srli	a5,a5,0x20
   2f52a:	fcf43823          	sd	a5,-48(s0)
   2f52e:	fe043783          	ld	a5,-32(s0)
   2f532:	03478793          	addi	a5,a5,52
   2f536:	439c                	lw	a5,0(a5)
   2f538:	1782                	slli	a5,a5,0x20
   2f53a:	9381                	srli	a5,a5,0x20
   2f53c:	fcf43423          	sd	a5,-56(s0)
   2f540:	fc843783          	ld	a5,-56(s0)
   2f544:	02079693          	slli	a3,a5,0x20
   2f548:	fd843783          	ld	a5,-40(s0)
   2f54c:	03078793          	addi	a5,a5,48
   2f550:	fd043703          	ld	a4,-48(s0)
   2f554:	9736                	add	a4,a4,a3
   2f556:	e398                	sd	a4,0(a5)
   2f558:	f7843503          	ld	a0,-136(s0)
   2f55c:	010000ef          	jal	2f56c <PQCLEAN_FALCON512_CLEAN_prng_refill>
   2f560:	0001                	nop
   2f562:	0001                	nop
   2f564:	60aa                	ld	ra,136(sp)
   2f566:	640a                	ld	s0,128(sp)
   2f568:	6149                	addi	sp,sp,144
   2f56a:	8082                	ret

O2 disassembly code from this function:

0000000000022748 <PQCLEAN_FALCON512_CLEAN_prng_init>:
   22748:	715d                	addi	sp,sp,-80
   2274a:	e0a2                	sd	s0,64(sp)
   2274c:	862e                	mv	a2,a1
   2274e:	842a                	mv	s0,a0
   22750:	03800593          	li	a1,56
   22754:	0028                	addi	a0,sp,8
   22756:	e486                	sd	ra,72(sp)
   22758:	9c3ef0ef          	jal	1211a <shake256_inc_squeeze>
   2275c:	8ec18713          	addi	a4,gp,-1812 # 390ec <__TMC_END__+0x4>
   22760:	8ed18793          	addi	a5,gp,-1811 # 390ed <__TMC_END__+0x5>
   22764:	cc627057          	vsetivli	zero,4,e8,mf4,ta,ma
   22768:	02b70187          	vlm.v	v3,(a4)
   2276c:	02b78107          	vlm.v	v2,(a5)
   22770:	20840713          	addi	a4,s0,520
   22774:	003c                	addi	a5,sp,8
   22776:	23840513          	addi	a0,s0,568
   2277a:	00478593          	addi	a1,a5,4
   2277e:	02058087          	vle8.v	v1,(a1)
   22782:	9e303057          	vmv1r.v	v0,v3
   22786:	02078207          	vle8.v	v4,(a5)
   2278a:	00878613          	addi	a2,a5,8
   2278e:	02060407          	vle8.v	v8,(a2)
   22792:	00c78693          	addi	a3,a5,12
   22796:	5e1025d7          	vcompress.vm	v11,v1,v0
   2279a:	9e203057          	vmv1r.v	v0,v2
   2279e:	02068387          	vle8.v	v7,(a3)
   227a2:	07c1                	addi	a5,a5,16
   227a4:	5e102557          	vcompress.vm	v10,v1,v0
   227a8:	9e303057          	vmv1r.v	v0,v3
   227ac:	5e402357          	vcompress.vm	v6,v4,v0
   227b0:	9e203057          	vmv1r.v	v0,v2
   227b4:	3ab13357          	vslideup.vi	v6,v11,2
   227b8:	5e4020d7          	vcompress.vm	v1,v4,v0
   227bc:	5e802257          	vcompress.vm	v4,v8,v0
   227c0:	9e303057          	vmv1r.v	v0,v3
   227c4:	3aa130d7          	vslideup.vi	v1,v10,2
   227c8:	5e8022d7          	vcompress.vm	v5,v8,v0
   227cc:	9e203057          	vmv1r.v	v0,v2
   227d0:	5e7024d7          	vcompress.vm	v9,v7,v0
   227d4:	9e303057          	vmv1r.v	v0,v3
   227d8:	3a913257          	vslideup.vi	v4,v9,2
   227dc:	5e702457          	vcompress.vm	v8,v7,v0
   227e0:	5e1024d7          	vcompress.vm	v9,v1,v0
   227e4:	5e4023d7          	vcompress.vm	v7,v4,v0
   227e8:	3a8132d7          	vslideup.vi	v5,v8,2
   227ec:	5e602457          	vcompress.vm	v8,v6,v0
   227f0:	3a7134d7          	vslideup.vi	v9,v7,2
   227f4:	5e502557          	vcompress.vm	v10,v5,v0
   227f8:	9e203057          	vmv1r.v	v0,v2
   227fc:	3aa13457          	vslideup.vi	v8,v10,2
   22800:	5e6023d7          	vcompress.vm	v7,v6,v0
   22804:	5e502557          	vcompress.vm	v10,v5,v0
   22808:	5e102357          	vcompress.vm	v6,v1,v0
   2280c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22810:	4a9220d7          	vzext.vf4	v1,v9
   22814:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   22818:	5e4024d7          	vcompress.vm	v9,v4,v0
   2281c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22820:	4a8222d7          	vzext.vf4	v5,v8
   22824:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   22828:	3aa133d7          	vslideup.vi	v7,v10,2
   2282c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22830:	961830d7          	vsll.vi	v1,v1,16
   22834:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   22838:	3a913357          	vslideup.vi	v6,v9,2
   2283c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22840:	965c32d7          	vsll.vi	v5,v5,24
   22844:	0cf07057          	vsetvli	zero,zero,e16,mf2,ta,ma
   22848:	4a732257          	vzext.vf2	v4,v7
   2284c:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22850:	2a1280d7          	vor.vv	v1,v1,v5
   22854:	4a6223d7          	vzext.vf4	v7,v6
   22858:	0cf07057          	vsetvli	zero,zero,e16,mf2,ta,ma
   2285c:	96443257          	vsll.vi	v4,v4,8
   22860:	0d007057          	vsetvli	zero,zero,e32,m1,ta,ma
   22864:	2a1380d7          	vor.vv	v1,v1,v7
   22868:	4a4322d7          	vzext.vf2	v5,v4
   2286c:	2a1280d7          	vor.vv	v1,v1,v5
   22870:	020760a7          	vse32.v	v1,(a4)
   22874:	0741                	addi	a4,a4,16
   22876:	00e50563          	beq	a0,a4,22880 <PQCLEAN_FALCON512_CLEAN_prng_init+0x138>
   2287a:	0c607057          	vsetvli	zero,zero,e8,mf4,ta,ma
   2287e:	bdf5                	j	2277a <PQCLEAN_FALCON512_CLEAN_prng_init+0x32>
   22880:	03914603          	lbu	a2,57(sp)
   22884:	03a14783          	lbu	a5,58(sp)
   22888:	03d14683          	lbu	a3,61(sp)
   2288c:	03e14583          	lbu	a1,62(sp)
   22890:	03814803          	lbu	a6,56(sp)
   22894:	03b14703          	lbu	a4,59(sp)
   22898:	0107979b          	slliw	a5,a5,0x10
   2289c:	0086161b          	slliw	a2,a2,0x8
   228a0:	03c14503          	lbu	a0,60(sp)
   228a4:	8e5d                	or	a2,a2,a5
   228a6:	03f14783          	lbu	a5,63(sp)
   228aa:	0105959b          	slliw	a1,a1,0x10
   228ae:	0086969b          	slliw	a3,a3,0x8
   228b2:	01066633          	or	a2,a2,a6
   228b6:	8ecd                	or	a3,a3,a1
   228b8:	0187171b          	slliw	a4,a4,0x18
   228bc:	8ec9                	or	a3,a3,a0
   228be:	8f51                	or	a4,a4,a2
   228c0:	0187979b          	slliw	a5,a5,0x18
   228c4:	8fd5                	or	a5,a5,a3
   228c6:	1702                	slli	a4,a4,0x20
   228c8:	9301                	srli	a4,a4,0x20
   228ca:	1782                	slli	a5,a5,0x20
   228cc:	8fd9                	or	a5,a5,a4
   228ce:	22f43c23          	sd	a5,568(s0)
   228d2:	8522                	mv	a0,s0
   228d4:	969ff0ef          	jal	2223c <PQCLEAN_FALCON512_CLEAN_prng_refill>
   228d8:	60a6                	ld	ra,72(sp)
   228da:	6406                	ld	s0,64(sp)
   228dc:	6161                	addi	sp,sp,80
   228de:	8082                	ret

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions