Skip to content

[clang] Using the -O2 compilation option causes useful code segments to be removed. #158476

@PowerfulCat

Description

@PowerfulCat

Main function seems disappear, the following code has been simplified; the actual project code is much more complex.
in main function print the value only in debug mode.

OS: ubuntu24.04

clang:
Ubuntu clang version 18.1.3 (1ubuntu1)
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
Found candidate GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/13
Selected GCC installation: /usr/bin/../lib/gcc/x86_64-linux-gnu/13
Candidate multilib: .;@m64
Selected multilib: .;@m64

build cmd:
// bad
clang++ -std=c++20 -O2 -g ./bug.cpp -o ./bug

// ok when remove -O2 option
clang++ -std=c++20 -g ./bug.cpp -o ./bug

I tried other clang version(such as 20.1.8, 18.1.3, 17.0.6, 16.0.6, 15.0.7), the result is the same.

// bug.cpp
#include<stdint.h>
#include<malloc.h>

using size_t                = decltype(sizeof(0));

inline uint64_t count_of_set(uint64_t v){
    return __builtin_popcountll(v);
}

inline uint64_t ring_shift_left(uint64_t v, size_t s){
    uint64_t a = v << s;
    uint64_t b = v >> (64 - s); 
    return a | b;
}

inline uint64_t ring_shift_right(uint64_t v, size_t s){
    uint64_t a = v >> s;
    uint64_t b = v << (64 - s); 
    return a | b;
}

template<class seq_t>
inline uint64_t hashx(seq_t value, size_t seed = 0){
    auto y                  = uint64_t(99999989ull * 99999971);
    auto x                  = uint64_t(seed);
    auto mem_block          = value;
    auto total_bytes        = sizeof(value);
    auto slice_bytes        = sizeof(uint64_t) >= total_bytes ? total_bytes : sizeof(uint64_t);
    auto offset             = size_t{};

    while(total_bytes != 0){
        auto temp           = uint64_t{};

        for(size_t i = 0, shift = 0; i < slice_bytes; i++, shift += sizeof(uint8_t) * 8){
            temp           |= uint64_t(((uint8_t *)(&mem_block))[offset]) << shift;
        }

        x                  += count_of_set(y + temp);
        temp               -= count_of_set(x + y);
        y                  += count_of_set(x + temp);
        x                  -= ring_shift_left(y + temp, x);
        temp               += ring_shift_right(x + y, temp);
        y                  -= ring_shift_left(x + temp, y);
        total_bytes        -= slice_bytes;
        offset             += slice_bytes;

        if (total_bytes < slice_bytes){
            slice_bytes     = total_bytes;
        }
    }

    return y;
}

inline void * operator new(size_t, void * ptr, void *, int) noexcept {
    return ptr;
}

#define xnew(...)           new(__VA_ARGS__, nullptr, 0)

enum class hashmap_remove_result_t{
    success,
    item_not_exist,
};

enum class hashmap_take_out_result_t{
    success,
    item_not_exist,
};

enum class hashmap_set_result_t{
    success,
    overrided,
};

template<class key_t>
struct node{
    using node_t = node<key_t>;
    key_t key;
    node<key_t> * next;

    node(key_t const & key) : 
        next(nullptr) {
        (key_t &)this->key = key;
    }

    node() : next(this) {}

    hashmap_set_result_t set(key_t const & key){
        if (is_empty()){
            xnew (this) node(key);
            return hashmap_set_result_t::success;
        }

        for(auto cur = this; ; cur = cur->next){
            if ((key_t &)cur->key == (key_t &)key){
                (key_t &)cur->key = key;
                return hashmap_set_result_t::overrided;
            }
            if (cur->next == nullptr){
                cur->next = new node_t(key);
                return hashmap_set_result_t::success;
            }
        }
    }

    key_t * get(key_t const & key){
        if (not is_empty()){
            for(auto cur = this; cur != nullptr; cur = cur->next){
                if ((key_t &)cur->key == (key_t &)key){
                    return &cur->key;
                }
            }
        }
        return nullptr;
    }

    void free(){
        if (is_empty()){
            return;
        }
        while(next != nullptr){
            auto temp = next;
            key_t * k = (key_t *)temp->key;
            k->~key_t();
            next      = next->next;
            delete temp;
        }

        key_t * k = (key_t *)&this->key;
        k->~key_t();
        this->next = this;
    }

    bool is_empty() const {
        return this->next == this;
    }
};

template<class key_t>
struct khashmap{
    using node_t   = node<key_t>;
    size_t                  m_lines;
    size_t                  m_count;
    node_t *                m_nodes;

    static constexpr size_t multi         = 4;
    static constexpr size_t start_capcity = 16;

    khashmap() : 
        m_lines(start_capcity), 
        m_count(0), 
        m_nodes(new node_t[m_lines]){
    }

    ~khashmap(){
        this->clear();
        this->free();
    }

    void clear() {
        if (m_nodes != nullptr){
            for(size_t i = 0; i < m_lines; i++){
                if (m_nodes[i].is_empty()){
                    continue;
                }
                m_nodes[i].free();
            }
            m_count = 0;
        }
    }

    key_t * get(key_t const & key) const {
        size_t index = addressing(key);
        return m_nodes[index].get(key);
    }

    void set(key_t const & key, hashmap_set_result_t * state = nullptr){
        size_t index = addressing(key);
        node_t & node = m_nodes[index];
        hashmap_set_result_t sta = node.set(key);

        if (state != nullptr){
            state[0] = sta;
        }
        if (sta == hashmap_set_result_t::success){
            m_count   += 1;
        }
        if (m_lines == m_count){
            // resize(m_lines * multi);
        }
    }

    size_t addressing(key_t const & key) const {
        auto hash   = hashx(key);
        auto index  = hash % m_lines;
        return index;
    }

    void free(){
        delete [] m_nodes;
        m_nodes = nullptr;
    }
};

int main(){
    khashmap<size_t> map;
    map.set(10);

    if (auto r = map.get(10); r){
        printf("%ld\n", *r);
    }
    return 0;
}
../main.exe:      elf64-x86-64

SYMBOL TABLE:
0000000000000000 l    df *ABS*	0000000000000000              Scrt1.o
000000000000037c l     O .note.ABI-tag	0000000000000020              __abi_tag
0000000000000000 l    df *ABS*	0000000000000000              crtstuff.c
0000000000001070 l     F .text	0000000000000000              deregister_tm_clones
00000000000010a0 l     F .text	0000000000000000              register_tm_clones
00000000000010e0 l     F .text	0000000000000000              __do_global_dtors_aux
0000000000004010 l     O .bss	0000000000000001              completed.0
0000000000003dd8 l     O .fini_array	0000000000000000              __do_global_dtors_aux_fini_array_entry
0000000000001120 l     F .text	0000000000000000              frame_dummy
0000000000003dd0 l     O .init_array	0000000000000000              __frame_dummy_init_array_entry
0000000000000000 l    df *ABS*	0000000000000000              bug.cpp
0000000000000000 l    df *ABS*	0000000000000000              crtstuff.c
0000000000002094 l     O .eh_frame	0000000000000000              __FRAME_END__
0000000000000000 l    df *ABS*	0000000000000000              
0000000000002004 l       .eh_frame_hdr	0000000000000000              __GNU_EH_FRAME_HDR
0000000000003de0 l     O .dynamic	0000000000000000              _DYNAMIC
0000000000003fe8 l     O .got.plt	0000000000000000              _GLOBAL_OFFSET_TABLE_
0000000000004010 g       .data	0000000000000000              _edata
0000000000004000  w      .data	0000000000000000              data_start
0000000000002000 g     O .rodata	0000000000000004              _IO_stdin_used
0000000000000000  w    F *UND*	0000000000000000              __cxa_finalize@GLIBC_2.2.5
0000000000001130 g     F .text	0000000000000000              main
0000000000004008 g     O .data	0000000000000000              .hidden __dso_handle
0000000000001130 g     F .fini	0000000000000000              .hidden _fini
0000000000000000       F *UND*	0000000000000000              __libc_start_main@GLIBC_2.34
0000000000001040 g     F .text	0000000000000026              _start
0000000000001000 g     F .init	0000000000000000              .hidden _init
0000000000004010 g     O .data	0000000000000000              .hidden __TMC_END__
0000000000004000 g       .data	0000000000000000              __data_start
0000000000004018 g       .bss	0000000000000000              _end
0000000000004010 g       .bss	0000000000000000              __bss_start
0000000000000000  w      *UND*	0000000000000000              _ITM_deregisterTMCloneTable
0000000000000000  w      *UND*	0000000000000000              __gmon_start__
0000000000000000  w      *UND*	0000000000000000              _ITM_registerTMCloneTable



Disassembly of section .init:

0000000000001000 <_init>:
    1000:	f3 0f 1e fa          	endbr64
    1004:	48 83 ec 08          	sub    $0x8,%rsp
    1008:	48 8b 05 c9 2f 00 00 	mov    0x2fc9(%rip),%rax        # 3fd8 <__gmon_start__@Base>
    100f:	48 85 c0             	test   %rax,%rax
    1012:	74 02                	je     1016 <_init+0x16>
    1014:	ff d0                	call   *%rax
    1016:	48 83 c4 08          	add    $0x8,%rsp
    101a:	c3                   	ret

Disassembly of section .plt:

0000000000001020 <.plt>:
    1020:	ff 35 ca 2f 00 00    	push   0x2fca(%rip)        # 3ff0 <_GLOBAL_OFFSET_TABLE_+0x8>
    1026:	ff 25 cc 2f 00 00    	jmp    *0x2fcc(%rip)        # 3ff8 <_GLOBAL_OFFSET_TABLE_+0x10>
    102c:	0f 1f 40 00          	nopl   0x0(%rax)

Disassembly of section .plt.got:

0000000000001030 <__cxa_finalize@plt>:
    1030:	ff 25 8a 2f 00 00    	jmp    *0x2f8a(%rip)        # 3fc0 <__cxa_finalize@GLIBC_2.2.5>
    1036:	66 90                	xchg   %ax,%ax

Disassembly of section .text:

0000000000001040 <_start>:
    1040:	f3 0f 1e fa          	endbr64
    1044:	31 ed                	xor    %ebp,%ebp
    1046:	49 89 d1             	mov    %rdx,%r9
    1049:	5e                   	pop    %rsi
    104a:	48 89 e2             	mov    %rsp,%rdx
    104d:	48 83 e4 f0          	and    $0xfffffffffffffff0,%rsp
    1051:	50                   	push   %rax
    1052:	54                   	push   %rsp
    1053:	45 31 c0             	xor    %r8d,%r8d
    1056:	31 c9                	xor    %ecx,%ecx
    1058:	48 8d 3d d1 00 00 00 	lea    0xd1(%rip),%rdi        # 1130 <main>
    105f:	ff 15 63 2f 00 00    	call   *0x2f63(%rip)        # 3fc8 <__libc_start_main@GLIBC_2.34>
    1065:	f4                   	hlt
    1066:	66 2e 0f 1f 84 00 00 	cs nopw 0x0(%rax,%rax,1)
    106d:	00 00 00 

0000000000001070 <deregister_tm_clones>:
    1070:	48 8d 3d 99 2f 00 00 	lea    0x2f99(%rip),%rdi        # 4010 <__TMC_END__>
    1077:	48 8d 05 92 2f 00 00 	lea    0x2f92(%rip),%rax        # 4010 <__TMC_END__>
    107e:	48 39 f8             	cmp    %rdi,%rax
    1081:	74 15                	je     1098 <deregister_tm_clones+0x28>
    1083:	48 8b 05 46 2f 00 00 	mov    0x2f46(%rip),%rax        # 3fd0 <_ITM_deregisterTMCloneTable@Base>
    108a:	48 85 c0             	test   %rax,%rax
    108d:	74 09                	je     1098 <deregister_tm_clones+0x28>
    108f:	ff e0                	jmp    *%rax
    1091:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)
    1098:	c3                   	ret
    1099:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)

00000000000010a0 <register_tm_clones>:
    10a0:	48 8d 3d 69 2f 00 00 	lea    0x2f69(%rip),%rdi        # 4010 <__TMC_END__>
    10a7:	48 8d 35 62 2f 00 00 	lea    0x2f62(%rip),%rsi        # 4010 <__TMC_END__>
    10ae:	48 29 fe             	sub    %rdi,%rsi
    10b1:	48 89 f0             	mov    %rsi,%rax
    10b4:	48 c1 ee 3f          	shr    $0x3f,%rsi
    10b8:	48 c1 f8 03          	sar    $0x3,%rax
    10bc:	48 01 c6             	add    %rax,%rsi
    10bf:	48 d1 fe             	sar    $1,%rsi
    10c2:	74 14                	je     10d8 <register_tm_clones+0x38>
    10c4:	48 8b 05 15 2f 00 00 	mov    0x2f15(%rip),%rax        # 3fe0 <_ITM_registerTMCloneTable@Base>
    10cb:	48 85 c0             	test   %rax,%rax
    10ce:	74 08                	je     10d8 <register_tm_clones+0x38>
    10d0:	ff e0                	jmp    *%rax
    10d2:	66 0f 1f 44 00 00    	nopw   0x0(%rax,%rax,1)
    10d8:	c3                   	ret
    10d9:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)

00000000000010e0 <__do_global_dtors_aux>:
    10e0:	f3 0f 1e fa          	endbr64
    10e4:	80 3d 25 2f 00 00 00 	cmpb   $0x0,0x2f25(%rip)        # 4010 <__TMC_END__>
    10eb:	75 2b                	jne    1118 <__do_global_dtors_aux+0x38>
    10ed:	55                   	push   %rbp
    10ee:	48 83 3d ca 2e 00 00 	cmpq   $0x0,0x2eca(%rip)        # 3fc0 <__cxa_finalize@GLIBC_2.2.5>
    10f5:	00 
    10f6:	48 89 e5             	mov    %rsp,%rbp
    10f9:	74 0c                	je     1107 <__do_global_dtors_aux+0x27>
    10fb:	48 8b 3d 06 2f 00 00 	mov    0x2f06(%rip),%rdi        # 4008 <__dso_handle>
    1102:	e8 29 ff ff ff       	call   1030 <__cxa_finalize@plt>
    1107:	e8 64 ff ff ff       	call   1070 <deregister_tm_clones>
    110c:	c6 05 fd 2e 00 00 01 	movb   $0x1,0x2efd(%rip)        # 4010 <__TMC_END__>
    1113:	5d                   	pop    %rbp
    1114:	c3                   	ret
    1115:	0f 1f 00             	nopl   (%rax)
    1118:	c3                   	ret
    1119:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)

0000000000001120 <frame_dummy>:
    1120:	f3 0f 1e fa          	endbr64
    1124:	e9 77 ff ff ff       	jmp    10a0 <register_tm_clones>
    1129:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)

Disassembly of section .fini:

0000000000001130 <_fini>:
    1130:	f3 0f 1e fa          	endbr64
    1134:	48 83 ec 08          	sub    $0x8,%rsp
    1138:	48 83 c4 08          	add    $0x8,%rsp
    113c:	c3                   	ret

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions