Skip to content

Commit ce3d546

Browse files
committed
x86 asm: move exchange instructions from x86-assembly-cheat
1 parent 88a1c91 commit ce3d546

File tree

5 files changed

+117
-8
lines changed

5 files changed

+117
-8
lines changed

README.adoc

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11793,9 +11793,20 @@ Programs under link:userland/cpp/[] are examples of link:https://en.wikipedia.or
1179311793

1179411794
* link:userland/cpp/empty.cpp[]
1179511795
* link:userland/cpp/hello.cpp[]
11796-
* `<atomic>` 32 "Atomic operations library"
11796+
* `<atomic>`: <<cpp17>> 32 "Atomic operations library"
1179711797
** link:userland/cpp/atomic.cpp[]
1179811798

11799+
==== C++ standards
11800+
11801+
Like for C, you have to pay for the standards... insane. So we just use the closest free drafts instead.
11802+
11803+
https://stackoverflow.com/questions/81656/where-do-i-find-the-current-c-or-c-standard-documents
11804+
11805+
[[cpp17]]
11806+
===== C++17 N4659 standards draft
11807+
11808+
http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4659.pdf
11809+
1179911810
=== POSIX
1180011811

1180111812
Programs under link:userland/posix/[] are examples of POSIX C programming.
@@ -12538,6 +12549,33 @@ Bibliography:
1253812549
* link:userland/arch/x86_64/bswap.S[]: BSWAP: convert between little endian and big endian
1253912550
* link:userland/arch/x86_64/pushf.S[] PUSHF: <<x86-push-and-pop-instructions,push and pop>> the <<x86-flags-registers>> to / from the stack
1254012551

12552+
==== x86 exchange instructions
12553+
12554+
<<intel-manual-1>> 7.3.1.2 "Exchange Instructions":
12555+
12556+
* link:userland/arch/x86_64/xadd.S[] XADD: exchange and add. This is how C++ `<atomic>`'s `++` is implemented in GCC 5.1. TODO: why is the exchange part needed?
12557+
* link:userland/arch/x86_64/xchg.S[] XCHG: exchange two values
12558+
12559+
TODO: concrete multi-thread <<gcc-inline-assembly>> examples of how all those instructions are normally used as synchronization primitives.
12560+
12561+
===== x86 CMPXCHG instruction
12562+
12563+
link:userland/arch/x86_64/cmpxchg.S[]
12564+
12565+
CMPXCHG: compare and exchange. `cmpxchg a, b` does:
12566+
12567+
....
12568+
if (RAX == b) {
12569+
ZF = 1
12570+
b = a
12571+
} else {
12572+
ZF = 0
12573+
RAX = b
12574+
}
12575+
....
12576+
12577+
TODO application: https://stackoverflow.com/questions/6935442/x86-spinlock-using-cmpxchg
12578+
1254112579
==== x86 PUSH and POP instructions
1254212580

1254312581
link:userland/arch/x86_64/push.S[]
@@ -13086,6 +13124,14 @@ TODO We didn't manage to find a working ARM analogue to <<x86-rdtsc-instruction>
1308613124
* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809
1308713125
* https://blog.regehr.org/archives/794
1308813126

13127+
=== x86 LOCK prefix
13128+
13129+
Ensures that memory modifications are visible across all CPUs, which is fundamental for thread synchronization.
13130+
13131+
Inline assembly example at: link:userland/cpp/atomic.cpp[]
13132+
13133+
Apparently already automatically implied by some of the <<x86-exchange-instructions>>, e.g. XCHG with a memory operand.
13134+
1308913135
=== x86 assembly bibliography
1309013136

1309113137
==== x86 official bibliography

userland/arch/x86_64/cmpxchg.S

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-cmpxchg-instruction */
/* Exercises CMPXCHG (GAS AT&T syntax: source first, destination second) on both outcomes: comparand mismatch and comparand match. */
/* NOTE(review): the LKMC_* macros come from lkmc.h, which is not shown here; the comments below assume LKMC_ASSERT(jcc) passes when jcc would jump and LKMC_ASSERT_EQ passes when its two operands are equal. */
2+
3+
#include <lkmc.h>
4+
5+
LKMC_PROLOGUE
6+
/* Case 1: rax != r13. Expected: ZF = 0, rax receives the old r13, r13 and r14 keep their values. */
7+
mov $0, %rax /* rax = 0: the implicit comparand of cmpxchg */
8+
mov $1, %r13 /* r13 = 1: destination operand, deliberately different from rax */
9+
mov $2, %r14 /* r14 = 2: source operand, only stored into r13 on a match */
10+
cmpxchg %r14, %r13 /* compares rax with r13; mismatch here */
11+
mov %rax, %r12 /* mov does not modify flags, so ZF below is still the cmpxchg result. NOTE(review): r12 is not read by any instruction visible here; confirm whether this copy is still needed. */
12+
LKMC_ASSERT(jnz) /* ZF == 0: comparison failed */
13+
LKMC_ASSERT_EQ(%rax, $1) /* rax was loaded from r13 */
14+
LKMC_ASSERT_EQ(%r13, $1) /* destination unchanged */
15+
LKMC_ASSERT_EQ(%r14, $2) /* source unchanged */
16+
17+
/* Case 2: rax == r13. Expected: ZF = 1, r13 receives r14, rax and r14 keep their values. */
18+
mov $0, %rax /* rax = 0: the implicit comparand of cmpxchg */
19+
mov $0, %r13 /* r13 = 0: destination operand, equal to rax this time */
20+
mov $2, %r14 /* r14 = 2: value to be stored into r13 */
21+
cmpxchg %r14, %r13 /* compares rax with r13; match here */
22+
mov %rax, %r12 /* flags are preserved by mov. NOTE(review): r12 is not read by any instruction visible here; confirm whether this copy is still needed. */
23+
LKMC_ASSERT(jz) /* ZF == 1: comparison succeeded */
24+
LKMC_ASSERT_EQ(%rax, $0) /* comparand unchanged */
25+
LKMC_ASSERT_EQ(%r13, $2) /* destination now holds the source value */
26+
LKMC_ASSERT_EQ(%r14, $2) /* source unchanged */
27+
28+
LKMC_EPILOGUE

userland/arch/x86_64/xadd.S

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-exchange-instructions */
/* Exercises XADD (exchange and add) on two registers, GAS AT&T syntax: source first, destination second. */
/* NOTE(review): rbx is callee-saved under the System V AMD64 ABI; presumably LKMC_PROLOGUE / LKMC_EPILOGUE (from lkmc.h, not shown here) save and restore it. Confirm. */
2+
3+
#include <lkmc.h>
4+
5+
LKMC_PROLOGUE
6+
mov $1, %rax /* rax = 1: destination operand */
7+
mov $2, %rbx /* rbx = 2: source operand */
8+
xadd %rbx, %rax /* the assertions below expect the destination to get the sum and the source to get the old destination */
9+
LKMC_ASSERT_EQ(%rax, $3) /* 1 + 2 */
10+
LKMC_ASSERT_EQ(%rbx, $1) /* old value of rax */
11+
LKMC_EPILOGUE

userland/arch/x86_64/xchg.S

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-exchange-instructions */
/* Exercises XCHG on two registers: swaps them once, checks, then swaps again and checks that the initial values are back. */
/* NOTE(review): rbx is callee-saved under the System V AMD64 ABI; presumably LKMC_PROLOGUE / LKMC_EPILOGUE (from lkmc.h, not shown here) save and restore it. Confirm. */
2+
3+
#include <lkmc.h>
4+
5+
LKMC_PROLOGUE
6+
mov $0, %rax /* rax = 0 */
7+
mov $1, %rbx /* rbx = 1 */
8+
9+
xchg %rbx, %rax /* first swap */
10+
LKMC_ASSERT_EQ(%rax, $1) /* rax now holds the old rbx */
11+
LKMC_ASSERT_EQ(%rbx, $0) /* rbx now holds the old rax */
12+
13+
xchg %rbx, %rax /* second swap undoes the first */
14+
LKMC_ASSERT_EQ(%rax, $0) /* back to the initial rax */
15+
LKMC_ASSERT_EQ(%rbx, $1) /* back to the initial rbx */
16+
LKMC_EPILOGUE

userland/cpp/atomic.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
1-
// https://github.com/cirosantilli/linux-kernel-module-cheat#atomic
1+
// https://github.com/cirosantilli/linux-kernel-module-cheat#cpp
2+
// https://github.com/cirosantilli/linux-kernel-module-cheat#x86-lock-prefix
23
//
3-
// More restricted than mutex as it can only protect a few operations on integers.
4+
// The non-atomic counters have undefined values which get printed:
5+
// they are extremely likely to be less than the correct value due to
6+
// race conditions on the data read and update of the ++.
47
//
5-
// But if that is the use case, may be more efficient.
8+
// The atomic counters have defined values, and are asserted.
69
//
7-
// On GCC 4.8 x86-64, using atomic is a huge peformance improvement
8-
// over the same program with mutexes (5x).
10+
// Atomic operations are more restricted than mutex as they can
11+
// only protect a few operations on integers.
12+
//
13+
// But when they can be used, they can be much more efficient than mutexes.
14+
//
15+
// On GCC 4.8 x86-64, using atomic offered a 5x performance improvement
16+
// over the same program with mutexes.
17+
918

19+
#if __cplusplus >= 201103L
1020
#include <atomic>
1121
#include <cassert>
1222
#include <iostream>
1323
#include <thread>
1424
#include <vector>
15-
16-
#if __cplusplus >= 201103L
1725
std::atomic_ulong my_atomic_ulong(0);
1826
unsigned long my_non_atomic_ulong = 0;
1927
#if defined(__x86_64__)

0 commit comments

Comments
 (0)