Skip to content

Commit 6a92995

Browse files
committed
x86 asm: move string instructions from x86-assembly-cheat
1 parent e42d770 commit 6a92995

File tree

8 files changed

+310
-41
lines changed

8 files changed

+310
-41
lines changed

README.adoc

Lines changed: 87 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12311,6 +12311,17 @@ When reading disassembly, many instructions have either a `.n` or `.w` suffix.
1231112311

1231212312
Bibliography: https://stackoverflow.com/questions/27147043/n-suffix-to-branch-instruction
1231312313

12314+
=== NOP instructions
12315+
12316+
* x86: link:userland/arch/x86_64/nop.S[NOP]
12317+
* ARM: <<arm-nop-instruction>>
12318+
12319+
No OPeration.
12320+
12321+
Does nothing except take up one processor cycle and occupy some instruction memory.
12322+
12323+
Applications: http://stackoverflow.com/questions/234906/whats-the-purpose-of-the-nop-opcode
12324+
1231412325
== x86 userland assembly
1231512326

1231612327
Arch agnostic infrastructure getting started at: <<userland-assembly>>.
@@ -12354,29 +12365,29 @@ Bibliography:
1235412365

1235512366
<<intel-manual-1>> 5.1.2 "Binary Arithmetic Instructions":
1235612367

12357-
* link:userland/arch/x86_64/add.S[ADD]
12358-
** link:userland/arch/x86_64/inc.S[INC]
12359-
** link:userland/arch/x86_64/adc.S[ADC]
12360-
* link:userland/arch/x86_64/sub.S[SUB]
12361-
** link:userland/arch/x86_64/dec.S[DEC]
12362-
** link:userland/arch/x86_64/sbb.S[SBB]
12363-
* link:userland/arch/x86_64/mul.S[MUL]
12364-
** link:userland/arch/x86_64/neg.S[NEG]
12365-
** link:userland/arch/x86_64/imul.S[IMUL]
12366-
* link:userland/arch/x86_64/div.S[DIV]
12367-
** link:userland/arch/x86_64/div_overflow.S[DIV overflow]
12368-
** link:userland/arch/x86_64/div_zero.S[DIV zero]
12369-
** link:userland/arch/x86_64/idiv.S[IDIV]
12370-
* link:userland/arch/x86_64/cmp.S[CMP]
12368+
* link:userland/arch/x86_64/add.S[]: ADD
12369+
** link:userland/arch/x86_64/inc.S[]: INC
12370+
** link:userland/arch/x86_64/adc.S[]: ADC
12371+
* link:userland/arch/x86_64/sub.S[]: SUB
12372+
** link:userland/arch/x86_64/dec.S[]: DEC
12373+
** link:userland/arch/x86_64/sbb.S[]: SBB
12374+
* link:userland/arch/x86_64/mul.S[]: MUL
12375+
** link:userland/arch/x86_64/neg.S[]: NEG
12376+
** link:userland/arch/x86_64/imul.S[]: IMUL
12377+
* link:userland/arch/x86_64/div.S[]: DIV
12378+
** link:userland/arch/x86_64/div_overflow.S[]: DIV overflow
12379+
** link:userland/arch/x86_64/div_zero.S[]: DIV zero
12380+
** link:userland/arch/x86_64/idiv.S[]: IDIV
12381+
* link:userland/arch/x86_64/cmp.S[]: CMP
1237112382

1237212383
=== x86 logical instructions
1237312384

1237412385
<<intel-manual-1>> 5.1.4 "Logical Instructions"
1237512386

12376-
* link:userland/arch/x86_64/and.S[AND]
12377-
* link:userland/arch/x86_64/not.S[NOT]
12378-
* link:userland/arch/x86_64/or.S[OR]
12379-
* link:userland/arch/x86_64/xor.S[XOR]
12387+
* link:userland/arch/x86_64/and.S[]: AND
12388+
* link:userland/arch/x86_64/not.S[]: NOT
12389+
* link:userland/arch/x86_64/or.S[]: OR
12390+
* link:userland/arch/x86_64/xor.S[]: XOR
1238012391

1238112392
=== x86 shift and rotate instructions
1238212393

@@ -12400,37 +12411,39 @@ Keeps the same sign on right shift.
1240012411
Not directly exposed in C, for which right shift of a negative signed value is implementation-defined behavior, but does exist in Java via the `>>` operator (`>>>` is the logical shift). C compilers can emit it however.
1240112412
+
1240212413
SHL and SAL are exactly the same and have the same encoding: https://stackoverflow.com/questions/8373415/difference-between-shl-and-sal-in-80x86/56621271#56621271
12403-
* link:userland/arch/x86_64/rol.S[ROL and ROR]
12414+
* link:userland/arch/x86_64/rol.S[]: ROL and ROR
1240412415
+
1240512416
Rotates the bit that is going out around to the other side.
12406-
* link:userland/arch/x86_64/rol.S[RCL and RCR]
12417+
* link:userland/arch/x86_64/rol.S[]: RCL and RCR
1240712418
+
1240812419
Like ROL and ROR, but insert the carry bit instead, which effectively generates a rotation of 8 + 1 bits. TODO application.
1240912420

1241012421
=== x86 bit and byte instructions
1241112422

1241212423
<<intel-manual-1>> 5.1.6 "Bit and Byte Instructions"
1241312424

12414-
* link:userland/arch/x86_64/bt.S[BT]
12425+
* link:userland/arch/x86_64/bt.S[]: BT
1241512426
+
1241612427
Bit test: test if the Nth bit of a register is set and store the result in the CF flag.
1241712428
+
1241812429
....
1241912430
CF = reg[N]
1242012431
....
12421-
* link:userland/arch/x86_64/btr.S[BTR]
12432+
* link:userland/arch/x86_64/btr.S[]: BTR
1242212433
+
1242312434
Do a BT and then set the bit to 0.
12424-
* link:userland/arch/x86_64/btc.S[BTC]
12435+
* link:userland/arch/x86_64/btc.S[]: BTC
1242512436
+
1242612437
Do a BT and then swap the value of the tested bit.
12427-
* link:userland/arch/x86_64/setcc.S[SETcc]
12438+
* link:userland/arch/x86_64/setcc.S[]: SETcc
1242812439
+
12429-
Set a a byte of a register to 0 or 1 depending on the cc condition.
12430-
* link:userland/arch/x86_64/popcnt.S[POPCNT]
12440+
Set a byte of a register to 0 or 1 depending on the cc condition.
12441+
+
12442+
Bibliography: https://stackoverflow.com/questions/1406783/how-to-read-and-write-x86-flags-registers-directly/30952577#30952577
12443+
* link:userland/arch/x86_64/popcnt.S[]: POPCNT
1243112444
+
1243212445
Count the number of 1 bits.
12433-
* link:userland/arch/x86_64/test.S[TEST]
12446+
* link:userland/arch/x86_64/test.S[]: TEST
1243412447
+
1243512448
Like <<x86-binary-arithmetic-instructions,CMP>> but does AND instead of SUB:
1243612449
+
@@ -12442,12 +12455,12 @@ ZF = (!(X && Y)) ? 1 : 0
1244212455

1244312456
<<intel-manual-1>> 5.1.7 "Control Transfer Instructions"
1244412457

12445-
* link:userland/arch/x86_64/jmp.S[JMP]
12446-
** link:userland/arch/x86_64/jmp_indirect.S[JMP indirect]
12458+
* link:userland/arch/x86_64/jmp.S[]: JMP
12459+
** link:userland/arch/x86_64/jmp_indirect.S[]: JMP indirect
1244712460

1244812461
==== x86 Jcc instructions
1244912462

12450-
link:userland/arch/x86_64/jcc.S[Jcc]
12463+
link:userland/arch/x86_64/jcc.S[]
1245112464

1245212465
Jump if certain conditions of the flags register are met.
1245312466

@@ -12472,29 +12485,61 @@ JG vs JA and JL vs JB:
1247212485

1247312486
==== x86 LOOP instruction
1247412487

12475-
link:userland/arch/x86_64/loop.S[LOOP]
12488+
link:userland/arch/x86_64/loop.S[]
1247612489

1247712490
Vs <<x86-jcc-instructions,Jcc>>: https://stackoverflow.com/questions/6805692/x86-assembly-programming-loops-with-ecx-and-loop-instruction-versus-jmp-jcond Holy CISC!
1247812491

12479-
=== x86 miscellaneous instructions
12492+
==== x86 string instructions
1248012493

12481-
<<intel-manual-1>> 5.1.13 "Miscellaneous Instructions"
12494+
<<intel-manual-1>> 5.1.8 "String Instructions"
1248212495

12483-
==== x86 NOP instruction
12496+
These instructions do some operation on an array item, and automatically update the index to the next item:
1248412497

12485-
link:userland/arch/x86_64/nop.S[NOP]
12498+
* First example explained in more detail
12499+
** link:userland/arch/x86_64/stos.S[]: STOS: STOre String: store register to memory. STOSD is called STOSL in GNU GAS as usual: https://stackoverflow.com/questions/6211629/gcc-inline-assembly-error-no-such-instruction-stosd
12500+
* Further examples
12501+
** link:userland/arch/x86_64/cmps.S[]: CMPS: CoMPare Strings: compare two values in memory with addresses given by RSI and RDI. Could be used to implement `memcmp`. Store the result in the flags (e.g. ZF) as usual, so that it can be consumed by instructions such as JZ.
12502+
** link:userland/arch/x86_64/lods.S[]: LODS: LOaD String: load from memory to register.
12503+
** link:userland/arch/x86_64/movs.S[]: MOVS: MOVe String: move from one memory location to another with addresses given by RSI and RDI. Could be used to implement `memcpy`.
12504+
** link:userland/arch/x86_64/scas.S[]: SCAS: SCan String: compare memory to the value in a register. Could be used to implement `strchr`.
1248612505

12487-
No OPeration.
12506+
The RSI and RDI registers are actually named after these instructions! S is the source of string instructions, D is the destination of string instructions.
1248812507

12489-
Does nothing except take up one processor cycle and occupy some instruction memory.
12508+
The direction of the index increment depends on the direction flag of the FLAGS register: 0 means forward and 1 means backward: https://stackoverflow.com/questions/9636691/what-are-cld-and-std-for-in-x86-assembly-language-what-does-df-do
1249012509

12491-
Applications: http://stackoverflow.com/questions/234906/whats-the-purpose-of-the-nop-opcode
12510+
These instructions were originally developed to speed up "string" operations such as those present in the `<string.h>` header of the C standard library.
12511+
12512+
However, as computer architecture evolved, those instructions might not offer considerable speedups anymore, and modern glibc such as 2.29 just uses <<x86-simd>> operations instead, see also: https://stackoverflow.com/questions/33480999/how-can-the-rep-stosb-instruction-execute-faster-than-the-equivalent-loop
12513+
12514+
===== x86 REP prefix
12515+
12516+
Example: link:userland/arch/x86_64/rep.S[]
12517+
12518+
Repeat a string instruction RCX times.
12519+
12520+
As the repetitions happen:
12521+
12522+
* RCX decreases, until it reaches 0
12523+
* RDI and RSI increase (or decrease if the direction flag is set)
12524+
12525+
The variants: REPZ, REPNZ (alias REPE, REPNE) repeat a given instruction until something happens.
12526+
12527+
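REPZ and REPNZ additionally stop early based on the comparison they repeat: REPZ stops as soon as a comparison is not equal (ZF = 0), and REPNZ stops as soon as a comparison is equal (ZF = 1). Plain REP only stops when RCX reaches 0.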
12528+
12529+
* REP: INS, OUTS, MOVS, LODS, and STOS
12530+
* REPZ: CMPS and SCAS
12531+
12532+
=== x86 miscellaneous instructions
12533+
12534+
<<intel-manual-1>> 5.1.13 "Miscellaneous Instructions"
12535+
12536+
NOP: <<nop-instructions>>
1249212537

1249312538
=== x86 random number generator instructions
1249412539

1249512540
<<intel-manual-1>> 5.1.15 Random Number Generator Instructions
1249612541

12497-
Example: link:userland/arch/x86_64/rdrand.S[RDRAND]
12542+
Example: link:userland/arch/x86_64/rdrand.S[]: RDRAND
1249812543

1249912544
If you run that executable multiple times, it prints a random number every time to stdout.
1250012545

@@ -12508,7 +12553,7 @@ RDRAND sets the carry flag when data is ready so we must loop if the carry flag
1250812553

1250912554
==== x86 CPUID instruction
1251012555

12511-
Example: link:userland/arch/x86_64/cpuid.S[CPUID]
12556+
Example: link:userland/arch/x86_64/cpuid.S[]
1251212557

1251312558
Fills EAX, EBX, ECX and EDX with CPU information.
1251412559

@@ -13299,6 +13344,8 @@ See: <<arm-adr-instruction>>.
1329913344

1330013345
==== ARM NOP instruction
1330113346

13347+
Parent section: <<nop-instructions>>
13348+
1330213349
There are a few different ways to encode NOP, notably MOV a register into itself, and a dedicated miscellaneous instruction.
1330313350

1330413351
Example: link:userland/arch/arm/nop.S[]

userland/arch/x86_64/cmps.S

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */
2+
3+
/* CMPSQ example: compare two quadword arrays element by element. */
4+
5+
#include <lkmc.h>
6+
7+
.section .rodata
8+
my_quad_array_1: .quad 1, 2
9+
my_quad_array_2: .quad 1, 3
10+
LKMC_PROLOGUE /* Defined in lkmc.h (not shown here); presumably switches to .text and opens the entry point. */
11+
mov $0, %r12 /* Clear the whole register: SETZ below only writes the low byte. */
12+
mov $0, %r13 /* Same for the second result. */
13+
cld /* DF = 0, so each CMPSQ moves RSI and RDI forward. */
14+
lea my_quad_array_1(%rip), %rsi /* RSI = first operand array. */
15+
lea my_quad_array_2(%rip), %rdi /* RDI = second operand array. */
16+
cmpsq /* Compare element 0 of both arrays (1 vs 1), then advance RSI and RDI by 8. */
17+
setz %r12b /* r12 = 1 if they were equal. */
18+
cmpsq /* Compare element 1 of both arrays (2 vs 3), then advance again. */
19+
setz %r13b /* r13 = 1 if they were equal. */
20+
/* 1 == 1 */
21+
LKMC_ASSERT_EQ(%r12, $1)
22+
/* 2 != 3 */
23+
LKMC_ASSERT_EQ(%r13, $0)
24+
LKMC_EPILOGUE

userland/arch/x86_64/lods.S

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */
2+
/* LODSQ example: load consecutive quadwords from memory into RAX. */
3+
#include <lkmc.h>
4+
5+
.section .rodata
6+
my_quad_array: .quad 1, 2
7+
LKMC_PROLOGUE /* Defined in lkmc.h (not shown here); presumably switches to .text and opens the entry point. */
8+
lea my_quad_array(%rip), %rsi /* RSI = address of the element to load next. */
9+
cld /* DF = 0, so each LODSQ moves RSI forward. */
10+
lodsq /* RAX = element 0, then RSI advances by 8. */
11+
mov %rax, %r12 /* Save it: the next LODSQ overwrites RAX. */
12+
lodsq /* RAX = element 1. */
13+
mov %rax, %r13
14+
LKMC_ASSERT_EQ(%r12, $1)
15+
LKMC_ASSERT_EQ(%r13, $2)
16+
LKMC_EPILOGUE

userland/arch/x86_64/movs.S

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */
2+
/* MOVSQ example. */
3+
4+
/* Copy one quadword array into another, one element per MOVSQ. */
5+
6+
/* Input is pointed to by RSI, output by RDI. */
7+
8+
#include <lkmc.h>
9+
10+
.section .rodata
11+
src: .quad 1, 2
12+
.bss
13+
dest: .skip 16
14+
LKMC_PROLOGUE /* Defined in lkmc.h (not shown here); presumably switches to .text and opens the entry point. */
15+
cld /* DF = 0, so each MOVSQ moves RSI and RDI forward. */
16+
lea src(%rip), %rsi /* RSI = source array. */
17+
lea dest(%rip), %rdi /* RDI = destination buffer (16 bytes, room for both quadwords). */
18+
movsq /* Copy element 0, then advance RSI and RDI by 8. */
19+
movsq /* Copy element 1. */
20+
LKMC_ASSERT_EQ(dest + 0, $1)
21+
LKMC_ASSERT_EQ(dest + 8, $2)
22+
LKMC_EPILOGUE

userland/arch/x86_64/nop.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-nop-instruction */
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#nop-instructions */
22

33
#include <lkmc.h>
44

userland/arch/x86_64/rep.S

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-rep-prefix */
2+
/* REP prefix examples: memset, memcpy and memcmp analogues on two 16-byte buffers. */
3+
#include <lkmc.h>
4+
5+
.bss
6+
src: .skip 16
7+
dst: .skip 16
8+
LKMC_PROLOGUE
9+
10+
/* memset: REP STOSQ */
11+
cld
12+
lea dst(%rip), %rdi
13+
/* 2 elements. */
14+
mov $2, %rcx
15+
/* Set every element to 42. */
16+
mov $0x2A, %rax
17+
rep stosq
18+
/* RCX was decremented down to zero. */
19+
LKMC_ASSERT_EQ(%rcx, $0)
20+
/* And the memory was set. */
21+
LKMC_ASSERT_EQ(dst + 0, $0x2A)
22+
LKMC_ASSERT_EQ(dst + 8, $0x2A)
23+
24+
/* memcpy: REP MOVSQ */
25+
cld
26+
movq $2, src + 0
27+
movq $3, src + 8
28+
lea src(%rip), %rsi
29+
lea dst(%rip), %rdi
30+
mov $2, %rcx
31+
rep movsq
32+
LKMC_ASSERT_EQ(dst + 0, $2)
33+
LKMC_ASSERT_EQ(dst + 8, $3)
34+
35+
/* memcmp: REPZ CMPSL */
36+
37+
/* Setup src as four 32-bit elements: 2, 3, 4, 5. */
38+
movl $2, src + 0x0
39+
movl $3, src + 0x4
40+
movl $4, src + 0x8
41+
movl $5, src + 0xC /* The 4th element lives at byte offset 12; 0xA would straddle elements 2 and 3. */
42+
43+
/* Equal. */
44+
movl $2, dst + 0x0
45+
movl $3, dst + 0x4
46+
movl $4, dst + 0x8
47+
movl $5, dst + 0xC
48+
cld
49+
lea src(%rip), %rsi
50+
lea dst(%rip), %rdi
51+
mov $4, %rcx
52+
repz cmpsl
53+
mov %rcx, %r12 /* MOV leaves the flags untouched, so ZF from the last CMPSL is still valid below. */
54+
/* Last flag was equal. */
55+
LKMC_ASSERT(jz)
56+
/* RCX was decreased all the way to zero. */
57+
LKMC_ASSERT_EQ(%r12, $0)
58+
59+
/* Different: the third element is 2 instead of 4. */
60+
movl $2, dst + 0x0
61+
movl $3, dst + 0x4
62+
movl $2, dst + 0x8
63+
movl $5, dst + 0xC
64+
lea src(%rip), %rsi
65+
lea dst(%rip), %rdi
66+
mov $4, %rcx
67+
repz cmpsl
68+
mov %rcx, %r12
69+
LKMC_ASSERT(jnz)
70+
/* We stopped at the third element, with 1 comparison missing. */
71+
LKMC_ASSERT_EQ(%r12, $1)
72+
73+
LKMC_EPILOGUE

userland/arch/x86_64/scas.S

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/* https://github.com/cirosantilli/linux-kernel-module-cheat#x86-string-instructions */
2+
/* SCASQ example: compare RAX against consecutive quadwords in memory. */
3+
#include <lkmc.h>
4+
5+
.section .rodata
6+
my_quad_array: .quad 1, 2
7+
LKMC_PROLOGUE /* Defined in lkmc.h (not shown here); presumably switches to .text and opens the entry point. */
8+
mov $0, %r12 /* Clear the whole register: SETZ below only writes the low byte. */
9+
mov $0, %r13
10+
/* RDI holds the address of the element to scan next. */
11+
lea my_quad_array(%rip), %rdi
12+
cld /* DF = 0, so each SCASQ moves RDI forward. */
13+
mov $1, %rax
14+
/* Compare RAX to *RDI (1 vs 1: equal), then advance RDI by 8. */
15+
scasq
16+
setz %r12b
17+
mov $3, %rax
18+
/* Compare RAX to *RDI (3 vs 2: not equal), then advance RDI by 8. */
19+
scasq
20+
setz %r13b
21+
/* 1 == 1 */
22+
LKMC_ASSERT_EQ(%r12, $1)
23+
/* 2 != 3 */
24+
LKMC_ASSERT_EQ(%r13, $0)
25+
LKMC_EPILOGUE

0 commit comments

Comments
 (0)