|
| 1 | +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
| 2 | +From: "Jason A. Donenfeld" < [email protected]> |
| 3 | +Date: Wed, 3 Nov 2021 18:19:04 +0100 |
| 4 | +Subject: [PATCH] runtime: on windows, read nanotime with one instruction or |
| 5 | + issue barrier |
| 6 | + |
| 7 | +On 64-bit, a single 64-bit load is more efficient than the retry loop, and |
| 8 | +on ARM64 it also prevents the time from moving backwards due to the weaker |
| 9 | +memory model. On ARM32, which keeps the retry loop, we issue memory barriers. |
| 10 | + |
| 11 | +Updates #48072. |
| 12 | + |
| 13 | +Change-Id: If4695716c3039d8af14e14808af217f5c99fc93a |
| 14 | +Reviewed-on: https://go-review.googlesource.com/c/go/+/361057 |
| 15 | +Trust: Jason A. Donenfeld < [email protected]> |
| 16 | +Run-TryBot: Jason A. Donenfeld < [email protected]> |
| 17 | +TryBot-Result: Go Bot < [email protected]> |
| 18 | +Reviewed-by: Austin Clements < [email protected]> |
| 19 | +--- |
| 20 | + src/runtime/sys_windows_amd64.s | 13 +++---------- |
| 21 | + src/runtime/sys_windows_arm.s | 2 ++ |
| 22 | + src/runtime/sys_windows_arm64.s | 10 +--------- |
| 23 | + src/runtime/time_windows.h | 1 + |
| 24 | + src/runtime/time_windows_amd64.s | 19 +++---------------- |
| 25 | + src/runtime/time_windows_arm.s | 4 ++++ |
| 26 | + src/runtime/time_windows_arm64.s | 22 +++------------------- |
| 27 | + 7 files changed, 17 insertions(+), 54 deletions(-) |
| 28 | + |
| 29 | +diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s |
| 30 | +index e7782846b2..64fa6791f4 100644 |
| 31 | +--- a/src/runtime/sys_windows_amd64.s |
| 32 | ++++ b/src/runtime/sys_windows_amd64.s |
| 33 | +@@ -348,16 +348,9 @@ TEXT runtime·nanotime1(SB),NOSPLIT,$0-8 |
| 34 | + CMPB runtime·useQPCTime(SB), $0 |
| 35 | + JNE useQPC |
| 36 | + MOVQ $_INTERRUPT_TIME, DI |
| 37 | +-loop: |
| 38 | +- MOVL time_hi1(DI), AX |
| 39 | +- MOVL time_lo(DI), BX |
| 40 | +- MOVL time_hi2(DI), CX |
| 41 | +- CMPL AX, CX |
| 42 | +- JNE loop |
| 43 | +- SHLQ $32, CX |
| 44 | +- ORQ BX, CX |
| 45 | +- IMULQ $100, CX |
| 46 | +- MOVQ CX, ret+0(FP) |
| 47 | ++ MOVQ time_lo(DI), AX |
| 48 | ++ IMULQ $100, AX |
| 49 | ++ MOVQ AX, ret+0(FP) |
| 50 | + RET |
| 51 | + useQPC: |
| 52 | + JMP runtime·nanotimeQPC(SB) |
| 53 | +diff --git a/src/runtime/sys_windows_arm.s b/src/runtime/sys_windows_arm.s |
| 54 | +index 48f8c7dedf..d7ad244161 100644 |
| 55 | +--- a/src/runtime/sys_windows_arm.s |
| 56 | ++++ b/src/runtime/sys_windows_arm.s |
| 57 | +@@ -350,7 +350,9 @@ TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 |
| 58 | + MOVW $_INTERRUPT_TIME, R3 |
| 59 | + loop: |
| 60 | + MOVW time_hi1(R3), R1 |
| 61 | ++ DMB MB_ISH |
| 62 | + MOVW time_lo(R3), R0 |
| 63 | ++ DMB MB_ISH |
| 64 | + MOVW time_hi2(R3), R2 |
| 65 | + CMP R1, R2 |
| 66 | + BNE loop |
| 67 | +diff --git a/src/runtime/sys_windows_arm64.s b/src/runtime/sys_windows_arm64.s |
| 68 | +index 7a2e11f5ae..183128dd09 100644 |
| 69 | +--- a/src/runtime/sys_windows_arm64.s |
| 70 | ++++ b/src/runtime/sys_windows_arm64.s |
| 71 | +@@ -415,15 +415,7 @@ TEXT runtime·nanotime1(SB),NOSPLIT|NOFRAME,$0-8 |
| 72 | + CMP $0, R0 |
| 73 | + BNE useQPC |
| 74 | + MOVD $_INTERRUPT_TIME, R3 |
| 75 | +-loop: |
| 76 | +- MOVWU time_hi1(R3), R1 |
| 77 | +- MOVWU time_lo(R3), R0 |
| 78 | +- MOVWU time_hi2(R3), R2 |
| 79 | +- CMP R1, R2 |
| 80 | +- BNE loop |
| 81 | +- |
| 82 | +- // wintime = R1:R0, multiply by 100 |
| 83 | +- ORR R1<<32, R0 |
| 84 | ++ MOVD time_lo(R3), R0 |
| 85 | + MOVD $100, R1 |
| 86 | + MUL R1, R0 |
| 87 | + MOVD R0, ret+0(FP) |
| 88 | +diff --git a/src/runtime/time_windows.h b/src/runtime/time_windows.h |
| 89 | +index cd16fd163b..7c2e65c328 100644 |
| 90 | +--- a/src/runtime/time_windows.h |
| 91 | ++++ b/src/runtime/time_windows.h |
| 92 | +@@ -9,6 +9,7 @@ |
| 93 | + // http://web.archive.org/web/20210411000829/https://wrkhpi.wordpress.com/2007/08/09/getting-os-information-the-kuser_shared_data-structure/ |
| 94 | + |
| 95 | + // Must read hi1, then lo, then hi2. The snapshot is valid if hi1 == hi2. |
| 96 | +// Or, on 64-bit, read lo and hi1 together with a single atomic 64-bit load. |
| 97 | + #define _INTERRUPT_TIME 0x7ffe0008 |
| 98 | + #define _SYSTEM_TIME 0x7ffe0014 |
| 99 | + #define time_lo 0 |
| 100 | +diff --git a/src/runtime/time_windows_amd64.s b/src/runtime/time_windows_amd64.s |
| 101 | +index 93ab960b06..045f64eb46 100644 |
| 102 | +--- a/src/runtime/time_windows_amd64.s |
| 103 | ++++ b/src/runtime/time_windows_amd64.s |
| 104 | +@@ -12,27 +12,14 @@ |
| 105 | + TEXT time·now(SB),NOSPLIT,$0-24 |
| 106 | + CMPB runtime·useQPCTime(SB), $0 |
| 107 | + JNE useQPC |
| 108 | ++ |
| 109 | + MOVQ $_INTERRUPT_TIME, DI |
| 110 | +-loop: |
| 111 | +- MOVL time_hi1(DI), AX |
| 112 | +- MOVL time_lo(DI), BX |
| 113 | +- MOVL time_hi2(DI), CX |
| 114 | +- CMPL AX, CX |
| 115 | +- JNE loop |
| 116 | +- SHLQ $32, AX |
| 117 | +- ORQ BX, AX |
| 118 | ++ MOVQ time_lo(DI), AX |
| 119 | + IMULQ $100, AX |
| 120 | + MOVQ AX, mono+16(FP) |
| 121 | + |
| 122 | + MOVQ $_SYSTEM_TIME, DI |
| 123 | +-wall: |
| 124 | +- MOVL time_hi1(DI), AX |
| 125 | +- MOVL time_lo(DI), BX |
| 126 | +- MOVL time_hi2(DI), CX |
| 127 | +- CMPL AX, CX |
| 128 | +- JNE wall |
| 129 | +- SHLQ $32, AX |
| 130 | +- ORQ BX, AX |
| 131 | ++ MOVQ time_lo(DI), AX |
| 132 | + MOVQ $116444736000000000, DI |
| 133 | + SUBQ DI, AX |
| 134 | + IMULQ $100, AX |
| 135 | +diff --git a/src/runtime/time_windows_arm.s b/src/runtime/time_windows_arm.s |
| 136 | +index 7c763b66ed..6552d75ff1 100644 |
| 137 | +--- a/src/runtime/time_windows_arm.s |
| 138 | ++++ b/src/runtime/time_windows_arm.s |
| 139 | +@@ -17,7 +17,9 @@ TEXT time·now(SB),NOSPLIT|NOFRAME,$0-20 |
| 140 | + MOVW $_INTERRUPT_TIME, R3 |
| 141 | + loop: |
| 142 | + MOVW time_hi1(R3), R1 |
| 143 | ++ DMB MB_ISH |
| 144 | + MOVW time_lo(R3), R0 |
| 145 | ++ DMB MB_ISH |
| 146 | + MOVW time_hi2(R3), R2 |
| 147 | + CMP R1, R2 |
| 148 | + BNE loop |
| 149 | +@@ -34,7 +36,9 @@ loop: |
| 150 | + MOVW $_SYSTEM_TIME, R3 |
| 151 | + wall: |
| 152 | + MOVW time_hi1(R3), R1 |
| 153 | ++ DMB MB_ISH |
| 154 | + MOVW time_lo(R3), R0 |
| 155 | ++ DMB MB_ISH |
| 156 | + MOVW time_hi2(R3), R2 |
| 157 | + CMP R1, R2 |
| 158 | + BNE wall |
| 159 | +diff --git a/src/runtime/time_windows_arm64.s b/src/runtime/time_windows_arm64.s |
| 160 | +index ef52ce4c99..e8a0eb2f93 100644 |
| 161 | +--- a/src/runtime/time_windows_arm64.s |
| 162 | ++++ b/src/runtime/time_windows_arm64.s |
| 163 | +@@ -13,34 +13,18 @@ TEXT time·now(SB),NOSPLIT|NOFRAME,$0-24 |
| 164 | + MOVB runtime·useQPCTime(SB), R0 |
| 165 | + CMP $0, R0 |
| 166 | + BNE useQPC |
| 167 | +- MOVD $_INTERRUPT_TIME, R3 |
| 168 | +-loop: |
| 169 | +- MOVWU time_hi1(R3), R1 |
| 170 | +- MOVWU time_lo(R3), R0 |
| 171 | +- MOVWU time_hi2(R3), R2 |
| 172 | +- CMP R1, R2 |
| 173 | +- BNE loop |
| 174 | + |
| 175 | +- // wintime = R1:R0, multiply by 100 |
| 176 | +- ORR R1<<32, R0 |
| 177 | ++ MOVD $_INTERRUPT_TIME, R3 |
| 178 | ++ MOVD time_lo(R3), R0 |
| 179 | + MOVD $100, R1 |
| 180 | + MUL R1, R0 |
| 181 | + MOVD R0, mono+16(FP) |
| 182 | + |
| 183 | + MOVD $_SYSTEM_TIME, R3 |
| 184 | +-wall: |
| 185 | +- MOVWU time_hi1(R3), R1 |
| 186 | +- MOVWU time_lo(R3), R0 |
| 187 | +- MOVWU time_hi2(R3), R2 |
| 188 | +- CMP R1, R2 |
| 189 | +- BNE wall |
| 190 | +- |
| 191 | +- // w = R1:R0 in 100ns units |
| 192 | ++ MOVD time_lo(R3), R0 |
| 193 | + // convert to Unix epoch (but still 100ns units) |
| 194 | + #define delta 116444736000000000 |
| 195 | +- ORR R1<<32, R0 |
| 196 | + SUB $delta, R0 |
| 197 | +- |
| 198 | + // Convert to nSec |
| 199 | + MOVD $100, R1 |
| 200 | + MUL R1, R0 |
0 commit comments