diff --git a/go.mod b/go.mod
index e912202c37..3b4ee54dac 100644
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
module github.com/loft-sh/vcluster
-go 1.22.4
+go 1.23.0
require (
github.com/blang/semver v3.5.1+incompatible
@@ -42,7 +42,7 @@ require (
github.com/vmware-labs/yaml-jsonpath v0.3.2
go.uber.org/atomic v1.11.0
golang.org/x/mod v0.18.0
- golang.org/x/sync v0.7.0
+ golang.org/x/sync v0.12.0
google.golang.org/grpc v1.64.0
google.golang.org/protobuf v1.34.1
gopkg.in/square/go-jose.v2 v2.6.0
@@ -190,13 +190,13 @@ require (
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca // indirect
go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0
- golang.org/x/crypto v0.24.0 // indirect
+ golang.org/x/crypto v0.36.0 // indirect
golang.org/x/exp v0.0.0-20240604190554-fc45aab8b7f8
- golang.org/x/net v0.26.0 // indirect
+ golang.org/x/net v0.38.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
- golang.org/x/sys v0.21.0 // indirect
- golang.org/x/term v0.21.0 // indirect
- golang.org/x/text v0.16.0 // indirect
+ golang.org/x/sys v0.31.0 // indirect
+ golang.org/x/term v0.30.0 // indirect
+ golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/tools v0.22.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
diff --git a/go.sum b/go.sum
index efd1b41d2a..9d7bdcca79 100644
--- a/go.sum
+++ b/go.sum
@@ -579,8 +579,8 @@ golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5y
golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/crypto v0.3.1-0.20221117191849-2c476679df9a/go.mod h1:hebNnKkNXi2UzZN1eVRvBB7co0a+JxK6XbPiWVs/3J4=
golang.org/x/crypto v0.7.0/go.mod h1:pYwdfH91IfpZVANVyUOhSIPZaFoJGxTFbZhFTx+dXZU=
-golang.org/x/crypto v0.24.0 h1:mnl8DM0o513X8fdIkmyFE/5hTYxbwYOjDS/+rK6qpRI=
-golang.org/x/crypto v0.24.0/go.mod h1:Z1PMYSOR5nyMcyAVAIQSKCDwalqy85Aqn1x3Ws4L5DM=
+golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
+golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -633,8 +633,8 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug
golang.org/x/net v0.2.0/go.mod h1:KqCZLdyyvdV855qA2rE3GC2aiw5xGR5TEjj8smXukLY=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc=
-golang.org/x/net v0.26.0 h1:soB7SVo0PWrY4vPW/+ay0jKDNScG2X9wFeYlXIvJsOQ=
-golang.org/x/net v0.26.0/go.mod h1:5YKkiSynbBIh3p6iOc/vibscux0x38BZDkn8sCUPxHE=
+golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
+golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20181106182150-f42d05182288/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -653,8 +653,8 @@ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M=
-golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw=
+golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -690,8 +690,8 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
-golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
+golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -699,8 +699,8 @@ golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuX
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U=
-golang.org/x/term v0.21.0 h1:WVXCp+/EBEHOj53Rvu+7KiT/iElMrO8ACK16SMZ3jaA=
-golang.org/x/term v0.21.0/go.mod h1:ooXLefLobQVslOqselCNF4SxFAaoS6KujMbsGzSDmX0=
+golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y=
+golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
@@ -711,8 +711,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
-golang.org/x/text v0.16.0 h1:a94ExnEXNtEwYLGJSIUxnWoxoRz/ZcCsV63ROupILh4=
-golang.org/x/text v0.16.0/go.mod h1:GhwF1Be+LQoKShO3cGOHzqOgRrGaYc9AvblQOmPVHnI=
+golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY=
+golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
diff --git a/vendor/golang.org/x/crypto/LICENSE b/vendor/golang.org/x/crypto/LICENSE
index 6a66aea5ea..2a7cf70da6 100644
--- a/vendor/golang.org/x/crypto/LICENSE
+++ b/vendor/golang.org/x/crypto/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
+Copyright 2009 The Go Authors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer.
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
- * Neither the name of Google Inc. nor the names of its
+ * Neither the name of Google LLC nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
index 6713accac0..c3895478ed 100644
--- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
+++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
@@ -1,243 +1,2791 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v6, t1; \
- PUNPCKLQDQ v6, t2; \
- PUNPCKHQDQ v7, v6; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ v7, t2; \
- MOVO t1, v7; \
- MOVO v2, t1; \
- PUNPCKHQDQ t2, v7; \
- PUNPCKLQDQ v3, t2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v2, t1; \
- PUNPCKLQDQ v2, t2; \
- PUNPCKHQDQ v3, v2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ v3, t2; \
- MOVO t1, v3; \
- MOVO v6, t1; \
- PUNPCKHQDQ t2, v3; \
- PUNPCKLQDQ v7, t2; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \
- MOVO v0, t0; \
- PMULULQ v2, t0; \
- PADDQ v2, v0; \
- PADDQ t0, v0; \
- PADDQ t0, v0; \
- PXOR v0, v6; \
- PSHUFD $0xB1, v6, v6; \
- MOVO v4, t0; \
- PMULULQ v6, t0; \
- PADDQ v6, v4; \
- PADDQ t0, v4; \
- PADDQ t0, v4; \
- PXOR v4, v2; \
- PSHUFB c40, v2; \
- MOVO v0, t0; \
- PMULULQ v2, t0; \
- PADDQ v2, v0; \
- PADDQ t0, v0; \
- PADDQ t0, v0; \
- PXOR v0, v6; \
- PSHUFB c48, v6; \
- MOVO v4, t0; \
- PMULULQ v6, t0; \
- PADDQ v6, v4; \
- PADDQ t0, v4; \
- PADDQ t0, v4; \
- PXOR v4, v2; \
- MOVO v2, t0; \
- PADDQ v2, t0; \
- PSRLQ $63, v2; \
- PXOR t0, v2; \
- MOVO v1, t0; \
- PMULULQ v3, t0; \
- PADDQ v3, v1; \
- PADDQ t0, v1; \
- PADDQ t0, v1; \
- PXOR v1, v7; \
- PSHUFD $0xB1, v7, v7; \
- MOVO v5, t0; \
- PMULULQ v7, t0; \
- PADDQ v7, v5; \
- PADDQ t0, v5; \
- PADDQ t0, v5; \
- PXOR v5, v3; \
- PSHUFB c40, v3; \
- MOVO v1, t0; \
- PMULULQ v3, t0; \
- PADDQ v3, v1; \
- PADDQ t0, v1; \
- PADDQ t0, v1; \
- PXOR v1, v7; \
- PSHUFB c48, v7; \
- MOVO v5, t0; \
- PMULULQ v7, t0; \
- PADDQ v7, v5; \
- PADDQ t0, v5; \
- PADDQ t0, v5; \
- PXOR v5, v3; \
- MOVO v3, t0; \
- PADDQ v3, t0; \
- PSRLQ $63, v3; \
- PXOR t0, v3
-
-#define LOAD_MSG_0(block, off) \
- MOVOU 8*(off+0)(block), X0; \
- MOVOU 8*(off+2)(block), X1; \
- MOVOU 8*(off+4)(block), X2; \
- MOVOU 8*(off+6)(block), X3; \
- MOVOU 8*(off+8)(block), X4; \
- MOVOU 8*(off+10)(block), X5; \
- MOVOU 8*(off+12)(block), X6; \
- MOVOU 8*(off+14)(block), X7
-
-#define STORE_MSG_0(block, off) \
- MOVOU X0, 8*(off+0)(block); \
- MOVOU X1, 8*(off+2)(block); \
- MOVOU X2, 8*(off+4)(block); \
- MOVOU X3, 8*(off+6)(block); \
- MOVOU X4, 8*(off+8)(block); \
- MOVOU X5, 8*(off+10)(block); \
- MOVOU X6, 8*(off+12)(block); \
- MOVOU X7, 8*(off+14)(block)
-
-#define LOAD_MSG_1(block, off) \
- MOVOU 8*off+0*8(block), X0; \
- MOVOU 8*off+16*8(block), X1; \
- MOVOU 8*off+32*8(block), X2; \
- MOVOU 8*off+48*8(block), X3; \
- MOVOU 8*off+64*8(block), X4; \
- MOVOU 8*off+80*8(block), X5; \
- MOVOU 8*off+96*8(block), X6; \
- MOVOU 8*off+112*8(block), X7
-
-#define STORE_MSG_1(block, off) \
- MOVOU X0, 8*off+0*8(block); \
- MOVOU X1, 8*off+16*8(block); \
- MOVOU X2, 8*off+32*8(block); \
- MOVOU X3, 8*off+48*8(block); \
- MOVOU X4, 8*off+64*8(block); \
- MOVOU X5, 8*off+80*8(block); \
- MOVOU X6, 8*off+96*8(block); \
- MOVOU X7, 8*off+112*8(block)
-
-#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \
- LOAD_MSG_0(block, off); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
- STORE_MSG_0(block, off)
-
-#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \
- LOAD_MSG_1(block, off); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1); \
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1); \
- STORE_MSG_1(block, off)
-
// func blamkaSSE4(b *block)
-TEXT ·blamkaSSE4(SB), 4, $0-8
- MOVQ b+0(FP), AX
-
- MOVOU ·c40<>(SB), X10
- MOVOU ·c48<>(SB), X11
+// Requires: SSE2, SSSE3
+TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8
+ MOVQ b+0(FP), AX
+ MOVOU ·c40<>+0(SB), X10
+ MOVOU ·c48<>+0(SB), X11
+ MOVOU (AX), X0
+ MOVOU 16(AX), X1
+ MOVOU 32(AX), X2
+ MOVOU 48(AX), X3
+ MOVOU 64(AX), X4
+ MOVOU 80(AX), X5
+ MOVOU 96(AX), X6
+ MOVOU 112(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, (AX)
+ MOVOU X1, 16(AX)
+ MOVOU X2, 32(AX)
+ MOVOU X3, 48(AX)
+ MOVOU X4, 64(AX)
+ MOVOU X5, 80(AX)
+ MOVOU X6, 96(AX)
+ MOVOU X7, 112(AX)
+ MOVOU 128(AX), X0
+ MOVOU 144(AX), X1
+ MOVOU 160(AX), X2
+ MOVOU 176(AX), X3
+ MOVOU 192(AX), X4
+ MOVOU 208(AX), X5
+ MOVOU 224(AX), X6
+ MOVOU 240(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 128(AX)
+ MOVOU X1, 144(AX)
+ MOVOU X2, 160(AX)
+ MOVOU X3, 176(AX)
+ MOVOU X4, 192(AX)
+ MOVOU X5, 208(AX)
+ MOVOU X6, 224(AX)
+ MOVOU X7, 240(AX)
+ MOVOU 256(AX), X0
+ MOVOU 272(AX), X1
+ MOVOU 288(AX), X2
+ MOVOU 304(AX), X3
+ MOVOU 320(AX), X4
+ MOVOU 336(AX), X5
+ MOVOU 352(AX), X6
+ MOVOU 368(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 256(AX)
+ MOVOU X1, 272(AX)
+ MOVOU X2, 288(AX)
+ MOVOU X3, 304(AX)
+ MOVOU X4, 320(AX)
+ MOVOU X5, 336(AX)
+ MOVOU X6, 352(AX)
+ MOVOU X7, 368(AX)
+ MOVOU 384(AX), X0
+ MOVOU 400(AX), X1
+ MOVOU 416(AX), X2
+ MOVOU 432(AX), X3
+ MOVOU 448(AX), X4
+ MOVOU 464(AX), X5
+ MOVOU 480(AX), X6
+ MOVOU 496(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 384(AX)
+ MOVOU X1, 400(AX)
+ MOVOU X2, 416(AX)
+ MOVOU X3, 432(AX)
+ MOVOU X4, 448(AX)
+ MOVOU X5, 464(AX)
+ MOVOU X6, 480(AX)
+ MOVOU X7, 496(AX)
+ MOVOU 512(AX), X0
+ MOVOU 528(AX), X1
+ MOVOU 544(AX), X2
+ MOVOU 560(AX), X3
+ MOVOU 576(AX), X4
+ MOVOU 592(AX), X5
+ MOVOU 608(AX), X6
+ MOVOU 624(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 512(AX)
+ MOVOU X1, 528(AX)
+ MOVOU X2, 544(AX)
+ MOVOU X3, 560(AX)
+ MOVOU X4, 576(AX)
+ MOVOU X5, 592(AX)
+ MOVOU X6, 608(AX)
+ MOVOU X7, 624(AX)
+ MOVOU 640(AX), X0
+ MOVOU 656(AX), X1
+ MOVOU 672(AX), X2
+ MOVOU 688(AX), X3
+ MOVOU 704(AX), X4
+ MOVOU 720(AX), X5
+ MOVOU 736(AX), X6
+ MOVOU 752(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 640(AX)
+ MOVOU X1, 656(AX)
+ MOVOU X2, 672(AX)
+ MOVOU X3, 688(AX)
+ MOVOU X4, 704(AX)
+ MOVOU X5, 720(AX)
+ MOVOU X6, 736(AX)
+ MOVOU X7, 752(AX)
+ MOVOU 768(AX), X0
+ MOVOU 784(AX), X1
+ MOVOU 800(AX), X2
+ MOVOU 816(AX), X3
+ MOVOU 832(AX), X4
+ MOVOU 848(AX), X5
+ MOVOU 864(AX), X6
+ MOVOU 880(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 768(AX)
+ MOVOU X1, 784(AX)
+ MOVOU X2, 800(AX)
+ MOVOU X3, 816(AX)
+ MOVOU X4, 832(AX)
+ MOVOU X5, 848(AX)
+ MOVOU X6, 864(AX)
+ MOVOU X7, 880(AX)
+ MOVOU 896(AX), X0
+ MOVOU 912(AX), X1
+ MOVOU 928(AX), X2
+ MOVOU 944(AX), X3
+ MOVOU 960(AX), X4
+ MOVOU 976(AX), X5
+ MOVOU 992(AX), X6
+ MOVOU 1008(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 896(AX)
+ MOVOU X1, 912(AX)
+ MOVOU X2, 928(AX)
+ MOVOU X3, 944(AX)
+ MOVOU X4, 960(AX)
+ MOVOU X5, 976(AX)
+ MOVOU X6, 992(AX)
+ MOVOU X7, 1008(AX)
+ MOVOU (AX), X0
+ MOVOU 128(AX), X1
+ MOVOU 256(AX), X2
+ MOVOU 384(AX), X3
+ MOVOU 512(AX), X4
+ MOVOU 640(AX), X5
+ MOVOU 768(AX), X6
+ MOVOU 896(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, (AX)
+ MOVOU X1, 128(AX)
+ MOVOU X2, 256(AX)
+ MOVOU X3, 384(AX)
+ MOVOU X4, 512(AX)
+ MOVOU X5, 640(AX)
+ MOVOU X6, 768(AX)
+ MOVOU X7, 896(AX)
+ MOVOU 16(AX), X0
+ MOVOU 144(AX), X1
+ MOVOU 272(AX), X2
+ MOVOU 400(AX), X3
+ MOVOU 528(AX), X4
+ MOVOU 656(AX), X5
+ MOVOU 784(AX), X6
+ MOVOU 912(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 16(AX)
+ MOVOU X1, 144(AX)
+ MOVOU X2, 272(AX)
+ MOVOU X3, 400(AX)
+ MOVOU X4, 528(AX)
+ MOVOU X5, 656(AX)
+ MOVOU X6, 784(AX)
+ MOVOU X7, 912(AX)
+ MOVOU 32(AX), X0
+ MOVOU 160(AX), X1
+ MOVOU 288(AX), X2
+ MOVOU 416(AX), X3
+ MOVOU 544(AX), X4
+ MOVOU 672(AX), X5
+ MOVOU 800(AX), X6
+ MOVOU 928(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 32(AX)
+ MOVOU X1, 160(AX)
+ MOVOU X2, 288(AX)
+ MOVOU X3, 416(AX)
+ MOVOU X4, 544(AX)
+ MOVOU X5, 672(AX)
+ MOVOU X6, 800(AX)
+ MOVOU X7, 928(AX)
+ MOVOU 48(AX), X0
+ MOVOU 176(AX), X1
+ MOVOU 304(AX), X2
+ MOVOU 432(AX), X3
+ MOVOU 560(AX), X4
+ MOVOU 688(AX), X5
+ MOVOU 816(AX), X6
+ MOVOU 944(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 48(AX)
+ MOVOU X1, 176(AX)
+ MOVOU X2, 304(AX)
+ MOVOU X3, 432(AX)
+ MOVOU X4, 560(AX)
+ MOVOU X5, 688(AX)
+ MOVOU X6, 816(AX)
+ MOVOU X7, 944(AX)
+ MOVOU 64(AX), X0
+ MOVOU 192(AX), X1
+ MOVOU 320(AX), X2
+ MOVOU 448(AX), X3
+ MOVOU 576(AX), X4
+ MOVOU 704(AX), X5
+ MOVOU 832(AX), X6
+ MOVOU 960(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 64(AX)
+ MOVOU X1, 192(AX)
+ MOVOU X2, 320(AX)
+ MOVOU X3, 448(AX)
+ MOVOU X4, 576(AX)
+ MOVOU X5, 704(AX)
+ MOVOU X6, 832(AX)
+ MOVOU X7, 960(AX)
+ MOVOU 80(AX), X0
+ MOVOU 208(AX), X1
+ MOVOU 336(AX), X2
+ MOVOU 464(AX), X3
+ MOVOU 592(AX), X4
+ MOVOU 720(AX), X5
+ MOVOU 848(AX), X6
+ MOVOU 976(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 80(AX)
+ MOVOU X1, 208(AX)
+ MOVOU X2, 336(AX)
+ MOVOU X3, 464(AX)
+ MOVOU X4, 592(AX)
+ MOVOU X5, 720(AX)
+ MOVOU X6, 848(AX)
+ MOVOU X7, 976(AX)
+ MOVOU 96(AX), X0
+ MOVOU 224(AX), X1
+ MOVOU 352(AX), X2
+ MOVOU 480(AX), X3
+ MOVOU 608(AX), X4
+ MOVOU 736(AX), X5
+ MOVOU 864(AX), X6
+ MOVOU 992(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 96(AX)
+ MOVOU X1, 224(AX)
+ MOVOU X2, 352(AX)
+ MOVOU X3, 480(AX)
+ MOVOU X4, 608(AX)
+ MOVOU X5, 736(AX)
+ MOVOU X6, 864(AX)
+ MOVOU X7, 992(AX)
+ MOVOU 112(AX), X0
+ MOVOU 240(AX), X1
+ MOVOU 368(AX), X2
+ MOVOU 496(AX), X3
+ MOVOU 624(AX), X4
+ MOVOU 752(AX), X5
+ MOVOU 880(AX), X6
+ MOVOU 1008(AX), X7
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFD $0xb1, X6, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ PSHUFB X10, X2
+ MOVO X0, X8
+ PMULULQ X2, X8
+ PADDQ X2, X0
+ PADDQ X8, X0
+ PADDQ X8, X0
+ PXOR X0, X6
+ PSHUFB X11, X6
+ MOVO X4, X8
+ PMULULQ X6, X8
+ PADDQ X6, X4
+ PADDQ X8, X4
+ PADDQ X8, X4
+ PXOR X4, X2
+ MOVO X2, X8
+ PADDQ X2, X8
+ PSRLQ $0x3f, X2
+ PXOR X8, X2
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFD $0xb1, X7, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ PSHUFB X10, X3
+ MOVO X1, X8
+ PMULULQ X3, X8
+ PADDQ X3, X1
+ PADDQ X8, X1
+ PADDQ X8, X1
+ PXOR X1, X7
+ PSHUFB X11, X7
+ MOVO X5, X8
+ PMULULQ X7, X8
+ PADDQ X7, X5
+ PADDQ X8, X5
+ PADDQ X8, X5
+ PXOR X5, X3
+ MOVO X3, X8
+ PADDQ X3, X8
+ PSRLQ $0x3f, X3
+ PXOR X8, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU X0, 112(AX)
+ MOVOU X1, 240(AX)
+ MOVOU X2, 368(AX)
+ MOVOU X3, 496(AX)
+ MOVOU X4, 624(AX)
+ MOVOU X5, 752(AX)
+ MOVOU X6, 880(AX)
+ MOVOU X7, 1008(AX)
+ RET
- BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11)
- BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11)
+DATA ·c40<>+0(SB)/8, $0x0201000706050403 // little-endian bytes 3,4,5,6,7,0,1,2: as a PSHUFB control this rotates the low 64-bit lane right by 24 bits
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b // little-endian bytes 11,12,13,14,15,8,9,10: the same rotation for the high 64-bit lane
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16 // 16-byte file-local, read-only, pointer-free table; NOTE(review): presumably one of the masks held in X10/X11 by the PSHUFB users - confirm against the function prologue
- BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11)
- BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11)
- RET
+DATA ·c48<>+0(SB)/8, $0x0100070605040302 // little-endian bytes 2,3,4,5,6,7,0,1: as a PSHUFB control this rotates the low 64-bit lane right by 16 bits
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a // little-endian bytes 10,11,12,13,14,15,8,9: the same rotation for the high 64-bit lane
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16 // 16-byte file-local, read-only, pointer-free table; NOTE(review): presumably one of the masks held in X10/X11 by the PSHUFB users - confirm against the function prologue
-// func mixBlocksSSE2(out, a, b, c *block)
-TEXT ·mixBlocksSSE2(SB), 4, $0-32
+// func mixBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 // stores a XOR b XOR c into out, 16 bytes per loop pass; no local frame, 32 bytes of arguments
MOVQ out+0(FP), DX
MOVQ a+8(FP), AX
MOVQ b+16(FP), BX
MOVQ c+24(FP), CX
- MOVQ $128, DI
+ MOVQ $0x00000080, DI // DI = 128 is the loop counter; it drops by 2 per pass, so 64 passes x 16 bytes = 1024 bytes are processed
loop:
- MOVOU 0(AX), X0
- MOVOU 0(BX), X1
- MOVOU 0(CX), X2
+ MOVOU (AX), X0 // X0 = current 16 bytes of a (unaligned load)
+ MOVOU (BX), X1 // X1 = current 16 bytes of b
+ MOVOU (CX), X2 // X2 = current 16 bytes of c
PXOR X1, X0
PXOR X2, X0
- MOVOU X0, 0(DX)
- ADDQ $16, AX
- ADDQ $16, BX
- ADDQ $16, CX
- ADDQ $16, DX
- SUBQ $2, DI
+ MOVOU X0, (DX) // write a ^ b ^ c to out; the previous contents of out are not read
+ ADDQ $0x10, AX // advance the a pointer by 16 bytes
+ ADDQ $0x10, BX // advance the b pointer by 16 bytes
+ ADDQ $0x10, CX // advance the c pointer by 16 bytes
+ ADDQ $0x10, DX // advance the out pointer by 16 bytes
+ SUBQ $0x02, DI // sets the flags JA tests; starting from 128 the loop exits once DI reaches 0
JA loop
RET
-// func xorBlocksSSE2(out, a, b, c *block)
-TEXT ·xorBlocksSSE2(SB), 4, $0-32
+// func xorBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 // XORs a, b and c into the existing contents of out, 16 bytes per loop pass; no local frame, 32 bytes of arguments
MOVQ out+0(FP), DX
MOVQ a+8(FP), AX
MOVQ b+16(FP), BX
MOVQ c+24(FP), CX
- MOVQ $128, DI
+ MOVQ $0x00000080, DI // DI = 128 is the loop counter; it drops by 2 per pass, so 64 passes x 16 bytes = 1024 bytes are processed
loop:
- MOVOU 0(AX), X0
- MOVOU 0(BX), X1
- MOVOU 0(CX), X2
- MOVOU 0(DX), X3
+ MOVOU (AX), X0 // X0 = current 16 bytes of a (unaligned load)
+ MOVOU (BX), X1 // X1 = current 16 bytes of b
+ MOVOU (CX), X2 // X2 = current 16 bytes of c
+ MOVOU (DX), X3 // X3 = current 16 bytes already in out, folded into the result below
PXOR X1, X0
PXOR X2, X0
PXOR X3, X0
- MOVOU X0, 0(DX)
- ADDQ $16, AX
- ADDQ $16, BX
- ADDQ $16, CX
- ADDQ $16, DX
- SUBQ $2, DI
+ MOVOU X0, (DX) // write out ^ a ^ b ^ c back to out
+ ADDQ $0x10, AX // advance the a pointer by 16 bytes
+ ADDQ $0x10, BX // advance the b pointer by 16 bytes
+ ADDQ $0x10, CX // advance the c pointer by 16 bytes
+ ADDQ $0x10, DX // advance the out pointer by 16 bytes
+ SUBQ $0x02, DI // sets the flags JA tests; starting from 128 the loop exits once DI reaches 0
JA loop
RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
index 9ae8206c20..f75162e039 100644
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
+++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
@@ -1,722 +1,4517 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
-DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
-
-#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
-#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
-#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
-#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
-#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
-
-#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
- VPADDQ m0, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFD $-79, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPSHUFB c40, Y1, Y1; \
- VPADDQ m1, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFB c48, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPADDQ Y1, Y1, t; \
- VPSRLQ $63, Y1, Y1; \
- VPXOR t, Y1, Y1; \
- VPERMQ_0x39_Y1_Y1; \
- VPERMQ_0x4E_Y2_Y2; \
- VPERMQ_0x93_Y3_Y3; \
- VPADDQ m2, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFD $-79, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPSHUFB c40, Y1, Y1; \
- VPADDQ m3, Y0, Y0; \
- VPADDQ Y1, Y0, Y0; \
- VPXOR Y0, Y3, Y3; \
- VPSHUFB c48, Y3, Y3; \
- VPADDQ Y3, Y2, Y2; \
- VPXOR Y2, Y1, Y1; \
- VPADDQ Y1, Y1, t; \
- VPSRLQ $63, Y1, Y1; \
- VPXOR t, Y1, Y1; \
- VPERMQ_0x39_Y3_Y3; \
- VPERMQ_0x4E_Y2_Y2; \
- VPERMQ_0x93_Y1_Y1
-
-#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
-#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
-#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
-#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
-#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
-
-#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
-#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
-#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
-#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
-#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
-
-#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
-#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
-#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
-#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
-#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
-
-#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
-
-#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
-#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
-
-// load msg: Y12 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
- VMOVQ_SI_X12(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X12(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y12, Y12
-
-// load msg: Y13 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
- VMOVQ_SI_X13(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X13(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y13, Y13
-
-// load msg: Y14 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
- VMOVQ_SI_X14(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X14(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y14, Y14
-
-// load msg: Y15 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
- VMOVQ_SI_X15(i0*8); \
- VMOVQ_SI_X11(i2*8); \
- VPINSRQ_1_SI_X15(i1*8); \
- VPINSRQ_1_SI_X11(i3*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
- VMOVQ_SI_X12_0; \
- VMOVQ_SI_X11(4*8); \
- VPINSRQ_1_SI_X12(2*8); \
- VPINSRQ_1_SI_X11(6*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(1, 3, 5, 7); \
- LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
- LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
-
-#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
- LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
- LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
- VMOVQ_SI_X11(11*8); \
- VPSHUFD $0x4E, 0*8(SI), X14; \
- VPINSRQ_1_SI_X11(5*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
-
-#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
- VMOVQ_SI_X11(5*8); \
- VMOVDQU 11*8(SI), X12; \
- VPINSRQ_1_SI_X11(15*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- VMOVQ_SI_X13(8*8); \
- VMOVQ_SI_X11(2*8); \
- VPINSRQ_1_SI_X13_0; \
- VPINSRQ_1_SI_X11(13*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
- LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
-
-#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
- LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
- LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
- LOAD_MSG_AVX2_Y14(2, 5, 4, 15); \
- VMOVQ_SI_X15(6*8); \
- VMOVQ_SI_X11_0; \
- VPINSRQ_1_SI_X15(10*8); \
- VPINSRQ_1_SI_X11(8*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
- LOAD_MSG_AVX2_Y12(9, 5, 2, 10); \
- VMOVQ_SI_X13_0; \
- VMOVQ_SI_X11(4*8); \
- VPINSRQ_1_SI_X13(7*8); \
- VPINSRQ_1_SI_X11(15*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
- LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
-
-#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X11_0; \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X11(8*8); \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
- LOAD_MSG_AVX2_Y14(4, 7, 15, 1); \
- LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
-
-#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
- LOAD_MSG_AVX2_Y12(12, 1, 14, 4); \
- LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
- VMOVQ_SI_X14_0; \
- VPSHUFD $0x4E, 8*8(SI), X11; \
- VPINSRQ_1_SI_X14(6*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
-
-#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
- LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
- LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
- LOAD_MSG_AVX2_Y14(5, 15, 8, 2); \
- VMOVQ_SI_X15_0; \
- VMOVQ_SI_X11(6*8); \
- VPINSRQ_1_SI_X15(4*8); \
- VPINSRQ_1_SI_X11(10*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
- VMOVQ_SI_X12(6*8); \
- VMOVQ_SI_X11(11*8); \
- VPINSRQ_1_SI_X12(14*8); \
- VPINSRQ_1_SI_X11_0; \
- VINSERTI128 $1, X11, Y12, Y12; \
- LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
- VMOVQ_SI_X11(1*8); \
- VMOVDQU 12*8(SI), X14; \
- VPINSRQ_1_SI_X11(10*8); \
- VINSERTI128 $1, X11, Y14, Y14; \
- VMOVQ_SI_X15(2*8); \
- VMOVDQU 4*8(SI), X11; \
- VPINSRQ_1_SI_X15(7*8); \
- VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
- LOAD_MSG_AVX2_Y12(10, 8, 7, 1); \
- VMOVQ_SI_X13(2*8); \
- VPSHUFD $0x4E, 5*8(SI), X11; \
- VPINSRQ_1_SI_X13(4*8); \
- VINSERTI128 $1, X11, Y13, Y13; \
- LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
- VMOVQ_SI_X15(11*8); \
- VMOVQ_SI_X11(12*8); \
- VPINSRQ_1_SI_X15(14*8); \
- VPINSRQ_1_SI_X11_0; \
- VINSERTI128 $1, X11, Y15, Y15
-
// func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, DX
- ADDQ $31, DX
- ANDQ $~31, DX
-
- MOVQ CX, 16(DX)
- XORQ CX, CX
- MOVQ CX, 24(DX)
-
- VMOVDQU ·AVX2_c40<>(SB), Y4
- VMOVDQU ·AVX2_c48<>(SB), Y5
-
- VMOVDQU 0(AX), Y8
+// Requires: AVX, AVX2
+TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48
+ MOVQ h+0(FP), AX
+ MOVQ c+8(FP), BX
+ MOVQ flag+16(FP), CX
+ MOVQ blocks_base+24(FP), SI
+ MOVQ blocks_len+32(FP), DI
+ MOVQ SP, DX
+ ADDQ $+31, DX
+ ANDQ $-32, DX
+ MOVQ CX, 16(DX)
+ XORQ CX, CX
+ MOVQ CX, 24(DX)
+ VMOVDQU ·AVX2_c40<>+0(SB), Y4
+ VMOVDQU ·AVX2_c48<>+0(SB), Y5
+ VMOVDQU (AX), Y8
VMOVDQU 32(AX), Y9
- VMOVDQU ·AVX2_iv0<>(SB), Y6
- VMOVDQU ·AVX2_iv1<>(SB), Y7
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
- MOVQ R9, 8(DX)
+ VMOVDQU ·AVX2_iv0<>+0(SB), Y6
+ VMOVDQU ·AVX2_iv1<>+0(SB), Y7
+ MOVQ (BX), R8
+ MOVQ 8(BX), R9
+ MOVQ R9, 8(DX)
loop:
- ADDQ $128, R8
- MOVQ R8, 0(DX)
- CMPQ R8, $128
+ ADDQ $0x80, R8
+ MOVQ R8, (DX)
+ CMPQ R8, $0x80
JGE noinc
INCQ R9
MOVQ R9, 8(DX)
noinc:
- VMOVDQA Y8, Y0
- VMOVDQA Y9, Y1
- VMOVDQA Y6, Y2
- VPXOR 0(DX), Y7, Y3
-
- LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
- VMOVDQA Y12, 32(DX)
- VMOVDQA Y13, 64(DX)
- VMOVDQA Y14, 96(DX)
- VMOVDQA Y15, 128(DX)
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
- VMOVDQA Y12, 160(DX)
- VMOVDQA Y13, 192(DX)
- VMOVDQA Y14, 224(DX)
- VMOVDQA Y15, 256(DX)
-
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
- LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
- ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-
- ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5)
- ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5)
-
- VPXOR Y0, Y8, Y8
- VPXOR Y1, Y9, Y9
- VPXOR Y2, Y8, Y8
- VPXOR Y3, Y9, Y9
-
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
-
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
-
- VMOVDQU Y8, 0(AX)
- VMOVDQU Y9, 32(AX)
+ VMOVDQA Y8, Y0
+ VMOVDQA Y9, Y1
+ VMOVDQA Y6, Y2
+ VPXOR (DX), Y7, Y3
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x26
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x28
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VMOVDQA Y12, 32(DX)
+ VMOVDQA Y13, 64(DX)
+ VMOVDQA Y14, 96(DX)
+ VMOVDQA Y15, 128(DX)
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x58
+ VPSHUFD $0x4e, (SI), X14
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x28
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VMOVDQA Y12, 160(DX)
+ VMOVDQA Y13, 192(DX)
+ VMOVDQA Y14, 224(DX)
+ VMOVDQA Y15, 256(DX)
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x28
+ VMOVDQU 88(SI), X12
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x2e
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x1e
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x2e
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x60
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x1e
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x78
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x70
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x20
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x36
+ VPSHUFD $0x4e, 64(SI), X11
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x30
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x48
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x10
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x3e
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x30
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x58
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x1e
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x40
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x08
+ VMOVDQU 96(SI), X14
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x50
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x10
+ VMOVDQU 32(SI), X11
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x08
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y12, Y12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x10
+ VPSHUFD $0x4e, 40(SI), X11
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y13, Y13
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x78
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x18
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x5e
+ BYTE $0x68
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y14, Y14
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x5e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0xa1
+ BYTE $0x22
+ BYTE $0x1e
+ BYTE $0x01
+ VINSERTI128 $0x01, X11, Y15, Y15
+ VPADDQ Y12, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y13, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ Y14, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ Y15, Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ VPADDQ 32(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 64(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ 96(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 128(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ VPADDQ 160(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 192(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x93
+ VPADDQ 224(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFD $-79, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPSHUFB Y4, Y1, Y1
+ VPADDQ 256(DX), Y0, Y0
+ VPADDQ Y1, Y0, Y0
+ VPXOR Y0, Y3, Y3
+ VPSHUFB Y5, Y3, Y3
+ VPADDQ Y3, Y2, Y2
+ VPXOR Y2, Y1, Y1
+ VPADDQ Y1, Y1, Y10
+ VPSRLQ $0x3f, Y1, Y1
+ VPXOR Y10, Y1, Y1
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xdb
+ BYTE $0x39
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xd2
+ BYTE $0x4e
+ BYTE $0xc4
+ BYTE $0xe3
+ BYTE $0xfd
+ BYTE $0x00
+ BYTE $0xc9
+ BYTE $0x93
+ VPXOR Y0, Y8, Y8
+ VPXOR Y1, Y9, Y9
+ VPXOR Y2, Y8, Y8
+ VPXOR Y3, Y9, Y9
+ LEAQ 128(SI), SI
+ SUBQ $0x80, DI
+ JNE loop
+ MOVQ R8, (BX)
+ MOVQ R9, 8(BX)
+ VMOVDQU Y8, (AX)
+ VMOVDQU Y9, 32(AX)
VZEROUPPER
-
RET
-#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
-#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
-#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
-#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
-#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
-
-#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
-#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
-
-#define SHUFFLE_AVX() \
- VMOVDQA X6, X13; \
- VMOVDQA X2, X14; \
- VMOVDQA X4, X6; \
- VPUNPCKLQDQ_X13_X13_X15; \
- VMOVDQA X5, X4; \
- VMOVDQA X6, X5; \
- VPUNPCKHQDQ_X15_X7_X6; \
- VPUNPCKLQDQ_X7_X7_X15; \
- VPUNPCKHQDQ_X15_X13_X7; \
- VPUNPCKLQDQ_X3_X3_X15; \
- VPUNPCKHQDQ_X15_X2_X2; \
- VPUNPCKLQDQ_X14_X14_X15; \
- VPUNPCKHQDQ_X15_X3_X3; \
-
-#define SHUFFLE_AVX_INV() \
- VMOVDQA X2, X13; \
- VMOVDQA X4, X14; \
- VPUNPCKLQDQ_X2_X2_X15; \
- VMOVDQA X5, X4; \
- VPUNPCKHQDQ_X15_X3_X2; \
- VMOVDQA X14, X5; \
- VPUNPCKLQDQ_X3_X3_X15; \
- VMOVDQA X6, X14; \
- VPUNPCKHQDQ_X15_X13_X3; \
- VPUNPCKLQDQ_X7_X7_X15; \
- VPUNPCKHQDQ_X15_X6_X6; \
- VPUNPCKLQDQ_X14_X14_X15; \
- VPUNPCKHQDQ_X15_X7_X7; \
-
-#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
- VPADDQ m0, v0, v0; \
- VPADDQ v2, v0, v0; \
- VPADDQ m1, v1, v1; \
- VPADDQ v3, v1, v1; \
- VPXOR v0, v6, v6; \
- VPXOR v1, v7, v7; \
- VPSHUFD $-79, v6, v6; \
- VPSHUFD $-79, v7, v7; \
- VPADDQ v6, v4, v4; \
- VPADDQ v7, v5, v5; \
- VPXOR v4, v2, v2; \
- VPXOR v5, v3, v3; \
- VPSHUFB c40, v2, v2; \
- VPSHUFB c40, v3, v3; \
- VPADDQ m2, v0, v0; \
- VPADDQ v2, v0, v0; \
- VPADDQ m3, v1, v1; \
- VPADDQ v3, v1, v1; \
- VPXOR v0, v6, v6; \
- VPXOR v1, v7, v7; \
- VPSHUFB c48, v6, v6; \
- VPSHUFB c48, v7, v7; \
- VPADDQ v6, v4, v4; \
- VPADDQ v7, v5, v5; \
- VPXOR v4, v2, v2; \
- VPXOR v5, v3, v3; \
- VPADDQ v2, v2, t0; \
- VPSRLQ $63, v2, v2; \
- VPXOR t0, v2, v2; \
- VPADDQ v3, v3, t0; \
- VPSRLQ $63, v3, v3; \
- VPXOR t0, v3, v3
-
-// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
-// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
-#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
- VMOVQ_SI_X12(i0*8); \
- VMOVQ_SI_X13(i2*8); \
- VMOVQ_SI_X14(i4*8); \
- VMOVQ_SI_X15(i6*8); \
- VPINSRQ_1_SI_X12(i1*8); \
- VPINSRQ_1_SI_X13(i3*8); \
- VPINSRQ_1_SI_X14(i5*8); \
- VPINSRQ_1_SI_X15(i7*8)
-
-// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
-#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
- VMOVQ_SI_X12_0; \
- VMOVQ_SI_X13(4*8); \
- VMOVQ_SI_X14(1*8); \
- VMOVQ_SI_X15(5*8); \
- VPINSRQ_1_SI_X12(2*8); \
- VPINSRQ_1_SI_X13(6*8); \
- VPINSRQ_1_SI_X14(3*8); \
- VPINSRQ_1_SI_X15(7*8)
-
-// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
-#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
- VPSHUFD $0x4E, 0*8(SI), X12; \
- VMOVQ_SI_X13(11*8); \
- VMOVQ_SI_X14(12*8); \
- VMOVQ_SI_X15(7*8); \
- VPINSRQ_1_SI_X13(5*8); \
- VPINSRQ_1_SI_X14(2*8); \
- VPINSRQ_1_SI_X15(3*8)
-
-// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
-#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
- VMOVDQU 11*8(SI), X12; \
- VMOVQ_SI_X13(5*8); \
- VMOVQ_SI_X14(8*8); \
- VMOVQ_SI_X15(2*8); \
- VPINSRQ_1_SI_X13(15*8); \
- VPINSRQ_1_SI_X14_0; \
- VPINSRQ_1_SI_X15(13*8)
-
-// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
-#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X13(4*8); \
- VMOVQ_SI_X14(6*8); \
- VMOVQ_SI_X15_0; \
- VPINSRQ_1_SI_X12(5*8); \
- VPINSRQ_1_SI_X13(15*8); \
- VPINSRQ_1_SI_X14(10*8); \
- VPINSRQ_1_SI_X15(8*8)
+DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403 // AVX2_c40 table, bytes 0-7: first quadword of a 16-byte pattern
+DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b // bytes 8-15: second quadword of the 16-byte pattern
+DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403 // bytes 16-23: same value as bytes 0-7
+DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b // bytes 24-31: same value as bytes 8-15
+GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32 // 32-byte read-only, pointer-free, file-local symbol. NOTE(review): presumably the VPSHUFB byte-shuffle mask used for a 64-bit lane rotate in the AVX2 rounds; the load site is outside this hunk - confirm
-// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
-#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
- VMOVQ_SI_X12(9*8); \
- VMOVQ_SI_X13(2*8); \
- VMOVQ_SI_X14_0; \
- VMOVQ_SI_X15(4*8); \
- VPINSRQ_1_SI_X12(5*8); \
- VPINSRQ_1_SI_X13(10*8); \
- VPINSRQ_1_SI_X14(7*8); \
- VPINSRQ_1_SI_X15(15*8)
+DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302 // AVX2_c48 table, bytes 0-7: first quadword of a 16-byte pattern
+DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a // bytes 8-15: second quadword of the 16-byte pattern
+DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302 // bytes 16-23: same value as bytes 0-7
+DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a // bytes 24-31: same value as bytes 8-15
+GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32 // 32-byte read-only, pointer-free, file-local symbol. NOTE(review): presumably the second VPSHUFB byte-shuffle mask (different rotate amount than AVX2_c40); the load site is outside this hunk - confirm
-// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
-#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
- VMOVQ_SI_X12(2*8); \
- VMOVQ_SI_X13_0; \
- VMOVQ_SI_X14(12*8); \
- VMOVQ_SI_X15(11*8); \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X13(8*8); \
- VPINSRQ_1_SI_X14(10*8); \
- VPINSRQ_1_SI_X15(3*8)
+DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908 // AVX2_iv0 table, bytes 0-7: first of four distinct 64-bit constants
+DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b // bytes 8-15: second constant
+DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b // bytes 16-23: third constant
+DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1 // bytes 24-31: fourth constant
+GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32 // 32-byte read-only, pointer-free, file-local symbol. NOTE(review): presumably initialization-vector words 0-3 of the hash, packed for one 256-bit load; the load site is outside this hunk - confirm
-// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
-#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
- MOVQ 0*8(SI), X12; \
- VPSHUFD $0x4E, 8*8(SI), X13; \
- MOVQ 7*8(SI), X14; \
- MOVQ 2*8(SI), X15; \
- VPINSRQ_1_SI_X12(6*8); \
- VPINSRQ_1_SI_X14(3*8); \
- VPINSRQ_1_SI_X15(11*8)
-
-// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
-#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
- MOVQ 6*8(SI), X12; \
- MOVQ 11*8(SI), X13; \
- MOVQ 15*8(SI), X14; \
- MOVQ 3*8(SI), X15; \
- VPINSRQ_1_SI_X12(14*8); \
- VPINSRQ_1_SI_X13_0; \
- VPINSRQ_1_SI_X14(9*8); \
- VPINSRQ_1_SI_X15(8*8)
-
-// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
-#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
- MOVQ 5*8(SI), X12; \
- MOVQ 8*8(SI), X13; \
- MOVQ 0*8(SI), X14; \
- MOVQ 6*8(SI), X15; \
- VPINSRQ_1_SI_X12(15*8); \
- VPINSRQ_1_SI_X13(2*8); \
- VPINSRQ_1_SI_X14(4*8); \
- VPINSRQ_1_SI_X15(10*8)
-
-// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
-#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
- VMOVDQU 12*8(SI), X12; \
- MOVQ 1*8(SI), X13; \
- MOVQ 2*8(SI), X14; \
- VPINSRQ_1_SI_X13(10*8); \
- VPINSRQ_1_SI_X14(7*8); \
- VMOVDQU 4*8(SI), X15
-
-// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
-#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
- MOVQ 15*8(SI), X12; \
- MOVQ 3*8(SI), X13; \
- MOVQ 11*8(SI), X14; \
- MOVQ 12*8(SI), X15; \
- VPINSRQ_1_SI_X12(9*8); \
- VPINSRQ_1_SI_X13(13*8); \
- VPINSRQ_1_SI_X14(14*8); \
- VPINSRQ_1_SI_X15_0
+DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1 // AVX2_iv1 table, bytes 0-7: first of four distinct 64-bit constants
+DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f // bytes 8-15: second constant
+DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b // bytes 16-23: third constant
+DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179 // bytes 24-31: fourth constant
+GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32 // 32-byte read-only, pointer-free, file-local symbol. NOTE(review): presumably initialization-vector words 4-7 of the hash, the companion of AVX2_iv0; the load site is outside this hunk - confirm
// func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, R10
- ADDQ $15, R10
- ANDQ $~15, R10
-
- VMOVDQU ·AVX_c40<>(SB), X0
- VMOVDQU ·AVX_c48<>(SB), X1
+// Requires: AVX, SSE2
+TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48
+ MOVQ h+0(FP), AX
+ MOVQ c+8(FP), BX
+ MOVQ flag+16(FP), CX
+ MOVQ blocks_base+24(FP), SI
+ MOVQ blocks_len+32(FP), DI
+ MOVQ SP, R10
+ ADDQ $0x0f, R10
+ ANDQ $-16, R10
+ VMOVDQU ·AVX_c40<>+0(SB), X0
+ VMOVDQU ·AVX_c48<>+0(SB), X1
VMOVDQA X0, X8
VMOVDQA X1, X9
-
- VMOVDQU ·AVX_iv3<>(SB), X0
- VMOVDQA X0, 0(R10)
- XORQ CX, 0(R10) // 0(R10) = ·AVX_iv3 ^ (CX || 0)
-
- VMOVDQU 0(AX), X10
+ VMOVDQU ·AVX_iv3<>+0(SB), X0
+ VMOVDQA X0, (R10)
+ XORQ CX, (R10)
+ VMOVDQU (AX), X10
VMOVDQU 16(AX), X11
VMOVDQU 32(AX), X2
VMOVDQU 48(AX), X3
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
+ MOVQ (BX), R8
+ MOVQ 8(BX), R9
loop:
- ADDQ $128, R8
- CMPQ R8, $128
+ ADDQ $0x80, R8
+ CMPQ R8, $0x80
JGE noinc
INCQ R9
noinc:
- VMOVQ_R8_X15
- VPINSRQ_1_R9_X15
-
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0xf9
+ BYTE $0x6e
+ BYTE $0xf8
+ BYTE $0xc4
+ BYTE $0x43
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0xf9
+ BYTE $0x01
VMOVDQA X10, X0
VMOVDQA X11, X1
- VMOVDQU ·AVX_iv0<>(SB), X4
- VMOVDQU ·AVX_iv1<>(SB), X5
- VMOVDQU ·AVX_iv2<>(SB), X6
-
+ VMOVDQU ·AVX_iv0<>+0(SB), X4
+ VMOVDQU ·AVX_iv1<>+0(SB), X5
+ VMOVDQU ·AVX_iv2<>+0(SB), X6
VPXOR X15, X6, X6
- VMOVDQA 0(R10), X7
-
- LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
+ VMOVDQA (R10), X7
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x26
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x28
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0x01
VMOVDQA X12, 16(R10)
VMOVDQA X13, 32(R10)
VMOVDQA X14, 48(R10)
VMOVDQA X15, 64(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x78
+ BYTE $0x01
VMOVDQA X12, 80(R10)
VMOVDQA X13, 96(R10)
VMOVDQA X14, 112(R10)
VMOVDQA X15, 128(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x78
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x68
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0x01
VMOVDQA X12, 144(R10)
VMOVDQA X13, 160(R10)
VMOVDQA X14, 176(R10)
VMOVDQA X15, 192(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VPSHUFD $0x4e, (SI), X12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x38
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x18
+ BYTE $0x01
VMOVDQA X12, 208(R10)
VMOVDQA X13, 224(R10)
VMOVDQA X14, 240(R10)
VMOVDQA X15, 256(R10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX()
- LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9)
- SHUFFLE_AVX()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9)
- SHUFFLE_AVX_INV()
-
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9)
- SHUFFLE_AVX()
- HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9)
- SHUFFLE_AVX_INV()
-
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ VMOVDQU 88(SI), X12
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x40
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x10
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x36
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x20
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x60
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x3e
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x40
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x48
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x36
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x20
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x78
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x30
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x08
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x40
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x58
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x60
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x2e
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x18
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x20
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x78
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x70
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x48
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x70
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x28
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x68
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x50
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ MOVQ (SI), X12
+ VPSHUFD $0x4e, 64(SI), X13
+ MOVQ 56(SI), X14
+ MOVQ 16(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x30
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x58
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x68
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x60
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x58
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x08
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x38
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x18
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x48
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ MOVQ 40(SI), X12
+ MOVQ 64(SI), X13
+ MOVQ (SI), X14
+ MOVQ 48(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x78
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x10
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x50
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ MOVQ 48(SI), X12
+ MOVQ 88(SI), X13
+ MOVQ 120(SI), X14
+ MOVQ 24(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x2e
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x40
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VMOVDQU 96(SI), X12
+ MOVQ 8(SI), X13
+ MOVQ 16(SI), X14
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x50
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x38
+ BYTE $0x01
+ VMOVDQU 32(SI), X15
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x66
+ BYTE $0x50
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x6e
+ BYTE $0x38
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x76
+ BYTE $0x10
+ BYTE $0xc5
+ BYTE $0x7a
+ BYTE $0x7e
+ BYTE $0x7e
+ BYTE $0x30
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x40
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x08
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x20
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x7e
+ BYTE $0x28
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ MOVQ 120(SI), X12
+ MOVQ 24(SI), X13
+ MOVQ 88(SI), X14
+ MOVQ 96(SI), X15
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x99
+ BYTE $0x22
+ BYTE $0x66
+ BYTE $0x48
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x91
+ BYTE $0x22
+ BYTE $0x6e
+ BYTE $0x68
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x89
+ BYTE $0x22
+ BYTE $0x76
+ BYTE $0x70
+ BYTE $0x01
+ BYTE $0xc4
+ BYTE $0x63
+ BYTE $0x81
+ BYTE $0x22
+ BYTE $0x3e
+ BYTE $0x01
+ VPADDQ X12, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X13, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ X14, X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ X15, X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ VPADDQ 16(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 32(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 48(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 64(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VPADDQ 80(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 96(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 112(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 128(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
+ VPADDQ 144(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 160(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 176(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 192(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X6, X13
+ VMOVDQA X2, X14
+ VMOVDQA X4, X6
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x11
+ BYTE $0x6c
+ BYTE $0xfd
+ VMOVDQA X5, X4
+ VMOVDQA X6, X5
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xff
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x69
+ BYTE $0x6d
+ BYTE $0xd7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xdf
+ VPADDQ 208(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 224(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFD $-79, X6, X6
+ VPSHUFD $-79, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPSHUFB X8, X2, X2
+ VPSHUFB X8, X3, X3
+ VPADDQ 240(R10), X0, X0
+ VPADDQ X2, X0, X0
+ VPADDQ 256(R10), X1, X1
+ VPADDQ X3, X1, X1
+ VPXOR X0, X6, X6
+ VPXOR X1, X7, X7
+ VPSHUFB X9, X6, X6
+ VPSHUFB X9, X7, X7
+ VPADDQ X6, X4, X4
+ VPADDQ X7, X5, X5
+ VPXOR X4, X2, X2
+ VPXOR X5, X3, X3
+ VPADDQ X2, X2, X15
+ VPSRLQ $0x3f, X2, X2
+ VPXOR X15, X2, X2
+ VPADDQ X3, X3, X15
+ VPSRLQ $0x3f, X3, X3
+ VPXOR X15, X3, X3
+ VMOVDQA X2, X13
+ VMOVDQA X4, X14
+ BYTE $0xc5
+ BYTE $0x69
+ BYTE $0x6c
+ BYTE $0xfa
+ VMOVDQA X5, X4
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x61
+ BYTE $0x6d
+ BYTE $0xd7
+ VMOVDQA X14, X5
+ BYTE $0xc5
+ BYTE $0x61
+ BYTE $0x6c
+ BYTE $0xfb
+ VMOVDQA X6, X14
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x11
+ BYTE $0x6d
+ BYTE $0xdf
+ BYTE $0xc5
+ BYTE $0x41
+ BYTE $0x6c
+ BYTE $0xff
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x49
+ BYTE $0x6d
+ BYTE $0xf7
+ BYTE $0xc4
+ BYTE $0x41
+ BYTE $0x09
+ BYTE $0x6c
+ BYTE $0xfe
+ BYTE $0xc4
+ BYTE $0xc1
+ BYTE $0x41
+ BYTE $0x6d
+ BYTE $0xff
VMOVDQU 32(AX), X14
VMOVDQU 48(AX), X15
VPXOR X0, X10, X10
@@ -729,16 +4524,36 @@ noinc:
VPXOR X7, X15, X3
VMOVDQU X2, 32(AX)
VMOVDQU X3, 48(AX)
+ LEAQ 128(SI), SI
+ SUBQ $0x80, DI
+ JNE loop
+ VMOVDQU X10, (AX)
+ VMOVDQU X11, 16(AX)
+ MOVQ R8, (BX)
+ MOVQ R9, 8(BX)
+ VZEROUPPER
+ RET
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
+DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16
- VMOVDQU X10, 0(AX)
- VMOVDQU X11, 16(AX)
+DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
- VZEROUPPER
+DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16
- RET
+DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
index adfac00c15..9a0ce21244 100644
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
@@ -1,278 +1,1441 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT.
//go:build amd64 && gc && !purego
#include "textflag.h"
-DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v6, t1; \
- PUNPCKLQDQ v6, t2; \
- PUNPCKHQDQ v7, v6; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ v7, t2; \
- MOVO t1, v7; \
- MOVO v2, t1; \
- PUNPCKHQDQ t2, v7; \
- PUNPCKLQDQ v3, t2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
- MOVO v4, t1; \
- MOVO v5, v4; \
- MOVO t1, v5; \
- MOVO v2, t1; \
- PUNPCKLQDQ v2, t2; \
- PUNPCKHQDQ v3, v2; \
- PUNPCKHQDQ t2, v2; \
- PUNPCKLQDQ v3, t2; \
- MOVO t1, v3; \
- MOVO v6, t1; \
- PUNPCKHQDQ t2, v3; \
- PUNPCKLQDQ v7, t2; \
- PUNPCKHQDQ t2, v6; \
- PUNPCKLQDQ t1, t2; \
- PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
- PADDQ m0, v0; \
- PADDQ m1, v1; \
- PADDQ v2, v0; \
- PADDQ v3, v1; \
- PXOR v0, v6; \
- PXOR v1, v7; \
- PSHUFD $0xB1, v6, v6; \
- PSHUFD $0xB1, v7, v7; \
- PADDQ v6, v4; \
- PADDQ v7, v5; \
- PXOR v4, v2; \
- PXOR v5, v3; \
- PSHUFB c40, v2; \
- PSHUFB c40, v3; \
- PADDQ m2, v0; \
- PADDQ m3, v1; \
- PADDQ v2, v0; \
- PADDQ v3, v1; \
- PXOR v0, v6; \
- PXOR v1, v7; \
- PSHUFB c48, v6; \
- PSHUFB c48, v7; \
- PADDQ v6, v4; \
- PADDQ v7, v5; \
- PXOR v4, v2; \
- PXOR v5, v3; \
- MOVOU v2, t0; \
- PADDQ v2, t0; \
- PSRLQ $63, v2; \
- PXOR t0, v2; \
- MOVOU v3, t0; \
- PADDQ v3, t0; \
- PSRLQ $63, v3; \
- PXOR t0, v3
-
-#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
- MOVQ i0*8(src), m0; \
- PINSRQ $1, i1*8(src), m0; \
- MOVQ i2*8(src), m1; \
- PINSRQ $1, i3*8(src), m1; \
- MOVQ i4*8(src), m2; \
- PINSRQ $1, i5*8(src), m2; \
- MOVQ i6*8(src), m3; \
- PINSRQ $1, i7*8(src), m3
-
// func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
- MOVQ h+0(FP), AX
- MOVQ c+8(FP), BX
- MOVQ flag+16(FP), CX
- MOVQ blocks_base+24(FP), SI
- MOVQ blocks_len+32(FP), DI
-
- MOVQ SP, R10
- ADDQ $15, R10
- ANDQ $~15, R10
-
- MOVOU ·iv3<>(SB), X0
- MOVO X0, 0(R10)
- XORQ CX, 0(R10) // 0(R10) = ·iv3 ^ (CX || 0)
-
- MOVOU ·c40<>(SB), X13
- MOVOU ·c48<>(SB), X14
-
- MOVOU 0(AX), X12
+// Requires: SSE2, SSE4.1, SSSE3
+TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48
+ MOVQ h+0(FP), AX
+ MOVQ c+8(FP), BX
+ MOVQ flag+16(FP), CX
+ MOVQ blocks_base+24(FP), SI
+ MOVQ blocks_len+32(FP), DI
+ MOVQ SP, R10
+ ADDQ $0x0f, R10
+ ANDQ $-16, R10
+ MOVOU ·iv3<>+0(SB), X0
+ MOVO X0, (R10)
+ XORQ CX, (R10)
+ MOVOU ·c40<>+0(SB), X13
+ MOVOU ·c48<>+0(SB), X14
+ MOVOU (AX), X12
MOVOU 16(AX), X15
-
- MOVQ 0(BX), R8
- MOVQ 8(BX), R9
+ MOVQ (BX), R8
+ MOVQ 8(BX), R9
loop:
- ADDQ $128, R8
- CMPQ R8, $128
+ ADDQ $0x80, R8
+ CMPQ R8, $0x80
JGE noinc
INCQ R9
noinc:
- MOVQ R8, X8
- PINSRQ $1, R9, X8
-
- MOVO X12, X0
- MOVO X15, X1
- MOVOU 32(AX), X2
- MOVOU 48(AX), X3
- MOVOU ·iv0<>(SB), X4
- MOVOU ·iv1<>(SB), X5
- MOVOU ·iv2<>(SB), X6
-
- PXOR X8, X6
- MOVO 0(R10), X7
-
- LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
- MOVO X8, 16(R10)
- MOVO X9, 32(R10)
- MOVO X10, 48(R10)
- MOVO X11, 64(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
- MOVO X8, 80(R10)
- MOVO X9, 96(R10)
- MOVO X10, 112(R10)
- MOVO X11, 128(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
- MOVO X8, 144(R10)
- MOVO X9, 160(R10)
- MOVO X10, 176(R10)
- MOVO X11, 192(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
- MOVO X8, 208(R10)
- MOVO X9, 224(R10)
- MOVO X10, 240(R10)
- MOVO X11, 256(R10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
+ MOVQ R8, X8
+ PINSRQ $0x01, R9, X8
+ MOVO X12, X0
+ MOVO X15, X1
+ MOVOU 32(AX), X2
+ MOVOU 48(AX), X3
+ MOVOU ·iv0<>+0(SB), X4
+ MOVOU ·iv1<>+0(SB), X5
+ MOVOU ·iv2<>+0(SB), X6
+ PXOR X8, X6
+ MOVO (R10), X7
+ MOVQ (SI), X8
+ PINSRQ $0x01, 16(SI), X8
+ MOVQ 32(SI), X9
+ PINSRQ $0x01, 48(SI), X9
+ MOVQ 8(SI), X10
+ PINSRQ $0x01, 24(SI), X10
+ MOVQ 40(SI), X11
+ PINSRQ $0x01, 56(SI), X11
+ MOVO X8, 16(R10)
+ MOVO X9, 32(R10)
+ MOVO X10, 48(R10)
+ MOVO X11, 64(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 64(SI), X8
+ PINSRQ $0x01, 80(SI), X8
+ MOVQ 96(SI), X9
+ PINSRQ $0x01, 112(SI), X9
+ MOVQ 72(SI), X10
+ PINSRQ $0x01, 88(SI), X10
+ MOVQ 104(SI), X11
+ PINSRQ $0x01, 120(SI), X11
+ MOVO X8, 80(R10)
+ MOVO X9, 96(R10)
+ MOVO X10, 112(R10)
+ MOVO X11, 128(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 112(SI), X8
+ PINSRQ $0x01, 32(SI), X8
+ MOVQ 72(SI), X9
+ PINSRQ $0x01, 104(SI), X9
+ MOVQ 80(SI), X10
+ PINSRQ $0x01, 64(SI), X10
+ MOVQ 120(SI), X11
+ PINSRQ $0x01, 48(SI), X11
+ MOVO X8, 144(R10)
+ MOVO X9, 160(R10)
+ MOVO X10, 176(R10)
+ MOVO X11, 192(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 8(SI), X8
+ PINSRQ $0x01, (SI), X8
+ MOVQ 88(SI), X9
+ PINSRQ $0x01, 40(SI), X9
+ MOVQ 96(SI), X10
+ PINSRQ $0x01, 16(SI), X10
+ MOVQ 56(SI), X11
+ PINSRQ $0x01, 24(SI), X11
+ MOVO X8, 208(R10)
+ MOVO X9, 224(R10)
+ MOVO X10, 240(R10)
+ MOVO X11, 256(R10)
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 88(SI), X8
+ PINSRQ $0x01, 96(SI), X8
+ MOVQ 40(SI), X9
+ PINSRQ $0x01, 120(SI), X9
+ MOVQ 64(SI), X10
+ PINSRQ $0x01, (SI), X10
+ MOVQ 16(SI), X11
+ PINSRQ $0x01, 104(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 80(SI), X8
+ PINSRQ $0x01, 24(SI), X8
+ MOVQ 56(SI), X9
+ PINSRQ $0x01, 72(SI), X9
+ MOVQ 112(SI), X10
+ PINSRQ $0x01, 48(SI), X10
+ MOVQ 8(SI), X11
+ PINSRQ $0x01, 32(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 56(SI), X8
+ PINSRQ $0x01, 24(SI), X8
+ MOVQ 104(SI), X9
+ PINSRQ $0x01, 88(SI), X9
+ MOVQ 72(SI), X10
+ PINSRQ $0x01, 8(SI), X10
+ MOVQ 96(SI), X11
+ PINSRQ $0x01, 112(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 16(SI), X8
+ PINSRQ $0x01, 40(SI), X8
+ MOVQ 32(SI), X9
+ PINSRQ $0x01, 120(SI), X9
+ MOVQ 48(SI), X10
+ PINSRQ $0x01, 80(SI), X10
+ MOVQ (SI), X11
+ PINSRQ $0x01, 64(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 72(SI), X8
+ PINSRQ $0x01, 40(SI), X8
+ MOVQ 16(SI), X9
+ PINSRQ $0x01, 80(SI), X9
+ MOVQ (SI), X10
+ PINSRQ $0x01, 56(SI), X10
+ MOVQ 32(SI), X11
+ PINSRQ $0x01, 120(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 112(SI), X8
+ PINSRQ $0x01, 88(SI), X8
+ MOVQ 48(SI), X9
+ PINSRQ $0x01, 24(SI), X9
+ MOVQ 8(SI), X10
+ PINSRQ $0x01, 96(SI), X10
+ MOVQ 64(SI), X11
+ PINSRQ $0x01, 104(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 16(SI), X8
+ PINSRQ $0x01, 48(SI), X8
+ MOVQ (SI), X9
+ PINSRQ $0x01, 64(SI), X9
+ MOVQ 96(SI), X10
+ PINSRQ $0x01, 80(SI), X10
+ MOVQ 88(SI), X11
+ PINSRQ $0x01, 24(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 32(SI), X8
+ PINSRQ $0x01, 56(SI), X8
+ MOVQ 120(SI), X9
+ PINSRQ $0x01, 8(SI), X9
+ MOVQ 104(SI), X10
+ PINSRQ $0x01, 40(SI), X10
+ MOVQ 112(SI), X11
+ PINSRQ $0x01, 72(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 96(SI), X8
+ PINSRQ $0x01, 8(SI), X8
+ MOVQ 112(SI), X9
+ PINSRQ $0x01, 32(SI), X9
+ MOVQ 40(SI), X10
+ PINSRQ $0x01, 120(SI), X10
+ MOVQ 104(SI), X11
+ PINSRQ $0x01, 80(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ (SI), X8
+ PINSRQ $0x01, 48(SI), X8
+ MOVQ 72(SI), X9
+ PINSRQ $0x01, 64(SI), X9
+ MOVQ 56(SI), X10
+ PINSRQ $0x01, 24(SI), X10
+ MOVQ 16(SI), X11
+ PINSRQ $0x01, 88(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 104(SI), X8
+ PINSRQ $0x01, 56(SI), X8
+ MOVQ 96(SI), X9
+ PINSRQ $0x01, 24(SI), X9
+ MOVQ 88(SI), X10
+ PINSRQ $0x01, 112(SI), X10
+ MOVQ 8(SI), X11
+ PINSRQ $0x01, 72(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 40(SI), X8
+ PINSRQ $0x01, 120(SI), X8
+ MOVQ 64(SI), X9
+ PINSRQ $0x01, 16(SI), X9
+ MOVQ (SI), X10
+ PINSRQ $0x01, 32(SI), X10
+ MOVQ 48(SI), X11
+ PINSRQ $0x01, 80(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 48(SI), X8
+ PINSRQ $0x01, 112(SI), X8
+ MOVQ 88(SI), X9
+ PINSRQ $0x01, (SI), X9
+ MOVQ 120(SI), X10
+ PINSRQ $0x01, 72(SI), X10
+ MOVQ 24(SI), X11
+ PINSRQ $0x01, 64(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 96(SI), X8
+ PINSRQ $0x01, 104(SI), X8
+ MOVQ 8(SI), X9
+ PINSRQ $0x01, 80(SI), X9
+ MOVQ 16(SI), X10
+ PINSRQ $0x01, 56(SI), X10
+ MOVQ 32(SI), X11
+ PINSRQ $0x01, 40(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVQ 80(SI), X8
+ PINSRQ $0x01, 64(SI), X8
+ MOVQ 56(SI), X9
+ PINSRQ $0x01, 8(SI), X9
+ MOVQ 16(SI), X10
+ PINSRQ $0x01, 32(SI), X10
+ MOVQ 48(SI), X11
+ PINSRQ $0x01, 40(SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ MOVQ 120(SI), X8
+ PINSRQ $0x01, 72(SI), X8
+ MOVQ 24(SI), X9
+ PINSRQ $0x01, 104(SI), X9
+ MOVQ 88(SI), X10
+ PINSRQ $0x01, 112(SI), X10
+ MOVQ 96(SI), X11
+ PINSRQ $0x01, (SI), X11
+ PADDQ X8, X0
+ PADDQ X9, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ X10, X0
+ PADDQ X11, X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ PADDQ 16(R10), X0
+ PADDQ 32(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 48(R10), X0
+ PADDQ 64(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ PADDQ 80(R10), X0
+ PADDQ 96(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 112(R10), X0
+ PADDQ 128(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ PADDQ 144(R10), X0
+ PADDQ 160(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 176(R10), X0
+ PADDQ 192(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X6, X8
+ PUNPCKLQDQ X6, X9
+ PUNPCKHQDQ X7, X6
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X7, X9
+ MOVO X8, X7
+ MOVO X2, X8
+ PUNPCKHQDQ X9, X7
+ PUNPCKLQDQ X3, X9
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X3
+ PADDQ 208(R10), X0
+ PADDQ 224(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFD $0xb1, X6, X6
+ PSHUFD $0xb1, X7, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ PSHUFB X13, X2
+ PSHUFB X13, X3
+ PADDQ 240(R10), X0
+ PADDQ 256(R10), X1
+ PADDQ X2, X0
+ PADDQ X3, X1
+ PXOR X0, X6
+ PXOR X1, X7
+ PSHUFB X14, X6
+ PSHUFB X14, X7
+ PADDQ X6, X4
+ PADDQ X7, X5
+ PXOR X4, X2
+ PXOR X5, X3
+ MOVOU X2, X11
+ PADDQ X2, X11
+ PSRLQ $0x3f, X2
+ PXOR X11, X2
+ MOVOU X3, X11
+ PADDQ X3, X11
+ PSRLQ $0x3f, X3
+ PXOR X11, X3
+ MOVO X4, X8
+ MOVO X5, X4
+ MOVO X8, X5
+ MOVO X2, X8
+ PUNPCKLQDQ X2, X9
+ PUNPCKHQDQ X3, X2
+ PUNPCKHQDQ X9, X2
+ PUNPCKLQDQ X3, X9
+ MOVO X8, X3
+ MOVO X6, X8
+ PUNPCKHQDQ X9, X3
+ PUNPCKLQDQ X7, X9
+ PUNPCKHQDQ X9, X6
+ PUNPCKLQDQ X8, X9
+ PUNPCKHQDQ X9, X7
+ MOVOU 32(AX), X10
+ MOVOU 48(AX), X11
+ PXOR X0, X12
+ PXOR X1, X15
+ PXOR X2, X10
+ PXOR X3, X11
+ PXOR X4, X12
+ PXOR X5, X15
+ PXOR X6, X10
+ PXOR X7, X11
+ MOVOU X10, 32(AX)
+ MOVOU X11, 48(AX)
+ LEAQ 128(SI), SI
+ SUBQ $0x80, DI
+ JNE loop
+ MOVOU X12, (AX)
+ MOVOU X15, 16(AX)
+ MOVQ R8, (BX)
+ MOVQ R9, 8(BX)
+ RET
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
- SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
- HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
- SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
+DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·iv3<>(SB), RODATA|NOPTR, $16
- MOVOU 32(AX), X10
- MOVOU 48(AX), X11
- PXOR X0, X12
- PXOR X1, X15
- PXOR X2, X10
- PXOR X3, X11
- PXOR X4, X12
- PXOR X5, X15
- PXOR X6, X10
- PXOR X7, X11
- MOVOU X10, 32(AX)
- MOVOU X11, 48(AX)
+DATA ·c40<>+0(SB)/8, $0x0201000706050403
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16
- LEAQ 128(SI), SI
- SUBQ $128, DI
- JNE loop
+DATA ·c48<>+0(SB)/8, $0x0100070605040302
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16
- MOVOU X12, 0(AX)
- MOVOU X15, 16(AX)
+DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·iv0<>(SB), RODATA|NOPTR, $16
- MOVQ R8, 0(BX)
- MOVQ R9, 8(BX)
+DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·iv1<>(SB), RODATA|NOPTR, $16
- RET
+DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/cast5/cast5.go b/vendor/golang.org/x/crypto/cast5/cast5.go
index 425e8eecb0..016e90215c 100644
--- a/vendor/golang.org/x/crypto/cast5/cast5.go
+++ b/vendor/golang.org/x/crypto/cast5/cast5.go
@@ -11,7 +11,7 @@
// Deprecated: any new system should use AES (from crypto/aes, if necessary in
// an AEAD mode like crypto/cipher.NewGCM) or XChaCha20-Poly1305 (from
// golang.org/x/crypto/chacha20poly1305).
-package cast5 // import "golang.org/x/crypto/cast5"
+package cast5
import (
"errors"
diff --git a/vendor/golang.org/x/crypto/cryptobyte/asn1/asn1.go b/vendor/golang.org/x/crypto/cryptobyte/asn1/asn1.go
index cda8e3edfd..90ef6a241d 100644
--- a/vendor/golang.org/x/crypto/cryptobyte/asn1/asn1.go
+++ b/vendor/golang.org/x/crypto/cryptobyte/asn1/asn1.go
@@ -4,7 +4,7 @@
// Package asn1 contains supporting types for parsing and building ASN.1
// messages with the cryptobyte package.
-package asn1 // import "golang.org/x/crypto/cryptobyte/asn1"
+package asn1
// Tag represents an ASN.1 identifier octet, consisting of a tag number
// (indicating a type) and class (such as context-specific or constructed).
diff --git a/vendor/golang.org/x/crypto/cryptobyte/string.go b/vendor/golang.org/x/crypto/cryptobyte/string.go
index 10692a8a31..4b0f8097f9 100644
--- a/vendor/golang.org/x/crypto/cryptobyte/string.go
+++ b/vendor/golang.org/x/crypto/cryptobyte/string.go
@@ -15,7 +15,7 @@
//
// See the documentation and examples for the Builder and String types to get
// started.
-package cryptobyte // import "golang.org/x/crypto/cryptobyte"
+package cryptobyte
// String represents a string of bytes. It provides methods for parsing
// fixed-length and length-prefixed values from it.
diff --git a/vendor/golang.org/x/crypto/ed25519/ed25519.go b/vendor/golang.org/x/crypto/ed25519/ed25519.go
index a7828345fc..59b3a95a7d 100644
--- a/vendor/golang.org/x/crypto/ed25519/ed25519.go
+++ b/vendor/golang.org/x/crypto/ed25519/ed25519.go
@@ -11,9 +11,7 @@
// operations with the same key more efficient. This package refers to the RFC
// 8032 private key as the “seed”.
//
-// Beginning with Go 1.13, the functionality of this package was moved to the
-// standard library as crypto/ed25519. This package only acts as a compatibility
-// wrapper.
+// This package is a wrapper around the standard library crypto/ed25519 package.
package ed25519
import (
diff --git a/vendor/golang.org/x/crypto/hkdf/hkdf.go b/vendor/golang.org/x/crypto/hkdf/hkdf.go
index f4ded5fee2..3bee66294e 100644
--- a/vendor/golang.org/x/crypto/hkdf/hkdf.go
+++ b/vendor/golang.org/x/crypto/hkdf/hkdf.go
@@ -8,7 +8,7 @@
// HKDF is a cryptographic key derivation function (KDF) with the goal of
// expanding limited input keying material into one or more cryptographically
// strong secret keys.
-package hkdf // import "golang.org/x/crypto/hkdf"
+package hkdf
import (
"crypto/hmac"
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go b/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go
index 333da285b3..bd896bdc76 100644
--- a/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go
+++ b/vendor/golang.org/x/crypto/internal/poly1305/mac_noasm.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build (!amd64 && !ppc64le && !s390x) || !gc || purego
+//go:build (!amd64 && !ppc64le && !ppc64 && !s390x) || !gc || purego
package poly1305
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
index e0d3c64756..133757384b 100644
--- a/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_amd64.s
@@ -1,108 +1,93 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run sum_amd64_asm.go -out ../sum_amd64.s -pkg poly1305. DO NOT EDIT.
//go:build gc && !purego
-#include "textflag.h"
-
-#define POLY1305_ADD(msg, h0, h1, h2) \
- ADDQ 0(msg), h0; \
- ADCQ 8(msg), h1; \
- ADCQ $1, h2; \
- LEAQ 16(msg), msg
-
-#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3) \
- MOVQ r0, AX; \
- MULQ h0; \
- MOVQ AX, t0; \
- MOVQ DX, t1; \
- MOVQ r0, AX; \
- MULQ h1; \
- ADDQ AX, t1; \
- ADCQ $0, DX; \
- MOVQ r0, t2; \
- IMULQ h2, t2; \
- ADDQ DX, t2; \
- \
- MOVQ r1, AX; \
- MULQ h0; \
- ADDQ AX, t1; \
- ADCQ $0, DX; \
- MOVQ DX, h0; \
- MOVQ r1, t3; \
- IMULQ h2, t3; \
- MOVQ r1, AX; \
- MULQ h1; \
- ADDQ AX, t2; \
- ADCQ DX, t3; \
- ADDQ h0, t2; \
- ADCQ $0, t3; \
- \
- MOVQ t0, h0; \
- MOVQ t1, h1; \
- MOVQ t2, h2; \
- ANDQ $3, h2; \
- MOVQ t2, t0; \
- ANDQ $0xFFFFFFFFFFFFFFFC, t0; \
- ADDQ t0, h0; \
- ADCQ t3, h1; \
- ADCQ $0, h2; \
- SHRQ $2, t3, t2; \
- SHRQ $2, t3; \
- ADDQ t2, h0; \
- ADCQ t3, h1; \
- ADCQ $0, h2
-
-// func update(state *[7]uint64, msg []byte)
+// func update(state *macState, msg []byte)
TEXT ·update(SB), $0-32
MOVQ state+0(FP), DI
MOVQ msg_base+8(FP), SI
MOVQ msg_len+16(FP), R15
-
- MOVQ 0(DI), R8 // h0
- MOVQ 8(DI), R9 // h1
- MOVQ 16(DI), R10 // h2
- MOVQ 24(DI), R11 // r0
- MOVQ 32(DI), R12 // r1
-
- CMPQ R15, $16
+ MOVQ (DI), R8
+ MOVQ 8(DI), R9
+ MOVQ 16(DI), R10
+ MOVQ 24(DI), R11
+ MOVQ 32(DI), R12
+ CMPQ R15, $0x10
JB bytes_between_0_and_15
loop:
- POLY1305_ADD(SI, R8, R9, R10)
+ ADDQ (SI), R8
+ ADCQ 8(SI), R9
+ ADCQ $0x01, R10
+ LEAQ 16(SI), SI
multiply:
- POLY1305_MUL(R8, R9, R10, R11, R12, BX, CX, R13, R14)
- SUBQ $16, R15
- CMPQ R15, $16
- JAE loop
+ MOVQ R11, AX
+ MULQ R8
+ MOVQ AX, BX
+ MOVQ DX, CX
+ MOVQ R11, AX
+ MULQ R9
+ ADDQ AX, CX
+ ADCQ $0x00, DX
+ MOVQ R11, R13
+ IMULQ R10, R13
+ ADDQ DX, R13
+ MOVQ R12, AX
+ MULQ R8
+ ADDQ AX, CX
+ ADCQ $0x00, DX
+ MOVQ DX, R8
+ MOVQ R12, R14
+ IMULQ R10, R14
+ MOVQ R12, AX
+ MULQ R9
+ ADDQ AX, R13
+ ADCQ DX, R14
+ ADDQ R8, R13
+ ADCQ $0x00, R14
+ MOVQ BX, R8
+ MOVQ CX, R9
+ MOVQ R13, R10
+ ANDQ $0x03, R10
+ MOVQ R13, BX
+ ANDQ $-4, BX
+ ADDQ BX, R8
+ ADCQ R14, R9
+ ADCQ $0x00, R10
+ SHRQ $0x02, R14, R13
+ SHRQ $0x02, R14
+ ADDQ R13, R8
+ ADCQ R14, R9
+ ADCQ $0x00, R10
+ SUBQ $0x10, R15
+ CMPQ R15, $0x10
+ JAE loop
bytes_between_0_and_15:
TESTQ R15, R15
JZ done
- MOVQ $1, BX
+ MOVQ $0x00000001, BX
XORQ CX, CX
XORQ R13, R13
ADDQ R15, SI
flush_buffer:
- SHLQ $8, BX, CX
- SHLQ $8, BX
+ SHLQ $0x08, BX, CX
+ SHLQ $0x08, BX
MOVB -1(SI), R13
XORQ R13, BX
DECQ SI
DECQ R15
JNZ flush_buffer
-
ADDQ BX, R8
ADCQ CX, R9
- ADCQ $0, R10
- MOVQ $16, R15
+ ADCQ $0x00, R10
+ MOVQ $0x00000010, R15
JMP multiply
done:
- MOVQ R8, 0(DI)
+ MOVQ R8, (DI)
MOVQ R9, 8(DI)
MOVQ R10, 16(DI)
RET
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.go b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64x.go
similarity index 95%
rename from vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.go
rename to vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64x.go
index 4aec4874b5..1a1679aaad 100644
--- a/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.go
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64x.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build gc && !purego
+//go:build gc && !purego && (ppc64 || ppc64le)
package poly1305
diff --git a/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.s b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64x.s
similarity index 89%
rename from vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.s
rename to vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64x.s
index b3c1699bff..6899a1dabc 100644
--- a/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64le.s
+++ b/vendor/golang.org/x/crypto/internal/poly1305/sum_ppc64x.s
@@ -2,15 +2,25 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build gc && !purego
+//go:build gc && !purego && (ppc64 || ppc64le)
#include "textflag.h"
// This was ported from the amd64 implementation.
+#ifdef GOARCH_ppc64le
+#define LE_MOVD MOVD
+#define LE_MOVWZ MOVWZ
+#define LE_MOVHZ MOVHZ
+#else
+#define LE_MOVD MOVDBR
+#define LE_MOVWZ MOVWBR
+#define LE_MOVHZ MOVHBR
+#endif
+
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
- MOVD (msg), t0; \
- MOVD 8(msg), t1; \
+ LE_MOVD (msg)( R0), t0; \
+ LE_MOVD (msg)(R24), t1; \
MOVD $1, t2; \
ADDC t0, h0, h0; \
ADDE t1, h1, h1; \
@@ -50,10 +60,6 @@
ADDE t3, h1, h1; \
ADDZE h2
-DATA ·poly1305Mask<>+0x00(SB)/8, $0x0FFFFFFC0FFFFFFF
-DATA ·poly1305Mask<>+0x08(SB)/8, $0x0FFFFFFC0FFFFFFC
-GLOBL ·poly1305Mask<>(SB), RODATA, $16
-
// func update(state *[7]uint64, msg []byte)
TEXT ·update(SB), $0-32
MOVD state+0(FP), R3
@@ -66,6 +72,8 @@ TEXT ·update(SB), $0-32
MOVD 24(R3), R11 // r0
MOVD 32(R3), R12 // r1
+ MOVD $8, R24
+
CMP R5, $16
BLT bytes_between_0_and_15
@@ -94,7 +102,7 @@ flush_buffer:
// Greater than 8 -- load the rightmost remaining bytes in msg
// and put into R17 (h1)
- MOVD (R4)(R21), R17
+ LE_MOVD (R4)(R21), R17
MOVD $16, R22
// Find the offset to those bytes
@@ -118,7 +126,7 @@ just1:
BLT less8
// Exactly 8
- MOVD (R4), R16
+ LE_MOVD (R4), R16
CMP R17, $0
@@ -133,7 +141,7 @@ less8:
MOVD $0, R22 // shift count
CMP R5, $4
BLT less4
- MOVWZ (R4), R16
+ LE_MOVWZ (R4), R16
ADD $4, R4
ADD $-4, R5
MOVD $32, R22
@@ -141,7 +149,7 @@ less8:
less4:
CMP R5, $2
BLT less2
- MOVHZ (R4), R21
+ LE_MOVHZ (R4), R21
SLD R22, R21, R21
OR R16, R21, R16
ADD $16, R22
diff --git a/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go
index f3c3242a04..1fe600ad03 100644
--- a/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go
+++ b/vendor/golang.org/x/crypto/nacl/secretbox/secretbox.go
@@ -32,7 +32,7 @@ chunk size.
This package is interoperable with NaCl: https://nacl.cr.yp.to/secretbox.html.
*/
-package secretbox // import "golang.org/x/crypto/nacl/secretbox"
+package secretbox
import (
"golang.org/x/crypto/internal/alias"
diff --git a/vendor/golang.org/x/crypto/openpgp/armor/armor.go b/vendor/golang.org/x/crypto/openpgp/armor/armor.go
index 8907183ec0..e664d127cb 100644
--- a/vendor/golang.org/x/crypto/openpgp/armor/armor.go
+++ b/vendor/golang.org/x/crypto/openpgp/armor/armor.go
@@ -10,14 +10,15 @@
// for their specific task. If you are required to interoperate with OpenPGP
// systems and need a maintained package, consider a community fork.
// See https://golang.org/issue/44226.
-package armor // import "golang.org/x/crypto/openpgp/armor"
+package armor
import (
"bufio"
"bytes"
"encoding/base64"
- "golang.org/x/crypto/openpgp/errors"
"io"
+
+ "golang.org/x/crypto/openpgp/errors"
)
// A Block represents an OpenPGP armored structure.
diff --git a/vendor/golang.org/x/crypto/openpgp/elgamal/elgamal.go b/vendor/golang.org/x/crypto/openpgp/elgamal/elgamal.go
index 743b35a120..f922bdbcaa 100644
--- a/vendor/golang.org/x/crypto/openpgp/elgamal/elgamal.go
+++ b/vendor/golang.org/x/crypto/openpgp/elgamal/elgamal.go
@@ -16,7 +16,7 @@
// https://golang.org/issue/44226), and ElGamal in the OpenPGP ecosystem has
// compatibility and security issues (see https://eprint.iacr.org/2021/923).
// Moreover, this package doesn't protect against side-channel attacks.
-package elgamal // import "golang.org/x/crypto/openpgp/elgamal"
+package elgamal
import (
"crypto/rand"
diff --git a/vendor/golang.org/x/crypto/openpgp/errors/errors.go b/vendor/golang.org/x/crypto/openpgp/errors/errors.go
index 1d7a0ea05a..a328749471 100644
--- a/vendor/golang.org/x/crypto/openpgp/errors/errors.go
+++ b/vendor/golang.org/x/crypto/openpgp/errors/errors.go
@@ -9,7 +9,7 @@
// for their specific task. If you are required to interoperate with OpenPGP
// systems and need a maintained package, consider a community fork.
// See https://golang.org/issue/44226.
-package errors // import "golang.org/x/crypto/openpgp/errors"
+package errors
import (
"strconv"
diff --git a/vendor/golang.org/x/crypto/openpgp/packet/packet.go b/vendor/golang.org/x/crypto/openpgp/packet/packet.go
index 0a19794a8e..a84a1a214e 100644
--- a/vendor/golang.org/x/crypto/openpgp/packet/packet.go
+++ b/vendor/golang.org/x/crypto/openpgp/packet/packet.go
@@ -10,7 +10,7 @@
// for their specific task. If you are required to interoperate with OpenPGP
// systems and need a maintained package, consider a community fork.
// See https://golang.org/issue/44226.
-package packet // import "golang.org/x/crypto/openpgp/packet"
+package packet
import (
"bufio"
diff --git a/vendor/golang.org/x/crypto/openpgp/read.go b/vendor/golang.org/x/crypto/openpgp/read.go
index 48a8931468..cff3db9196 100644
--- a/vendor/golang.org/x/crypto/openpgp/read.go
+++ b/vendor/golang.org/x/crypto/openpgp/read.go
@@ -9,7 +9,7 @@
// for their specific task. If you are required to interoperate with OpenPGP
// systems and need a maintained package, consider a community fork.
// See https://golang.org/issue/44226.
-package openpgp // import "golang.org/x/crypto/openpgp"
+package openpgp
import (
"crypto"
diff --git a/vendor/golang.org/x/crypto/openpgp/s2k/s2k.go b/vendor/golang.org/x/crypto/openpgp/s2k/s2k.go
index f53244a1c7..fa1a919079 100644
--- a/vendor/golang.org/x/crypto/openpgp/s2k/s2k.go
+++ b/vendor/golang.org/x/crypto/openpgp/s2k/s2k.go
@@ -10,7 +10,7 @@
// for their specific task. If you are required to interoperate with OpenPGP
// systems and need a maintained package, consider a community fork.
// See https://golang.org/issue/44226.
-package s2k // import "golang.org/x/crypto/openpgp/s2k"
+package s2k
import (
"crypto"
diff --git a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
index 904b57e01d..28cd99c7f3 100644
--- a/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
+++ b/vendor/golang.org/x/crypto/pbkdf2/pbkdf2.go
@@ -16,7 +16,7 @@ Hash Functions SHA-1, SHA-224, SHA-256, SHA-384 and SHA-512 for HMAC. To
choose, you can pass the `New` functions from the different SHA packages to
pbkdf2.Key.
*/
-package pbkdf2 // import "golang.org/x/crypto/pbkdf2"
+package pbkdf2
import (
"crypto/hmac"
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
index 3fd05b2751..3685b34458 100644
--- a/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/hsalsa20.go
@@ -3,7 +3,7 @@
// license that can be found in the LICENSE file.
// Package salsa provides low-level access to functions in the Salsa family.
-package salsa // import "golang.org/x/crypto/salsa20/salsa"
+package salsa
import "math/bits"
diff --git a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
index fcce0234b6..3883e0ec22 100644
--- a/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
+++ b/vendor/golang.org/x/crypto/salsa20/salsa/salsa20_amd64.s
@@ -1,880 +1,880 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run salsa20_amd64_asm.go -out ../salsa20_amd64.s -pkg salsa. DO NOT EDIT.
//go:build amd64 && !purego && gc
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: https://bench.cr.yp.to/supercop.html
+// func salsa2020XORKeyStream(out *byte, in *byte, n uint64, nonce *byte, key *byte)
+// Requires: SSE2
+TEXT ·salsa2020XORKeyStream(SB), $456-40
+ // This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
+ MOVQ out+0(FP), DI
+ MOVQ in+8(FP), SI
+ MOVQ n+16(FP), DX
+ MOVQ nonce+24(FP), CX
+ MOVQ key+32(FP), R8
+ MOVQ SP, R12
+ ADDQ $0x1f, R12
+ ANDQ $-32, R12
+ MOVQ DX, R9
+ MOVQ CX, DX
+ MOVQ R8, R10
+ CMPQ R9, $0x00
+ JBE DONE
+ MOVL 20(R10), CX
+ MOVL (R10), R8
+ MOVL (DX), AX
+ MOVL 16(R10), R11
+ MOVL CX, (R12)
+ MOVL R8, 4(R12)
+ MOVL AX, 8(R12)
+ MOVL R11, 12(R12)
+ MOVL 8(DX), CX
+ MOVL 24(R10), R8
+ MOVL 4(R10), AX
+ MOVL 4(DX), R11
+ MOVL CX, 16(R12)
+ MOVL R8, 20(R12)
+ MOVL AX, 24(R12)
+ MOVL R11, 28(R12)
+ MOVL 12(DX), CX
+ MOVL 12(R10), DX
+ MOVL 28(R10), R8
+ MOVL 8(R10), AX
+ MOVL DX, 32(R12)
+ MOVL CX, 36(R12)
+ MOVL R8, 40(R12)
+ MOVL AX, 44(R12)
+ MOVQ $0x61707865, DX
+ MOVQ $0x3320646e, CX
+ MOVQ $0x79622d32, R8
+ MOVQ $0x6b206574, AX
+ MOVL DX, 48(R12)
+ MOVL CX, 52(R12)
+ MOVL R8, 56(R12)
+ MOVL AX, 60(R12)
+ CMPQ R9, $0x00000100
+ JB BYTESBETWEEN1AND255
+ MOVOA 48(R12), X0
+ PSHUFL $0x55, X0, X1
+ PSHUFL $0xaa, X0, X2
+ PSHUFL $0xff, X0, X3
+ PSHUFL $0x00, X0, X0
+ MOVOA X1, 64(R12)
+ MOVOA X2, 80(R12)
+ MOVOA X3, 96(R12)
+ MOVOA X0, 112(R12)
+ MOVOA (R12), X0
+ PSHUFL $0xaa, X0, X1
+ PSHUFL $0xff, X0, X2
+ PSHUFL $0x00, X0, X3
+ PSHUFL $0x55, X0, X0
+ MOVOA X1, 128(R12)
+ MOVOA X2, 144(R12)
+ MOVOA X3, 160(R12)
+ MOVOA X0, 176(R12)
+ MOVOA 16(R12), X0
+ PSHUFL $0xff, X0, X1
+ PSHUFL $0x55, X0, X2
+ PSHUFL $0xaa, X0, X0
+ MOVOA X1, 192(R12)
+ MOVOA X2, 208(R12)
+ MOVOA X0, 224(R12)
+ MOVOA 32(R12), X0
+ PSHUFL $0x00, X0, X1
+ PSHUFL $0xaa, X0, X2
+ PSHUFL $0xff, X0, X0
+ MOVOA X1, 240(R12)
+ MOVOA X2, 256(R12)
+ MOVOA X0, 272(R12)
-// func salsa2020XORKeyStream(out, in *byte, n uint64, nonce, key *byte)
-// This needs up to 64 bytes at 360(R12); hence the non-obvious frame size.
-TEXT ·salsa2020XORKeyStream(SB),0,$456-40 // frame = 424 + 32 byte alignment
- MOVQ out+0(FP),DI
- MOVQ in+8(FP),SI
- MOVQ n+16(FP),DX
- MOVQ nonce+24(FP),CX
- MOVQ key+32(FP),R8
+BYTESATLEAST256:
+ MOVL 16(R12), DX
+ MOVL 36(R12), CX
+ MOVL DX, 288(R12)
+ MOVL CX, 304(R12)
+ SHLQ $0x20, CX
+ ADDQ CX, DX
+ ADDQ $0x01, DX
+ MOVQ DX, CX
+ SHRQ $0x20, CX
+ MOVL DX, 292(R12)
+ MOVL CX, 308(R12)
+ ADDQ $0x01, DX
+ MOVQ DX, CX
+ SHRQ $0x20, CX
+ MOVL DX, 296(R12)
+ MOVL CX, 312(R12)
+ ADDQ $0x01, DX
+ MOVQ DX, CX
+ SHRQ $0x20, CX
+ MOVL DX, 300(R12)
+ MOVL CX, 316(R12)
+ ADDQ $0x01, DX
+ MOVQ DX, CX
+ SHRQ $0x20, CX
+ MOVL DX, 16(R12)
+ MOVL CX, 36(R12)
+ MOVQ R9, 352(R12)
+ MOVQ $0x00000014, DX
+ MOVOA 64(R12), X0
+ MOVOA 80(R12), X1
+ MOVOA 96(R12), X2
+ MOVOA 256(R12), X3
+ MOVOA 272(R12), X4
+ MOVOA 128(R12), X5
+ MOVOA 144(R12), X6
+ MOVOA 176(R12), X7
+ MOVOA 192(R12), X8
+ MOVOA 208(R12), X9
+ MOVOA 224(R12), X10
+ MOVOA 304(R12), X11
+ MOVOA 112(R12), X12
+ MOVOA 160(R12), X13
+ MOVOA 240(R12), X14
+ MOVOA 288(R12), X15
- MOVQ SP,R12
- ADDQ $31, R12
- ANDQ $~31, R12
+MAINLOOP1:
+ MOVOA X1, 320(R12)
+ MOVOA X2, 336(R12)
+ MOVOA X13, X1
+ PADDL X12, X1
+ MOVOA X1, X2
+ PSLLL $0x07, X1
+ PXOR X1, X14
+ PSRLL $0x19, X2
+ PXOR X2, X14
+ MOVOA X7, X1
+ PADDL X0, X1
+ MOVOA X1, X2
+ PSLLL $0x07, X1
+ PXOR X1, X11
+ PSRLL $0x19, X2
+ PXOR X2, X11
+ MOVOA X12, X1
+ PADDL X14, X1
+ MOVOA X1, X2
+ PSLLL $0x09, X1
+ PXOR X1, X15
+ PSRLL $0x17, X2
+ PXOR X2, X15
+ MOVOA X0, X1
+ PADDL X11, X1
+ MOVOA X1, X2
+ PSLLL $0x09, X1
+ PXOR X1, X9
+ PSRLL $0x17, X2
+ PXOR X2, X9
+ MOVOA X14, X1
+ PADDL X15, X1
+ MOVOA X1, X2
+ PSLLL $0x0d, X1
+ PXOR X1, X13
+ PSRLL $0x13, X2
+ PXOR X2, X13
+ MOVOA X11, X1
+ PADDL X9, X1
+ MOVOA X1, X2
+ PSLLL $0x0d, X1
+ PXOR X1, X7
+ PSRLL $0x13, X2
+ PXOR X2, X7
+ MOVOA X15, X1
+ PADDL X13, X1
+ MOVOA X1, X2
+ PSLLL $0x12, X1
+ PXOR X1, X12
+ PSRLL $0x0e, X2
+ PXOR X2, X12
+ MOVOA 320(R12), X1
+ MOVOA X12, 320(R12)
+ MOVOA X9, X2
+ PADDL X7, X2
+ MOVOA X2, X12
+ PSLLL $0x12, X2
+ PXOR X2, X0
+ PSRLL $0x0e, X12
+ PXOR X12, X0
+ MOVOA X5, X2
+ PADDL X1, X2
+ MOVOA X2, X12
+ PSLLL $0x07, X2
+ PXOR X2, X3
+ PSRLL $0x19, X12
+ PXOR X12, X3
+ MOVOA 336(R12), X2
+ MOVOA X0, 336(R12)
+ MOVOA X6, X0
+ PADDL X2, X0
+ MOVOA X0, X12
+ PSLLL $0x07, X0
+ PXOR X0, X4
+ PSRLL $0x19, X12
+ PXOR X12, X4
+ MOVOA X1, X0
+ PADDL X3, X0
+ MOVOA X0, X12
+ PSLLL $0x09, X0
+ PXOR X0, X10
+ PSRLL $0x17, X12
+ PXOR X12, X10
+ MOVOA X2, X0
+ PADDL X4, X0
+ MOVOA X0, X12
+ PSLLL $0x09, X0
+ PXOR X0, X8
+ PSRLL $0x17, X12
+ PXOR X12, X8
+ MOVOA X3, X0
+ PADDL X10, X0
+ MOVOA X0, X12
+ PSLLL $0x0d, X0
+ PXOR X0, X5
+ PSRLL $0x13, X12
+ PXOR X12, X5
+ MOVOA X4, X0
+ PADDL X8, X0
+ MOVOA X0, X12
+ PSLLL $0x0d, X0
+ PXOR X0, X6
+ PSRLL $0x13, X12
+ PXOR X12, X6
+ MOVOA X10, X0
+ PADDL X5, X0
+ MOVOA X0, X12
+ PSLLL $0x12, X0
+ PXOR X0, X1
+ PSRLL $0x0e, X12
+ PXOR X12, X1
+ MOVOA 320(R12), X0
+ MOVOA X1, 320(R12)
+ MOVOA X4, X1
+ PADDL X0, X1
+ MOVOA X1, X12
+ PSLLL $0x07, X1
+ PXOR X1, X7
+ PSRLL $0x19, X12
+ PXOR X12, X7
+ MOVOA X8, X1
+ PADDL X6, X1
+ MOVOA X1, X12
+ PSLLL $0x12, X1
+ PXOR X1, X2
+ PSRLL $0x0e, X12
+ PXOR X12, X2
+ MOVOA 336(R12), X12
+ MOVOA X2, 336(R12)
+ MOVOA X14, X1
+ PADDL X12, X1
+ MOVOA X1, X2
+ PSLLL $0x07, X1
+ PXOR X1, X5
+ PSRLL $0x19, X2
+ PXOR X2, X5
+ MOVOA X0, X1
+ PADDL X7, X1
+ MOVOA X1, X2
+ PSLLL $0x09, X1
+ PXOR X1, X10
+ PSRLL $0x17, X2
+ PXOR X2, X10
+ MOVOA X12, X1
+ PADDL X5, X1
+ MOVOA X1, X2
+ PSLLL $0x09, X1
+ PXOR X1, X8
+ PSRLL $0x17, X2
+ PXOR X2, X8
+ MOVOA X7, X1
+ PADDL X10, X1
+ MOVOA X1, X2
+ PSLLL $0x0d, X1
+ PXOR X1, X4
+ PSRLL $0x13, X2
+ PXOR X2, X4
+ MOVOA X5, X1
+ PADDL X8, X1
+ MOVOA X1, X2
+ PSLLL $0x0d, X1
+ PXOR X1, X14
+ PSRLL $0x13, X2
+ PXOR X2, X14
+ MOVOA X10, X1
+ PADDL X4, X1
+ MOVOA X1, X2
+ PSLLL $0x12, X1
+ PXOR X1, X0
+ PSRLL $0x0e, X2
+ PXOR X2, X0
+ MOVOA 320(R12), X1
+ MOVOA X0, 320(R12)
+ MOVOA X8, X0
+ PADDL X14, X0
+ MOVOA X0, X2
+ PSLLL $0x12, X0
+ PXOR X0, X12
+ PSRLL $0x0e, X2
+ PXOR X2, X12
+ MOVOA X11, X0
+ PADDL X1, X0
+ MOVOA X0, X2
+ PSLLL $0x07, X0
+ PXOR X0, X6
+ PSRLL $0x19, X2
+ PXOR X2, X6
+ MOVOA 336(R12), X2
+ MOVOA X12, 336(R12)
+ MOVOA X3, X0
+ PADDL X2, X0
+ MOVOA X0, X12
+ PSLLL $0x07, X0
+ PXOR X0, X13
+ PSRLL $0x19, X12
+ PXOR X12, X13
+ MOVOA X1, X0
+ PADDL X6, X0
+ MOVOA X0, X12
+ PSLLL $0x09, X0
+ PXOR X0, X15
+ PSRLL $0x17, X12
+ PXOR X12, X15
+ MOVOA X2, X0
+ PADDL X13, X0
+ MOVOA X0, X12
+ PSLLL $0x09, X0
+ PXOR X0, X9
+ PSRLL $0x17, X12
+ PXOR X12, X9
+ MOVOA X6, X0
+ PADDL X15, X0
+ MOVOA X0, X12
+ PSLLL $0x0d, X0
+ PXOR X0, X11
+ PSRLL $0x13, X12
+ PXOR X12, X11
+ MOVOA X13, X0
+ PADDL X9, X0
+ MOVOA X0, X12
+ PSLLL $0x0d, X0
+ PXOR X0, X3
+ PSRLL $0x13, X12
+ PXOR X12, X3
+ MOVOA X15, X0
+ PADDL X11, X0
+ MOVOA X0, X12
+ PSLLL $0x12, X0
+ PXOR X0, X1
+ PSRLL $0x0e, X12
+ PXOR X12, X1
+ MOVOA X9, X0
+ PADDL X3, X0
+ MOVOA X0, X12
+ PSLLL $0x12, X0
+ PXOR X0, X2
+ PSRLL $0x0e, X12
+ PXOR X12, X2
+ MOVOA 320(R12), X12
+ MOVOA 336(R12), X0
+ SUBQ $0x02, DX
+ JA MAINLOOP1
+ PADDL 112(R12), X12
+ PADDL 176(R12), X7
+ PADDL 224(R12), X10
+ PADDL 272(R12), X4
+ MOVD X12, DX
+ MOVD X7, CX
+ MOVD X10, R8
+ MOVD X4, R9
+ PSHUFL $0x39, X12, X12
+ PSHUFL $0x39, X7, X7
+ PSHUFL $0x39, X10, X10
+ PSHUFL $0x39, X4, X4
+ XORL (SI), DX
+ XORL 4(SI), CX
+ XORL 8(SI), R8
+ XORL 12(SI), R9
+ MOVL DX, (DI)
+ MOVL CX, 4(DI)
+ MOVL R8, 8(DI)
+ MOVL R9, 12(DI)
+ MOVD X12, DX
+ MOVD X7, CX
+ MOVD X10, R8
+ MOVD X4, R9
+ PSHUFL $0x39, X12, X12
+ PSHUFL $0x39, X7, X7
+ PSHUFL $0x39, X10, X10
+ PSHUFL $0x39, X4, X4
+ XORL 64(SI), DX
+ XORL 68(SI), CX
+ XORL 72(SI), R8
+ XORL 76(SI), R9
+ MOVL DX, 64(DI)
+ MOVL CX, 68(DI)
+ MOVL R8, 72(DI)
+ MOVL R9, 76(DI)
+ MOVD X12, DX
+ MOVD X7, CX
+ MOVD X10, R8
+ MOVD X4, R9
+ PSHUFL $0x39, X12, X12
+ PSHUFL $0x39, X7, X7
+ PSHUFL $0x39, X10, X10
+ PSHUFL $0x39, X4, X4
+ XORL 128(SI), DX
+ XORL 132(SI), CX
+ XORL 136(SI), R8
+ XORL 140(SI), R9
+ MOVL DX, 128(DI)
+ MOVL CX, 132(DI)
+ MOVL R8, 136(DI)
+ MOVL R9, 140(DI)
+ MOVD X12, DX
+ MOVD X7, CX
+ MOVD X10, R8
+ MOVD X4, R9
+ XORL 192(SI), DX
+ XORL 196(SI), CX
+ XORL 200(SI), R8
+ XORL 204(SI), R9
+ MOVL DX, 192(DI)
+ MOVL CX, 196(DI)
+ MOVL R8, 200(DI)
+ MOVL R9, 204(DI)
+ PADDL 240(R12), X14
+ PADDL 64(R12), X0
+ PADDL 128(R12), X5
+ PADDL 192(R12), X8
+ MOVD X14, DX
+ MOVD X0, CX
+ MOVD X5, R8
+ MOVD X8, R9
+ PSHUFL $0x39, X14, X14
+ PSHUFL $0x39, X0, X0
+ PSHUFL $0x39, X5, X5
+ PSHUFL $0x39, X8, X8
+ XORL 16(SI), DX
+ XORL 20(SI), CX
+ XORL 24(SI), R8
+ XORL 28(SI), R9
+ MOVL DX, 16(DI)
+ MOVL CX, 20(DI)
+ MOVL R8, 24(DI)
+ MOVL R9, 28(DI)
+ MOVD X14, DX
+ MOVD X0, CX
+ MOVD X5, R8
+ MOVD X8, R9
+ PSHUFL $0x39, X14, X14
+ PSHUFL $0x39, X0, X0
+ PSHUFL $0x39, X5, X5
+ PSHUFL $0x39, X8, X8
+ XORL 80(SI), DX
+ XORL 84(SI), CX
+ XORL 88(SI), R8
+ XORL 92(SI), R9
+ MOVL DX, 80(DI)
+ MOVL CX, 84(DI)
+ MOVL R8, 88(DI)
+ MOVL R9, 92(DI)
+ MOVD X14, DX
+ MOVD X0, CX
+ MOVD X5, R8
+ MOVD X8, R9
+ PSHUFL $0x39, X14, X14
+ PSHUFL $0x39, X0, X0
+ PSHUFL $0x39, X5, X5
+ PSHUFL $0x39, X8, X8
+ XORL 144(SI), DX
+ XORL 148(SI), CX
+ XORL 152(SI), R8
+ XORL 156(SI), R9
+ MOVL DX, 144(DI)
+ MOVL CX, 148(DI)
+ MOVL R8, 152(DI)
+ MOVL R9, 156(DI)
+ MOVD X14, DX
+ MOVD X0, CX
+ MOVD X5, R8
+ MOVD X8, R9
+ XORL 208(SI), DX
+ XORL 212(SI), CX
+ XORL 216(SI), R8
+ XORL 220(SI), R9
+ MOVL DX, 208(DI)
+ MOVL CX, 212(DI)
+ MOVL R8, 216(DI)
+ MOVL R9, 220(DI)
+ PADDL 288(R12), X15
+ PADDL 304(R12), X11
+ PADDL 80(R12), X1
+ PADDL 144(R12), X6
+ MOVD X15, DX
+ MOVD X11, CX
+ MOVD X1, R8
+ MOVD X6, R9
+ PSHUFL $0x39, X15, X15
+ PSHUFL $0x39, X11, X11
+ PSHUFL $0x39, X1, X1
+ PSHUFL $0x39, X6, X6
+ XORL 32(SI), DX
+ XORL 36(SI), CX
+ XORL 40(SI), R8
+ XORL 44(SI), R9
+ MOVL DX, 32(DI)
+ MOVL CX, 36(DI)
+ MOVL R8, 40(DI)
+ MOVL R9, 44(DI)
+ MOVD X15, DX
+ MOVD X11, CX
+ MOVD X1, R8
+ MOVD X6, R9
+ PSHUFL $0x39, X15, X15
+ PSHUFL $0x39, X11, X11
+ PSHUFL $0x39, X1, X1
+ PSHUFL $0x39, X6, X6
+ XORL 96(SI), DX
+ XORL 100(SI), CX
+ XORL 104(SI), R8
+ XORL 108(SI), R9
+ MOVL DX, 96(DI)
+ MOVL CX, 100(DI)
+ MOVL R8, 104(DI)
+ MOVL R9, 108(DI)
+ MOVD X15, DX
+ MOVD X11, CX
+ MOVD X1, R8
+ MOVD X6, R9
+ PSHUFL $0x39, X15, X15
+ PSHUFL $0x39, X11, X11
+ PSHUFL $0x39, X1, X1
+ PSHUFL $0x39, X6, X6
+ XORL 160(SI), DX
+ XORL 164(SI), CX
+ XORL 168(SI), R8
+ XORL 172(SI), R9
+ MOVL DX, 160(DI)
+ MOVL CX, 164(DI)
+ MOVL R8, 168(DI)
+ MOVL R9, 172(DI)
+ MOVD X15, DX
+ MOVD X11, CX
+ MOVD X1, R8
+ MOVD X6, R9
+ XORL 224(SI), DX
+ XORL 228(SI), CX
+ XORL 232(SI), R8
+ XORL 236(SI), R9
+ MOVL DX, 224(DI)
+ MOVL CX, 228(DI)
+ MOVL R8, 232(DI)
+ MOVL R9, 236(DI)
+ PADDL 160(R12), X13
+ PADDL 208(R12), X9
+ PADDL 256(R12), X3
+ PADDL 96(R12), X2
+ MOVD X13, DX
+ MOVD X9, CX
+ MOVD X3, R8
+ MOVD X2, R9
+ PSHUFL $0x39, X13, X13
+ PSHUFL $0x39, X9, X9
+ PSHUFL $0x39, X3, X3
+ PSHUFL $0x39, X2, X2
+ XORL 48(SI), DX
+ XORL 52(SI), CX
+ XORL 56(SI), R8
+ XORL 60(SI), R9
+ MOVL DX, 48(DI)
+ MOVL CX, 52(DI)
+ MOVL R8, 56(DI)
+ MOVL R9, 60(DI)
+ MOVD X13, DX
+ MOVD X9, CX
+ MOVD X3, R8
+ MOVD X2, R9
+ PSHUFL $0x39, X13, X13
+ PSHUFL $0x39, X9, X9
+ PSHUFL $0x39, X3, X3
+ PSHUFL $0x39, X2, X2
+ XORL 112(SI), DX
+ XORL 116(SI), CX
+ XORL 120(SI), R8
+ XORL 124(SI), R9
+ MOVL DX, 112(DI)
+ MOVL CX, 116(DI)
+ MOVL R8, 120(DI)
+ MOVL R9, 124(DI)
+ MOVD X13, DX
+ MOVD X9, CX
+ MOVD X3, R8
+ MOVD X2, R9
+ PSHUFL $0x39, X13, X13
+ PSHUFL $0x39, X9, X9
+ PSHUFL $0x39, X3, X3
+ PSHUFL $0x39, X2, X2
+ XORL 176(SI), DX
+ XORL 180(SI), CX
+ XORL 184(SI), R8
+ XORL 188(SI), R9
+ MOVL DX, 176(DI)
+ MOVL CX, 180(DI)
+ MOVL R8, 184(DI)
+ MOVL R9, 188(DI)
+ MOVD X13, DX
+ MOVD X9, CX
+ MOVD X3, R8
+ MOVD X2, R9
+ XORL 240(SI), DX
+ XORL 244(SI), CX
+ XORL 248(SI), R8
+ XORL 252(SI), R9
+ MOVL DX, 240(DI)
+ MOVL CX, 244(DI)
+ MOVL R8, 248(DI)
+ MOVL R9, 252(DI)
+ MOVQ 352(R12), R9
+ SUBQ $0x00000100, R9
+ ADDQ $0x00000100, SI
+ ADDQ $0x00000100, DI
+ CMPQ R9, $0x00000100
+ JAE BYTESATLEAST256
+ CMPQ R9, $0x00
+ JBE DONE
- MOVQ DX,R9
- MOVQ CX,DX
- MOVQ R8,R10
- CMPQ R9,$0
- JBE DONE
- START:
- MOVL 20(R10),CX
- MOVL 0(R10),R8
- MOVL 0(DX),AX
- MOVL 16(R10),R11
- MOVL CX,0(R12)
- MOVL R8, 4 (R12)
- MOVL AX, 8 (R12)
- MOVL R11, 12 (R12)
- MOVL 8(DX),CX
- MOVL 24(R10),R8
- MOVL 4(R10),AX
- MOVL 4(DX),R11
- MOVL CX,16(R12)
- MOVL R8, 20 (R12)
- MOVL AX, 24 (R12)
- MOVL R11, 28 (R12)
- MOVL 12(DX),CX
- MOVL 12(R10),DX
- MOVL 28(R10),R8
- MOVL 8(R10),AX
- MOVL DX,32(R12)
- MOVL CX, 36 (R12)
- MOVL R8, 40 (R12)
- MOVL AX, 44 (R12)
- MOVQ $1634760805,DX
- MOVQ $857760878,CX
- MOVQ $2036477234,R8
- MOVQ $1797285236,AX
- MOVL DX,48(R12)
- MOVL CX, 52 (R12)
- MOVL R8, 56 (R12)
- MOVL AX, 60 (R12)
- CMPQ R9,$256
- JB BYTESBETWEEN1AND255
- MOVOA 48(R12),X0
- PSHUFL $0X55,X0,X1
- PSHUFL $0XAA,X0,X2
- PSHUFL $0XFF,X0,X3
- PSHUFL $0X00,X0,X0
- MOVOA X1,64(R12)
- MOVOA X2,80(R12)
- MOVOA X3,96(R12)
- MOVOA X0,112(R12)
- MOVOA 0(R12),X0
- PSHUFL $0XAA,X0,X1
- PSHUFL $0XFF,X0,X2
- PSHUFL $0X00,X0,X3
- PSHUFL $0X55,X0,X0
- MOVOA X1,128(R12)
- MOVOA X2,144(R12)
- MOVOA X3,160(R12)
- MOVOA X0,176(R12)
- MOVOA 16(R12),X0
- PSHUFL $0XFF,X0,X1
- PSHUFL $0X55,X0,X2
- PSHUFL $0XAA,X0,X0
- MOVOA X1,192(R12)
- MOVOA X2,208(R12)
- MOVOA X0,224(R12)
- MOVOA 32(R12),X0
- PSHUFL $0X00,X0,X1
- PSHUFL $0XAA,X0,X2
- PSHUFL $0XFF,X0,X0
- MOVOA X1,240(R12)
- MOVOA X2,256(R12)
- MOVOA X0,272(R12)
- BYTESATLEAST256:
- MOVL 16(R12),DX
- MOVL 36 (R12),CX
- MOVL DX,288(R12)
- MOVL CX,304(R12)
- SHLQ $32,CX
- ADDQ CX,DX
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX, 292 (R12)
- MOVL CX, 308 (R12)
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX, 296 (R12)
- MOVL CX, 312 (R12)
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX, 300 (R12)
- MOVL CX, 316 (R12)
- ADDQ $1,DX
- MOVQ DX,CX
- SHRQ $32,CX
- MOVL DX,16(R12)
- MOVL CX, 36 (R12)
- MOVQ R9,352(R12)
- MOVQ $20,DX
- MOVOA 64(R12),X0
- MOVOA 80(R12),X1
- MOVOA 96(R12),X2
- MOVOA 256(R12),X3
- MOVOA 272(R12),X4
- MOVOA 128(R12),X5
- MOVOA 144(R12),X6
- MOVOA 176(R12),X7
- MOVOA 192(R12),X8
- MOVOA 208(R12),X9
- MOVOA 224(R12),X10
- MOVOA 304(R12),X11
- MOVOA 112(R12),X12
- MOVOA 160(R12),X13
- MOVOA 240(R12),X14
- MOVOA 288(R12),X15
- MAINLOOP1:
- MOVOA X1,320(R12)
- MOVOA X2,336(R12)
- MOVOA X13,X1
- PADDL X12,X1
- MOVOA X1,X2
- PSLLL $7,X1
- PXOR X1,X14
- PSRLL $25,X2
- PXOR X2,X14
- MOVOA X7,X1
- PADDL X0,X1
- MOVOA X1,X2
- PSLLL $7,X1
- PXOR X1,X11
- PSRLL $25,X2
- PXOR X2,X11
- MOVOA X12,X1
- PADDL X14,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X15
- PSRLL $23,X2
- PXOR X2,X15
- MOVOA X0,X1
- PADDL X11,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X9
- PSRLL $23,X2
- PXOR X2,X9
- MOVOA X14,X1
- PADDL X15,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X13
- PSRLL $19,X2
- PXOR X2,X13
- MOVOA X11,X1
- PADDL X9,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X7
- PSRLL $19,X2
- PXOR X2,X7
- MOVOA X15,X1
- PADDL X13,X1
- MOVOA X1,X2
- PSLLL $18,X1
- PXOR X1,X12
- PSRLL $14,X2
- PXOR X2,X12
- MOVOA 320(R12),X1
- MOVOA X12,320(R12)
- MOVOA X9,X2
- PADDL X7,X2
- MOVOA X2,X12
- PSLLL $18,X2
- PXOR X2,X0
- PSRLL $14,X12
- PXOR X12,X0
- MOVOA X5,X2
- PADDL X1,X2
- MOVOA X2,X12
- PSLLL $7,X2
- PXOR X2,X3
- PSRLL $25,X12
- PXOR X12,X3
- MOVOA 336(R12),X2
- MOVOA X0,336(R12)
- MOVOA X6,X0
- PADDL X2,X0
- MOVOA X0,X12
- PSLLL $7,X0
- PXOR X0,X4
- PSRLL $25,X12
- PXOR X12,X4
- MOVOA X1,X0
- PADDL X3,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X10
- PSRLL $23,X12
- PXOR X12,X10
- MOVOA X2,X0
- PADDL X4,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X8
- PSRLL $23,X12
- PXOR X12,X8
- MOVOA X3,X0
- PADDL X10,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X5
- PSRLL $19,X12
- PXOR X12,X5
- MOVOA X4,X0
- PADDL X8,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X6
- PSRLL $19,X12
- PXOR X12,X6
- MOVOA X10,X0
- PADDL X5,X0
- MOVOA X0,X12
- PSLLL $18,X0
- PXOR X0,X1
- PSRLL $14,X12
- PXOR X12,X1
- MOVOA 320(R12),X0
- MOVOA X1,320(R12)
- MOVOA X4,X1
- PADDL X0,X1
- MOVOA X1,X12
- PSLLL $7,X1
- PXOR X1,X7
- PSRLL $25,X12
- PXOR X12,X7
- MOVOA X8,X1
- PADDL X6,X1
- MOVOA X1,X12
- PSLLL $18,X1
- PXOR X1,X2
- PSRLL $14,X12
- PXOR X12,X2
- MOVOA 336(R12),X12
- MOVOA X2,336(R12)
- MOVOA X14,X1
- PADDL X12,X1
- MOVOA X1,X2
- PSLLL $7,X1
- PXOR X1,X5
- PSRLL $25,X2
- PXOR X2,X5
- MOVOA X0,X1
- PADDL X7,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X10
- PSRLL $23,X2
- PXOR X2,X10
- MOVOA X12,X1
- PADDL X5,X1
- MOVOA X1,X2
- PSLLL $9,X1
- PXOR X1,X8
- PSRLL $23,X2
- PXOR X2,X8
- MOVOA X7,X1
- PADDL X10,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X4
- PSRLL $19,X2
- PXOR X2,X4
- MOVOA X5,X1
- PADDL X8,X1
- MOVOA X1,X2
- PSLLL $13,X1
- PXOR X1,X14
- PSRLL $19,X2
- PXOR X2,X14
- MOVOA X10,X1
- PADDL X4,X1
- MOVOA X1,X2
- PSLLL $18,X1
- PXOR X1,X0
- PSRLL $14,X2
- PXOR X2,X0
- MOVOA 320(R12),X1
- MOVOA X0,320(R12)
- MOVOA X8,X0
- PADDL X14,X0
- MOVOA X0,X2
- PSLLL $18,X0
- PXOR X0,X12
- PSRLL $14,X2
- PXOR X2,X12
- MOVOA X11,X0
- PADDL X1,X0
- MOVOA X0,X2
- PSLLL $7,X0
- PXOR X0,X6
- PSRLL $25,X2
- PXOR X2,X6
- MOVOA 336(R12),X2
- MOVOA X12,336(R12)
- MOVOA X3,X0
- PADDL X2,X0
- MOVOA X0,X12
- PSLLL $7,X0
- PXOR X0,X13
- PSRLL $25,X12
- PXOR X12,X13
- MOVOA X1,X0
- PADDL X6,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X15
- PSRLL $23,X12
- PXOR X12,X15
- MOVOA X2,X0
- PADDL X13,X0
- MOVOA X0,X12
- PSLLL $9,X0
- PXOR X0,X9
- PSRLL $23,X12
- PXOR X12,X9
- MOVOA X6,X0
- PADDL X15,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X11
- PSRLL $19,X12
- PXOR X12,X11
- MOVOA X13,X0
- PADDL X9,X0
- MOVOA X0,X12
- PSLLL $13,X0
- PXOR X0,X3
- PSRLL $19,X12
- PXOR X12,X3
- MOVOA X15,X0
- PADDL X11,X0
- MOVOA X0,X12
- PSLLL $18,X0
- PXOR X0,X1
- PSRLL $14,X12
- PXOR X12,X1
- MOVOA X9,X0
- PADDL X3,X0
- MOVOA X0,X12
- PSLLL $18,X0
- PXOR X0,X2
- PSRLL $14,X12
- PXOR X12,X2
- MOVOA 320(R12),X12
- MOVOA 336(R12),X0
- SUBQ $2,DX
- JA MAINLOOP1
- PADDL 112(R12),X12
- PADDL 176(R12),X7
- PADDL 224(R12),X10
- PADDL 272(R12),X4
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- PSHUFL $0X39,X12,X12
- PSHUFL $0X39,X7,X7
- PSHUFL $0X39,X10,X10
- PSHUFL $0X39,X4,X4
- XORL 0(SI),DX
- XORL 4(SI),CX
- XORL 8(SI),R8
- XORL 12(SI),R9
- MOVL DX,0(DI)
- MOVL CX,4(DI)
- MOVL R8,8(DI)
- MOVL R9,12(DI)
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- PSHUFL $0X39,X12,X12
- PSHUFL $0X39,X7,X7
- PSHUFL $0X39,X10,X10
- PSHUFL $0X39,X4,X4
- XORL 64(SI),DX
- XORL 68(SI),CX
- XORL 72(SI),R8
- XORL 76(SI),R9
- MOVL DX,64(DI)
- MOVL CX,68(DI)
- MOVL R8,72(DI)
- MOVL R9,76(DI)
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- PSHUFL $0X39,X12,X12
- PSHUFL $0X39,X7,X7
- PSHUFL $0X39,X10,X10
- PSHUFL $0X39,X4,X4
- XORL 128(SI),DX
- XORL 132(SI),CX
- XORL 136(SI),R8
- XORL 140(SI),R9
- MOVL DX,128(DI)
- MOVL CX,132(DI)
- MOVL R8,136(DI)
- MOVL R9,140(DI)
- MOVD X12,DX
- MOVD X7,CX
- MOVD X10,R8
- MOVD X4,R9
- XORL 192(SI),DX
- XORL 196(SI),CX
- XORL 200(SI),R8
- XORL 204(SI),R9
- MOVL DX,192(DI)
- MOVL CX,196(DI)
- MOVL R8,200(DI)
- MOVL R9,204(DI)
- PADDL 240(R12),X14
- PADDL 64(R12),X0
- PADDL 128(R12),X5
- PADDL 192(R12),X8
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- PSHUFL $0X39,X14,X14
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X5,X5
- PSHUFL $0X39,X8,X8
- XORL 16(SI),DX
- XORL 20(SI),CX
- XORL 24(SI),R8
- XORL 28(SI),R9
- MOVL DX,16(DI)
- MOVL CX,20(DI)
- MOVL R8,24(DI)
- MOVL R9,28(DI)
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- PSHUFL $0X39,X14,X14
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X5,X5
- PSHUFL $0X39,X8,X8
- XORL 80(SI),DX
- XORL 84(SI),CX
- XORL 88(SI),R8
- XORL 92(SI),R9
- MOVL DX,80(DI)
- MOVL CX,84(DI)
- MOVL R8,88(DI)
- MOVL R9,92(DI)
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- PSHUFL $0X39,X14,X14
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X5,X5
- PSHUFL $0X39,X8,X8
- XORL 144(SI),DX
- XORL 148(SI),CX
- XORL 152(SI),R8
- XORL 156(SI),R9
- MOVL DX,144(DI)
- MOVL CX,148(DI)
- MOVL R8,152(DI)
- MOVL R9,156(DI)
- MOVD X14,DX
- MOVD X0,CX
- MOVD X5,R8
- MOVD X8,R9
- XORL 208(SI),DX
- XORL 212(SI),CX
- XORL 216(SI),R8
- XORL 220(SI),R9
- MOVL DX,208(DI)
- MOVL CX,212(DI)
- MOVL R8,216(DI)
- MOVL R9,220(DI)
- PADDL 288(R12),X15
- PADDL 304(R12),X11
- PADDL 80(R12),X1
- PADDL 144(R12),X6
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- PSHUFL $0X39,X15,X15
- PSHUFL $0X39,X11,X11
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X6,X6
- XORL 32(SI),DX
- XORL 36(SI),CX
- XORL 40(SI),R8
- XORL 44(SI),R9
- MOVL DX,32(DI)
- MOVL CX,36(DI)
- MOVL R8,40(DI)
- MOVL R9,44(DI)
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- PSHUFL $0X39,X15,X15
- PSHUFL $0X39,X11,X11
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X6,X6
- XORL 96(SI),DX
- XORL 100(SI),CX
- XORL 104(SI),R8
- XORL 108(SI),R9
- MOVL DX,96(DI)
- MOVL CX,100(DI)
- MOVL R8,104(DI)
- MOVL R9,108(DI)
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- PSHUFL $0X39,X15,X15
- PSHUFL $0X39,X11,X11
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X6,X6
- XORL 160(SI),DX
- XORL 164(SI),CX
- XORL 168(SI),R8
- XORL 172(SI),R9
- MOVL DX,160(DI)
- MOVL CX,164(DI)
- MOVL R8,168(DI)
- MOVL R9,172(DI)
- MOVD X15,DX
- MOVD X11,CX
- MOVD X1,R8
- MOVD X6,R9
- XORL 224(SI),DX
- XORL 228(SI),CX
- XORL 232(SI),R8
- XORL 236(SI),R9
- MOVL DX,224(DI)
- MOVL CX,228(DI)
- MOVL R8,232(DI)
- MOVL R9,236(DI)
- PADDL 160(R12),X13
- PADDL 208(R12),X9
- PADDL 256(R12),X3
- PADDL 96(R12),X2
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- PSHUFL $0X39,X13,X13
- PSHUFL $0X39,X9,X9
- PSHUFL $0X39,X3,X3
- PSHUFL $0X39,X2,X2
- XORL 48(SI),DX
- XORL 52(SI),CX
- XORL 56(SI),R8
- XORL 60(SI),R9
- MOVL DX,48(DI)
- MOVL CX,52(DI)
- MOVL R8,56(DI)
- MOVL R9,60(DI)
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- PSHUFL $0X39,X13,X13
- PSHUFL $0X39,X9,X9
- PSHUFL $0X39,X3,X3
- PSHUFL $0X39,X2,X2
- XORL 112(SI),DX
- XORL 116(SI),CX
- XORL 120(SI),R8
- XORL 124(SI),R9
- MOVL DX,112(DI)
- MOVL CX,116(DI)
- MOVL R8,120(DI)
- MOVL R9,124(DI)
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- PSHUFL $0X39,X13,X13
- PSHUFL $0X39,X9,X9
- PSHUFL $0X39,X3,X3
- PSHUFL $0X39,X2,X2
- XORL 176(SI),DX
- XORL 180(SI),CX
- XORL 184(SI),R8
- XORL 188(SI),R9
- MOVL DX,176(DI)
- MOVL CX,180(DI)
- MOVL R8,184(DI)
- MOVL R9,188(DI)
- MOVD X13,DX
- MOVD X9,CX
- MOVD X3,R8
- MOVD X2,R9
- XORL 240(SI),DX
- XORL 244(SI),CX
- XORL 248(SI),R8
- XORL 252(SI),R9
- MOVL DX,240(DI)
- MOVL CX,244(DI)
- MOVL R8,248(DI)
- MOVL R9,252(DI)
- MOVQ 352(R12),R9
- SUBQ $256,R9
- ADDQ $256,SI
- ADDQ $256,DI
- CMPQ R9,$256
- JAE BYTESATLEAST256
- CMPQ R9,$0
- JBE DONE
- BYTESBETWEEN1AND255:
- CMPQ R9,$64
- JAE NOCOPY
- MOVQ DI,DX
- LEAQ 360(R12),DI
- MOVQ R9,CX
+BYTESBETWEEN1AND255:
+ CMPQ R9, $0x40
+ JAE NOCOPY
+ MOVQ DI, DX
+ LEAQ 360(R12), DI
+ MOVQ R9, CX
REP; MOVSB
- LEAQ 360(R12),DI
- LEAQ 360(R12),SI
- NOCOPY:
- MOVQ R9,352(R12)
- MOVOA 48(R12),X0
- MOVOA 0(R12),X1
- MOVOA 16(R12),X2
- MOVOA 32(R12),X3
- MOVOA X1,X4
- MOVQ $20,CX
- MAINLOOP2:
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X3
- PXOR X6,X3
- PADDL X3,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X3,X3
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X1
- PSHUFL $0X4E,X2,X2
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X1,X1
- PXOR X6,X0
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X1
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X1,X1
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X3
- PSHUFL $0X4E,X2,X2
- PXOR X6,X3
- PADDL X3,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X3,X3
- PXOR X6,X0
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X3
- PXOR X6,X3
- PADDL X3,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X3,X3
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X1
- PSHUFL $0X4E,X2,X2
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X3,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X1,X1
- PXOR X6,X0
- PADDL X0,X4
- MOVOA X0,X5
- MOVOA X4,X6
- PSLLL $7,X4
- PSRLL $25,X6
- PXOR X4,X1
- PXOR X6,X1
- PADDL X1,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $9,X5
- PSRLL $23,X6
- PXOR X5,X2
- PSHUFL $0X93,X1,X1
- PXOR X6,X2
- PADDL X2,X4
- MOVOA X2,X5
- MOVOA X4,X6
- PSLLL $13,X4
- PSRLL $19,X6
- PXOR X4,X3
- PSHUFL $0X4E,X2,X2
- PXOR X6,X3
- SUBQ $4,CX
- PADDL X3,X5
- MOVOA X1,X4
- MOVOA X5,X6
- PSLLL $18,X5
- PXOR X7,X7
- PSRLL $14,X6
- PXOR X5,X0
- PSHUFL $0X39,X3,X3
- PXOR X6,X0
- JA MAINLOOP2
- PADDL 48(R12),X0
- PADDL 0(R12),X1
- PADDL 16(R12),X2
- PADDL 32(R12),X3
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X2,X2
- PSHUFL $0X39,X3,X3
- XORL 0(SI),CX
- XORL 48(SI),R8
- XORL 32(SI),R9
- XORL 16(SI),AX
- MOVL CX,0(DI)
- MOVL R8,48(DI)
- MOVL R9,32(DI)
- MOVL AX,16(DI)
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X2,X2
- PSHUFL $0X39,X3,X3
- XORL 20(SI),CX
- XORL 4(SI),R8
- XORL 52(SI),R9
- XORL 36(SI),AX
- MOVL CX,20(DI)
- MOVL R8,4(DI)
- MOVL R9,52(DI)
- MOVL AX,36(DI)
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- PSHUFL $0X39,X0,X0
- PSHUFL $0X39,X1,X1
- PSHUFL $0X39,X2,X2
- PSHUFL $0X39,X3,X3
- XORL 40(SI),CX
- XORL 24(SI),R8
- XORL 8(SI),R9
- XORL 56(SI),AX
- MOVL CX,40(DI)
- MOVL R8,24(DI)
- MOVL R9,8(DI)
- MOVL AX,56(DI)
- MOVD X0,CX
- MOVD X1,R8
- MOVD X2,R9
- MOVD X3,AX
- XORL 60(SI),CX
- XORL 44(SI),R8
- XORL 28(SI),R9
- XORL 12(SI),AX
- MOVL CX,60(DI)
- MOVL R8,44(DI)
- MOVL R9,28(DI)
- MOVL AX,12(DI)
- MOVQ 352(R12),R9
- MOVL 16(R12),CX
- MOVL 36 (R12),R8
- ADDQ $1,CX
- SHLQ $32,R8
- ADDQ R8,CX
- MOVQ CX,R8
- SHRQ $32,R8
- MOVL CX,16(R12)
- MOVL R8, 36 (R12)
- CMPQ R9,$64
- JA BYTESATLEAST65
- JAE BYTESATLEAST64
- MOVQ DI,SI
- MOVQ DX,DI
- MOVQ R9,CX
+ LEAQ 360(R12), DI
+ LEAQ 360(R12), SI
+
+NOCOPY:
+ MOVQ R9, 352(R12)
+ MOVOA 48(R12), X0
+ MOVOA (R12), X1
+ MOVOA 16(R12), X2
+ MOVOA 32(R12), X3
+ MOVOA X1, X4
+ MOVQ $0x00000014, CX
+
+MAINLOOP2:
+ PADDL X0, X4
+ MOVOA X0, X5
+ MOVOA X4, X6
+ PSLLL $0x07, X4
+ PSRLL $0x19, X6
+ PXOR X4, X3
+ PXOR X6, X3
+ PADDL X3, X5
+ MOVOA X3, X4
+ MOVOA X5, X6
+ PSLLL $0x09, X5
+ PSRLL $0x17, X6
+ PXOR X5, X2
+ PSHUFL $0x93, X3, X3
+ PXOR X6, X2
+ PADDL X2, X4
+ MOVOA X2, X5
+ MOVOA X4, X6
+ PSLLL $0x0d, X4
+ PSRLL $0x13, X6
+ PXOR X4, X1
+ PSHUFL $0x4e, X2, X2
+ PXOR X6, X1
+ PADDL X1, X5
+ MOVOA X3, X4
+ MOVOA X5, X6
+ PSLLL $0x12, X5
+ PSRLL $0x0e, X6
+ PXOR X5, X0
+ PSHUFL $0x39, X1, X1
+ PXOR X6, X0
+ PADDL X0, X4
+ MOVOA X0, X5
+ MOVOA X4, X6
+ PSLLL $0x07, X4
+ PSRLL $0x19, X6
+ PXOR X4, X1
+ PXOR X6, X1
+ PADDL X1, X5
+ MOVOA X1, X4
+ MOVOA X5, X6
+ PSLLL $0x09, X5
+ PSRLL $0x17, X6
+ PXOR X5, X2
+ PSHUFL $0x93, X1, X1
+ PXOR X6, X2
+ PADDL X2, X4
+ MOVOA X2, X5
+ MOVOA X4, X6
+ PSLLL $0x0d, X4
+ PSRLL $0x13, X6
+ PXOR X4, X3
+ PSHUFL $0x4e, X2, X2
+ PXOR X6, X3
+ PADDL X3, X5
+ MOVOA X1, X4
+ MOVOA X5, X6
+ PSLLL $0x12, X5
+ PSRLL $0x0e, X6
+ PXOR X5, X0
+ PSHUFL $0x39, X3, X3
+ PXOR X6, X0
+ PADDL X0, X4
+ MOVOA X0, X5
+ MOVOA X4, X6
+ PSLLL $0x07, X4
+ PSRLL $0x19, X6
+ PXOR X4, X3
+ PXOR X6, X3
+ PADDL X3, X5
+ MOVOA X3, X4
+ MOVOA X5, X6
+ PSLLL $0x09, X5
+ PSRLL $0x17, X6
+ PXOR X5, X2
+ PSHUFL $0x93, X3, X3
+ PXOR X6, X2
+ PADDL X2, X4
+ MOVOA X2, X5
+ MOVOA X4, X6
+ PSLLL $0x0d, X4
+ PSRLL $0x13, X6
+ PXOR X4, X1
+ PSHUFL $0x4e, X2, X2
+ PXOR X6, X1
+ PADDL X1, X5
+ MOVOA X3, X4
+ MOVOA X5, X6
+ PSLLL $0x12, X5
+ PSRLL $0x0e, X6
+ PXOR X5, X0
+ PSHUFL $0x39, X1, X1
+ PXOR X6, X0
+ PADDL X0, X4
+ MOVOA X0, X5
+ MOVOA X4, X6
+ PSLLL $0x07, X4
+ PSRLL $0x19, X6
+ PXOR X4, X1
+ PXOR X6, X1
+ PADDL X1, X5
+ MOVOA X1, X4
+ MOVOA X5, X6
+ PSLLL $0x09, X5
+ PSRLL $0x17, X6
+ PXOR X5, X2
+ PSHUFL $0x93, X1, X1
+ PXOR X6, X2
+ PADDL X2, X4
+ MOVOA X2, X5
+ MOVOA X4, X6
+ PSLLL $0x0d, X4
+ PSRLL $0x13, X6
+ PXOR X4, X3
+ PSHUFL $0x4e, X2, X2
+ PXOR X6, X3
+ SUBQ $0x04, CX
+ PADDL X3, X5
+ MOVOA X1, X4
+ MOVOA X5, X6
+ PSLLL $0x12, X5
+ PXOR X7, X7
+ PSRLL $0x0e, X6
+ PXOR X5, X0
+ PSHUFL $0x39, X3, X3
+ PXOR X6, X0
+ JA MAINLOOP2
+ PADDL 48(R12), X0
+ PADDL (R12), X1
+ PADDL 16(R12), X2
+ PADDL 32(R12), X3
+ MOVD X0, CX
+ MOVD X1, R8
+ MOVD X2, R9
+ MOVD X3, AX
+ PSHUFL $0x39, X0, X0
+ PSHUFL $0x39, X1, X1
+ PSHUFL $0x39, X2, X2
+ PSHUFL $0x39, X3, X3
+ XORL (SI), CX
+ XORL 48(SI), R8
+ XORL 32(SI), R9
+ XORL 16(SI), AX
+ MOVL CX, (DI)
+ MOVL R8, 48(DI)
+ MOVL R9, 32(DI)
+ MOVL AX, 16(DI)
+ MOVD X0, CX
+ MOVD X1, R8
+ MOVD X2, R9
+ MOVD X3, AX
+ PSHUFL $0x39, X0, X0
+ PSHUFL $0x39, X1, X1
+ PSHUFL $0x39, X2, X2
+ PSHUFL $0x39, X3, X3
+ XORL 20(SI), CX
+ XORL 4(SI), R8
+ XORL 52(SI), R9
+ XORL 36(SI), AX
+ MOVL CX, 20(DI)
+ MOVL R8, 4(DI)
+ MOVL R9, 52(DI)
+ MOVL AX, 36(DI)
+ MOVD X0, CX
+ MOVD X1, R8
+ MOVD X2, R9
+ MOVD X3, AX
+ PSHUFL $0x39, X0, X0
+ PSHUFL $0x39, X1, X1
+ PSHUFL $0x39, X2, X2
+ PSHUFL $0x39, X3, X3
+ XORL 40(SI), CX
+ XORL 24(SI), R8
+ XORL 8(SI), R9
+ XORL 56(SI), AX
+ MOVL CX, 40(DI)
+ MOVL R8, 24(DI)
+ MOVL R9, 8(DI)
+ MOVL AX, 56(DI)
+ MOVD X0, CX
+ MOVD X1, R8
+ MOVD X2, R9
+ MOVD X3, AX
+ XORL 60(SI), CX
+ XORL 44(SI), R8
+ XORL 28(SI), R9
+ XORL 12(SI), AX
+ MOVL CX, 60(DI)
+ MOVL R8, 44(DI)
+ MOVL R9, 28(DI)
+ MOVL AX, 12(DI)
+ MOVQ 352(R12), R9
+ MOVL 16(R12), CX
+ MOVL 36(R12), R8
+ ADDQ $0x01, CX
+ SHLQ $0x20, R8
+ ADDQ R8, CX
+ MOVQ CX, R8
+ SHRQ $0x20, R8
+ MOVL CX, 16(R12)
+ MOVL R8, 36(R12)
+ CMPQ R9, $0x40
+ JA BYTESATLEAST65
+ JAE BYTESATLEAST64
+ MOVQ DI, SI
+ MOVQ DX, DI
+ MOVQ R9, CX
REP; MOVSB
- BYTESATLEAST64:
- DONE:
+
+BYTESATLEAST64:
+DONE:
RET
- BYTESATLEAST65:
- SUBQ $64,R9
- ADDQ $64,DI
- ADDQ $64,SI
- JMP BYTESBETWEEN1AND255
+
+BYTESATLEAST65:
+ SUBQ $0x40, R9
+ ADDQ $0x40, DI
+ ADDQ $0x40, SI
+ JMP BYTESBETWEEN1AND255
diff --git a/vendor/golang.org/x/crypto/sha3/doc.go b/vendor/golang.org/x/crypto/sha3/doc.go
index decd8cf9bf..bbf391fe6e 100644
--- a/vendor/golang.org/x/crypto/sha3/doc.go
+++ b/vendor/golang.org/x/crypto/sha3/doc.go
@@ -5,6 +5,10 @@
// Package sha3 implements the SHA-3 fixed-output-length hash functions and
// the SHAKE variable-output-length hash functions defined by FIPS-202.
//
+// All types in this package also implement [encoding.BinaryMarshaler],
+// [encoding.BinaryAppender] and [encoding.BinaryUnmarshaler] to marshal and
+// unmarshal the internal state of the hash.
+//
// Both types of hash function use the "sponge" construction and the Keccak
// permutation. For a detailed specification see http://keccak.noekeon.org/
//
@@ -59,4 +63,4 @@
// They produce output of the same length, with the same security strengths
// against all attacks. This means, in particular, that SHA3-256 only has
// 128-bit collision resistance, because its output length is 32 bytes.
-package sha3 // import "golang.org/x/crypto/sha3"
+package sha3
diff --git a/vendor/golang.org/x/crypto/sha3/hashes.go b/vendor/golang.org/x/crypto/sha3/hashes.go
index 5eae6cb922..31fffbe044 100644
--- a/vendor/golang.org/x/crypto/sha3/hashes.go
+++ b/vendor/golang.org/x/crypto/sha3/hashes.go
@@ -9,6 +9,7 @@ package sha3
// bytes.
import (
+ "crypto"
"hash"
)
@@ -40,33 +41,59 @@ func New512() hash.Hash {
return new512()
}
+func init() {
+ crypto.RegisterHash(crypto.SHA3_224, New224)
+ crypto.RegisterHash(crypto.SHA3_256, New256)
+ crypto.RegisterHash(crypto.SHA3_384, New384)
+ crypto.RegisterHash(crypto.SHA3_512, New512)
+}
+
+const (
+ dsbyteSHA3 = 0b00000110
+ dsbyteKeccak = 0b00000001
+ dsbyteShake = 0b00011111
+ dsbyteCShake = 0b00000100
+
+ // rateK[c] is the rate in bytes for Keccak[c] where c is the capacity in
+ // bits. Given the sponge size is 1600 bits, the rate is 1600 - c bits.
+ rateK256 = (1600 - 256) / 8
+ rateK448 = (1600 - 448) / 8
+ rateK512 = (1600 - 512) / 8
+ rateK768 = (1600 - 768) / 8
+ rateK1024 = (1600 - 1024) / 8
+)
+
func new224Generic() *state {
- return &state{rate: 144, outputLen: 28, dsbyte: 0x06}
+ return &state{rate: rateK448, outputLen: 28, dsbyte: dsbyteSHA3}
}
func new256Generic() *state {
- return &state{rate: 136, outputLen: 32, dsbyte: 0x06}
+ return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteSHA3}
}
func new384Generic() *state {
- return &state{rate: 104, outputLen: 48, dsbyte: 0x06}
+ return &state{rate: rateK768, outputLen: 48, dsbyte: dsbyteSHA3}
}
func new512Generic() *state {
- return &state{rate: 72, outputLen: 64, dsbyte: 0x06}
+ return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteSHA3}
}
// NewLegacyKeccak256 creates a new Keccak-256 hash.
//
// Only use this function if you require compatibility with an existing cryptosystem
// that uses non-standard padding. All other users should use New256 instead.
-func NewLegacyKeccak256() hash.Hash { return &state{rate: 136, outputLen: 32, dsbyte: 0x01} }
+func NewLegacyKeccak256() hash.Hash {
+ return &state{rate: rateK512, outputLen: 32, dsbyte: dsbyteKeccak}
+}
// NewLegacyKeccak512 creates a new Keccak-512 hash.
//
// Only use this function if you require compatibility with an existing cryptosystem
// that uses non-standard padding. All other users should use New512 instead.
-func NewLegacyKeccak512() hash.Hash { return &state{rate: 72, outputLen: 64, dsbyte: 0x01} }
+func NewLegacyKeccak512() hash.Hash {
+ return &state{rate: rateK1024, outputLen: 64, dsbyte: dsbyteKeccak}
+}
// Sum224 returns the SHA3-224 digest of the data.
func Sum224(data []byte) (digest [28]byte) {
diff --git a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s
index 1f53938861..99e2f16e97 100644
--- a/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s
+++ b/vendor/golang.org/x/crypto/sha3/keccakf_amd64.s
@@ -1,390 +1,5419 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run keccakf_amd64_asm.go -out ../keccakf_amd64.s -pkg sha3. DO NOT EDIT.
//go:build amd64 && !purego && gc
-// This code was translated into a form compatible with 6a from the public
-// domain sources at https://github.com/gvanas/KeccakCodePackage
-
-// Offsets in state
-#define _ba (0*8)
-#define _be (1*8)
-#define _bi (2*8)
-#define _bo (3*8)
-#define _bu (4*8)
-#define _ga (5*8)
-#define _ge (6*8)
-#define _gi (7*8)
-#define _go (8*8)
-#define _gu (9*8)
-#define _ka (10*8)
-#define _ke (11*8)
-#define _ki (12*8)
-#define _ko (13*8)
-#define _ku (14*8)
-#define _ma (15*8)
-#define _me (16*8)
-#define _mi (17*8)
-#define _mo (18*8)
-#define _mu (19*8)
-#define _sa (20*8)
-#define _se (21*8)
-#define _si (22*8)
-#define _so (23*8)
-#define _su (24*8)
-
-// Temporary registers
-#define rT1 AX
-
-// Round vars
-#define rpState DI
-#define rpStack SP
-
-#define rDa BX
-#define rDe CX
-#define rDi DX
-#define rDo R8
-#define rDu R9
-
-#define rBa R10
-#define rBe R11
-#define rBi R12
-#define rBo R13
-#define rBu R14
-
-#define rCa SI
-#define rCe BP
-#define rCi rBi
-#define rCo rBo
-#define rCu R15
-
-#define MOVQ_RBI_RCE MOVQ rBi, rCe
-#define XORQ_RT1_RCA XORQ rT1, rCa
-#define XORQ_RT1_RCE XORQ rT1, rCe
-#define XORQ_RBA_RCU XORQ rBa, rCu
-#define XORQ_RBE_RCU XORQ rBe, rCu
-#define XORQ_RDU_RCU XORQ rDu, rCu
-#define XORQ_RDA_RCA XORQ rDa, rCa
-#define XORQ_RDE_RCE XORQ rDe, rCe
-
-#define mKeccakRound(iState, oState, rc, B_RBI_RCE, G_RT1_RCA, G_RT1_RCE, G_RBA_RCU, K_RT1_RCA, K_RT1_RCE, K_RBA_RCU, M_RT1_RCA, M_RT1_RCE, M_RBE_RCU, S_RDU_RCU, S_RDA_RCA, S_RDE_RCE) \
- /* Prepare round */ \
- MOVQ rCe, rDa; \
- ROLQ $1, rDa; \
- \
- MOVQ _bi(iState), rCi; \
- XORQ _gi(iState), rDi; \
- XORQ rCu, rDa; \
- XORQ _ki(iState), rCi; \
- XORQ _mi(iState), rDi; \
- XORQ rDi, rCi; \
- \
- MOVQ rCi, rDe; \
- ROLQ $1, rDe; \
- \
- MOVQ _bo(iState), rCo; \
- XORQ _go(iState), rDo; \
- XORQ rCa, rDe; \
- XORQ _ko(iState), rCo; \
- XORQ _mo(iState), rDo; \
- XORQ rDo, rCo; \
- \
- MOVQ rCo, rDi; \
- ROLQ $1, rDi; \
- \
- MOVQ rCu, rDo; \
- XORQ rCe, rDi; \
- ROLQ $1, rDo; \
- \
- MOVQ rCa, rDu; \
- XORQ rCi, rDo; \
- ROLQ $1, rDu; \
- \
- /* Result b */ \
- MOVQ _ba(iState), rBa; \
- MOVQ _ge(iState), rBe; \
- XORQ rCo, rDu; \
- MOVQ _ki(iState), rBi; \
- MOVQ _mo(iState), rBo; \
- MOVQ _su(iState), rBu; \
- XORQ rDe, rBe; \
- ROLQ $44, rBe; \
- XORQ rDi, rBi; \
- XORQ rDa, rBa; \
- ROLQ $43, rBi; \
- \
- MOVQ rBe, rCa; \
- MOVQ rc, rT1; \
- ORQ rBi, rCa; \
- XORQ rBa, rT1; \
- XORQ rT1, rCa; \
- MOVQ rCa, _ba(oState); \
- \
- XORQ rDu, rBu; \
- ROLQ $14, rBu; \
- MOVQ rBa, rCu; \
- ANDQ rBe, rCu; \
- XORQ rBu, rCu; \
- MOVQ rCu, _bu(oState); \
- \
- XORQ rDo, rBo; \
- ROLQ $21, rBo; \
- MOVQ rBo, rT1; \
- ANDQ rBu, rT1; \
- XORQ rBi, rT1; \
- MOVQ rT1, _bi(oState); \
- \
- NOTQ rBi; \
- ORQ rBa, rBu; \
- ORQ rBo, rBi; \
- XORQ rBo, rBu; \
- XORQ rBe, rBi; \
- MOVQ rBu, _bo(oState); \
- MOVQ rBi, _be(oState); \
- B_RBI_RCE; \
- \
- /* Result g */ \
- MOVQ _gu(iState), rBe; \
- XORQ rDu, rBe; \
- MOVQ _ka(iState), rBi; \
- ROLQ $20, rBe; \
- XORQ rDa, rBi; \
- ROLQ $3, rBi; \
- MOVQ _bo(iState), rBa; \
- MOVQ rBe, rT1; \
- ORQ rBi, rT1; \
- XORQ rDo, rBa; \
- MOVQ _me(iState), rBo; \
- MOVQ _si(iState), rBu; \
- ROLQ $28, rBa; \
- XORQ rBa, rT1; \
- MOVQ rT1, _ga(oState); \
- G_RT1_RCA; \
- \
- XORQ rDe, rBo; \
- ROLQ $45, rBo; \
- MOVQ rBi, rT1; \
- ANDQ rBo, rT1; \
- XORQ rBe, rT1; \
- MOVQ rT1, _ge(oState); \
- G_RT1_RCE; \
- \
- XORQ rDi, rBu; \
- ROLQ $61, rBu; \
- MOVQ rBu, rT1; \
- ORQ rBa, rT1; \
- XORQ rBo, rT1; \
- MOVQ rT1, _go(oState); \
- \
- ANDQ rBe, rBa; \
- XORQ rBu, rBa; \
- MOVQ rBa, _gu(oState); \
- NOTQ rBu; \
- G_RBA_RCU; \
- \
- ORQ rBu, rBo; \
- XORQ rBi, rBo; \
- MOVQ rBo, _gi(oState); \
- \
- /* Result k */ \
- MOVQ _be(iState), rBa; \
- MOVQ _gi(iState), rBe; \
- MOVQ _ko(iState), rBi; \
- MOVQ _mu(iState), rBo; \
- MOVQ _sa(iState), rBu; \
- XORQ rDi, rBe; \
- ROLQ $6, rBe; \
- XORQ rDo, rBi; \
- ROLQ $25, rBi; \
- MOVQ rBe, rT1; \
- ORQ rBi, rT1; \
- XORQ rDe, rBa; \
- ROLQ $1, rBa; \
- XORQ rBa, rT1; \
- MOVQ rT1, _ka(oState); \
- K_RT1_RCA; \
- \
- XORQ rDu, rBo; \
- ROLQ $8, rBo; \
- MOVQ rBi, rT1; \
- ANDQ rBo, rT1; \
- XORQ rBe, rT1; \
- MOVQ rT1, _ke(oState); \
- K_RT1_RCE; \
- \
- XORQ rDa, rBu; \
- ROLQ $18, rBu; \
- NOTQ rBo; \
- MOVQ rBo, rT1; \
- ANDQ rBu, rT1; \
- XORQ rBi, rT1; \
- MOVQ rT1, _ki(oState); \
- \
- MOVQ rBu, rT1; \
- ORQ rBa, rT1; \
- XORQ rBo, rT1; \
- MOVQ rT1, _ko(oState); \
- \
- ANDQ rBe, rBa; \
- XORQ rBu, rBa; \
- MOVQ rBa, _ku(oState); \
- K_RBA_RCU; \
- \
- /* Result m */ \
- MOVQ _ga(iState), rBe; \
- XORQ rDa, rBe; \
- MOVQ _ke(iState), rBi; \
- ROLQ $36, rBe; \
- XORQ rDe, rBi; \
- MOVQ _bu(iState), rBa; \
- ROLQ $10, rBi; \
- MOVQ rBe, rT1; \
- MOVQ _mi(iState), rBo; \
- ANDQ rBi, rT1; \
- XORQ rDu, rBa; \
- MOVQ _so(iState), rBu; \
- ROLQ $27, rBa; \
- XORQ rBa, rT1; \
- MOVQ rT1, _ma(oState); \
- M_RT1_RCA; \
- \
- XORQ rDi, rBo; \
- ROLQ $15, rBo; \
- MOVQ rBi, rT1; \
- ORQ rBo, rT1; \
- XORQ rBe, rT1; \
- MOVQ rT1, _me(oState); \
- M_RT1_RCE; \
- \
- XORQ rDo, rBu; \
- ROLQ $56, rBu; \
- NOTQ rBo; \
- MOVQ rBo, rT1; \
- ORQ rBu, rT1; \
- XORQ rBi, rT1; \
- MOVQ rT1, _mi(oState); \
- \
- ORQ rBa, rBe; \
- XORQ rBu, rBe; \
- MOVQ rBe, _mu(oState); \
- \
- ANDQ rBa, rBu; \
- XORQ rBo, rBu; \
- MOVQ rBu, _mo(oState); \
- M_RBE_RCU; \
- \
- /* Result s */ \
- MOVQ _bi(iState), rBa; \
- MOVQ _go(iState), rBe; \
- MOVQ _ku(iState), rBi; \
- XORQ rDi, rBa; \
- MOVQ _ma(iState), rBo; \
- ROLQ $62, rBa; \
- XORQ rDo, rBe; \
- MOVQ _se(iState), rBu; \
- ROLQ $55, rBe; \
- \
- XORQ rDu, rBi; \
- MOVQ rBa, rDu; \
- XORQ rDe, rBu; \
- ROLQ $2, rBu; \
- ANDQ rBe, rDu; \
- XORQ rBu, rDu; \
- MOVQ rDu, _su(oState); \
- \
- ROLQ $39, rBi; \
- S_RDU_RCU; \
- NOTQ rBe; \
- XORQ rDa, rBo; \
- MOVQ rBe, rDa; \
- ANDQ rBi, rDa; \
- XORQ rBa, rDa; \
- MOVQ rDa, _sa(oState); \
- S_RDA_RCA; \
- \
- ROLQ $41, rBo; \
- MOVQ rBi, rDe; \
- ORQ rBo, rDe; \
- XORQ rBe, rDe; \
- MOVQ rDe, _se(oState); \
- S_RDE_RCE; \
- \
- MOVQ rBo, rDi; \
- MOVQ rBu, rDo; \
- ANDQ rBu, rDi; \
- ORQ rBa, rDo; \
- XORQ rBi, rDi; \
- XORQ rBo, rDo; \
- MOVQ rDi, _si(oState); \
- MOVQ rDo, _so(oState) \
-
// func keccakF1600(a *[25]uint64)
-TEXT ·keccakF1600(SB), 0, $200-8
- MOVQ a+0(FP), rpState
+TEXT ·keccakF1600(SB), $200-8
+ MOVQ a+0(FP), DI
// Convert the user state into an internal state
- NOTQ _be(rpState)
- NOTQ _bi(rpState)
- NOTQ _go(rpState)
- NOTQ _ki(rpState)
- NOTQ _mi(rpState)
- NOTQ _sa(rpState)
+ NOTQ 8(DI)
+ NOTQ 16(DI)
+ NOTQ 64(DI)
+ NOTQ 96(DI)
+ NOTQ 136(DI)
+ NOTQ 160(DI)
// Execute the KeccakF permutation
- MOVQ _ba(rpState), rCa
- MOVQ _be(rpState), rCe
- MOVQ _bu(rpState), rCu
-
- XORQ _ga(rpState), rCa
- XORQ _ge(rpState), rCe
- XORQ _gu(rpState), rCu
-
- XORQ _ka(rpState), rCa
- XORQ _ke(rpState), rCe
- XORQ _ku(rpState), rCu
-
- XORQ _ma(rpState), rCa
- XORQ _me(rpState), rCe
- XORQ _mu(rpState), rCu
-
- XORQ _sa(rpState), rCa
- XORQ _se(rpState), rCe
- MOVQ _si(rpState), rDi
- MOVQ _so(rpState), rDo
- XORQ _su(rpState), rCu
-
- mKeccakRound(rpState, rpStack, $0x0000000000000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x0000000000008082, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x800000000000808a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000080008000, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000000000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000000000008a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x0000000000000088, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x0000000080008009, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x000000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000008000808b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x800000000000008b, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000000008089, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000008003, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000000008002, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000000080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x000000000000800a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x800000008000000a, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x8000000080008081, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000000008080, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpState, rpStack, $0x0000000080000001, MOVQ_RBI_RCE, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBA_RCU, XORQ_RT1_RCA, XORQ_RT1_RCE, XORQ_RBE_RCU, XORQ_RDU_RCU, XORQ_RDA_RCA, XORQ_RDE_RCE)
- mKeccakRound(rpStack, rpState, $0x8000000080008008, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP, NOP)
+ MOVQ (DI), SI
+ MOVQ 8(DI), BP
+ MOVQ 32(DI), R15
+ XORQ 40(DI), SI
+ XORQ 48(DI), BP
+ XORQ 72(DI), R15
+ XORQ 80(DI), SI
+ XORQ 88(DI), BP
+ XORQ 112(DI), R15
+ XORQ 120(DI), SI
+ XORQ 128(DI), BP
+ XORQ 152(DI), R15
+ XORQ 160(DI), SI
+ XORQ 168(DI), BP
+ MOVQ 176(DI), DX
+ MOVQ 184(DI), R8
+ XORQ 192(DI), R15
- // Revert the internal state to the user state
- NOTQ _be(rpState)
- NOTQ _bi(rpState)
- NOTQ _go(rpState)
- NOTQ _ki(rpState)
- NOTQ _mi(rpState)
- NOTQ _sa(rpState)
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x0000000000000001, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x0000000000008082, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x800000000000808a, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000080008000, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x000000000000808b, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x0000000080000001, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000080008081, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000000008009, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x000000000000008a, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x0000000000000088, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x0000000080008009, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x000000008000000a, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x000000008000808b, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x800000000000008b, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000000008089, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000000008003, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000000008002, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000000000080, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x000000000000800a, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x800000008000000a, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000080008081, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000000008080, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(DI), R12
+ XORQ 56(DI), DX
+ XORQ R15, BX
+ XORQ 96(DI), R12
+ XORQ 136(DI), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(DI), R13
+ XORQ 64(DI), R8
+ XORQ SI, CX
+ XORQ 104(DI), R13
+ XORQ 144(DI), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (DI), R10
+ MOVQ 48(DI), R11
+ XORQ R13, R9
+ MOVQ 96(DI), R12
+ MOVQ 144(DI), R13
+ MOVQ 192(DI), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x0000000080000001, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (SP)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(SP)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(SP)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(SP)
+ MOVQ R12, 8(SP)
+ MOVQ R12, BP
+
+ // Result g
+ MOVQ 72(DI), R11
+ XORQ R9, R11
+ MOVQ 80(DI), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(DI), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(DI), R13
+ MOVQ 176(DI), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(SP)
+ XORQ AX, SI
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(SP)
+ XORQ AX, BP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(SP)
+ NOTQ R14
+ XORQ R10, R15
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(SP)
+
+ // Result k
+ MOVQ 8(DI), R10
+ MOVQ 56(DI), R11
+ MOVQ 104(DI), R12
+ MOVQ 152(DI), R13
+ MOVQ 160(DI), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(SP)
+ XORQ AX, SI
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(SP)
+ XORQ AX, BP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(SP)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(SP)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(SP)
+ XORQ R10, R15
+
+ // Result m
+ MOVQ 40(DI), R11
+ XORQ BX, R11
+ MOVQ 88(DI), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(DI), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(DI), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(DI), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(SP)
+ XORQ AX, SI
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(SP)
+ XORQ AX, BP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(SP)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(SP)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(SP)
+ XORQ R11, R15
+
+ // Result s
+ MOVQ 16(DI), R10
+ MOVQ 64(DI), R11
+ MOVQ 112(DI), R12
+ XORQ DX, R10
+ MOVQ 120(DI), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(DI), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(SP)
+ ROLQ $0x27, R12
+ XORQ R9, R15
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(SP)
+ XORQ BX, SI
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(SP)
+ XORQ CX, BP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(SP)
+ MOVQ R8, 184(SP)
+
+ // Prepare round
+ MOVQ BP, BX
+ ROLQ $0x01, BX
+ MOVQ 16(SP), R12
+ XORQ 56(SP), DX
+ XORQ R15, BX
+ XORQ 96(SP), R12
+ XORQ 136(SP), DX
+ XORQ DX, R12
+ MOVQ R12, CX
+ ROLQ $0x01, CX
+ MOVQ 24(SP), R13
+ XORQ 64(SP), R8
+ XORQ SI, CX
+ XORQ 104(SP), R13
+ XORQ 144(SP), R8
+ XORQ R8, R13
+ MOVQ R13, DX
+ ROLQ $0x01, DX
+ MOVQ R15, R8
+ XORQ BP, DX
+ ROLQ $0x01, R8
+ MOVQ SI, R9
+ XORQ R12, R8
+ ROLQ $0x01, R9
+
+ // Result b
+ MOVQ (SP), R10
+ MOVQ 48(SP), R11
+ XORQ R13, R9
+ MOVQ 96(SP), R12
+ MOVQ 144(SP), R13
+ MOVQ 192(SP), R14
+ XORQ CX, R11
+ ROLQ $0x2c, R11
+ XORQ DX, R12
+ XORQ BX, R10
+ ROLQ $0x2b, R12
+ MOVQ R11, SI
+ MOVQ $0x8000000080008008, AX
+ ORQ R12, SI
+ XORQ R10, AX
+ XORQ AX, SI
+ MOVQ SI, (DI)
+ XORQ R9, R14
+ ROLQ $0x0e, R14
+ MOVQ R10, R15
+ ANDQ R11, R15
+ XORQ R14, R15
+ MOVQ R15, 32(DI)
+ XORQ R8, R13
+ ROLQ $0x15, R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 16(DI)
+ NOTQ R12
+ ORQ R10, R14
+ ORQ R13, R12
+ XORQ R13, R14
+ XORQ R11, R12
+ MOVQ R14, 24(DI)
+ MOVQ R12, 8(DI)
+ NOP
+
+ // Result g
+ MOVQ 72(SP), R11
+ XORQ R9, R11
+ MOVQ 80(SP), R12
+ ROLQ $0x14, R11
+ XORQ BX, R12
+ ROLQ $0x03, R12
+ MOVQ 24(SP), R10
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ R8, R10
+ MOVQ 128(SP), R13
+ MOVQ 176(SP), R14
+ ROLQ $0x1c, R10
+ XORQ R10, AX
+ MOVQ AX, 40(DI)
+ NOP
+ XORQ CX, R13
+ ROLQ $0x2d, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 48(DI)
+ NOP
+ XORQ DX, R14
+ ROLQ $0x3d, R14
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 64(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 72(DI)
+ NOTQ R14
+ NOP
+ ORQ R14, R13
+ XORQ R12, R13
+ MOVQ R13, 56(DI)
+
+ // Result k
+ MOVQ 8(SP), R10
+ MOVQ 56(SP), R11
+ MOVQ 104(SP), R12
+ MOVQ 152(SP), R13
+ MOVQ 160(SP), R14
+ XORQ DX, R11
+ ROLQ $0x06, R11
+ XORQ R8, R12
+ ROLQ $0x19, R12
+ MOVQ R11, AX
+ ORQ R12, AX
+ XORQ CX, R10
+ ROLQ $0x01, R10
+ XORQ R10, AX
+ MOVQ AX, 80(DI)
+ NOP
+ XORQ R9, R13
+ ROLQ $0x08, R13
+ MOVQ R12, AX
+ ANDQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 88(DI)
+ NOP
+ XORQ BX, R14
+ ROLQ $0x12, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ANDQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 96(DI)
+ MOVQ R14, AX
+ ORQ R10, AX
+ XORQ R13, AX
+ MOVQ AX, 104(DI)
+ ANDQ R11, R10
+ XORQ R14, R10
+ MOVQ R10, 112(DI)
+ NOP
+
+ // Result m
+ MOVQ 40(SP), R11
+ XORQ BX, R11
+ MOVQ 88(SP), R12
+ ROLQ $0x24, R11
+ XORQ CX, R12
+ MOVQ 32(SP), R10
+ ROLQ $0x0a, R12
+ MOVQ R11, AX
+ MOVQ 136(SP), R13
+ ANDQ R12, AX
+ XORQ R9, R10
+ MOVQ 184(SP), R14
+ ROLQ $0x1b, R10
+ XORQ R10, AX
+ MOVQ AX, 120(DI)
+ NOP
+ XORQ DX, R13
+ ROLQ $0x0f, R13
+ MOVQ R12, AX
+ ORQ R13, AX
+ XORQ R11, AX
+ MOVQ AX, 128(DI)
+ NOP
+ XORQ R8, R14
+ ROLQ $0x38, R14
+ NOTQ R13
+ MOVQ R13, AX
+ ORQ R14, AX
+ XORQ R12, AX
+ MOVQ AX, 136(DI)
+ ORQ R10, R11
+ XORQ R14, R11
+ MOVQ R11, 152(DI)
+ ANDQ R10, R14
+ XORQ R13, R14
+ MOVQ R14, 144(DI)
+ NOP
+
+ // Result s
+ MOVQ 16(SP), R10
+ MOVQ 64(SP), R11
+ MOVQ 112(SP), R12
+ XORQ DX, R10
+ MOVQ 120(SP), R13
+ ROLQ $0x3e, R10
+ XORQ R8, R11
+ MOVQ 168(SP), R14
+ ROLQ $0x37, R11
+ XORQ R9, R12
+ MOVQ R10, R9
+ XORQ CX, R14
+ ROLQ $0x02, R14
+ ANDQ R11, R9
+ XORQ R14, R9
+ MOVQ R9, 192(DI)
+ ROLQ $0x27, R12
+ NOP
+ NOTQ R11
+ XORQ BX, R13
+ MOVQ R11, BX
+ ANDQ R12, BX
+ XORQ R10, BX
+ MOVQ BX, 160(DI)
+ NOP
+ ROLQ $0x29, R13
+ MOVQ R12, CX
+ ORQ R13, CX
+ XORQ R11, CX
+ MOVQ CX, 168(DI)
+ NOP
+ MOVQ R13, DX
+ MOVQ R14, R8
+ ANDQ R14, DX
+ ORQ R10, R8
+ XORQ R12, DX
+ XORQ R13, R8
+ MOVQ DX, 176(DI)
+ MOVQ R8, 184(DI)
+
+ // Revert the internal state to the user state
+ NOTQ 8(DI)
+ NOTQ 16(DI)
+ NOTQ 64(DI)
+ NOTQ 96(DI)
+ NOTQ 136(DI)
+ NOTQ 160(DI)
RET
diff --git a/vendor/golang.org/x/crypto/sha3/register.go b/vendor/golang.org/x/crypto/sha3/register.go
deleted file mode 100644
index addfd5049b..0000000000
--- a/vendor/golang.org/x/crypto/sha3/register.go
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.4
-
-package sha3
-
-import (
- "crypto"
-)
-
-func init() {
- crypto.RegisterHash(crypto.SHA3_224, New224)
- crypto.RegisterHash(crypto.SHA3_256, New256)
- crypto.RegisterHash(crypto.SHA3_384, New384)
- crypto.RegisterHash(crypto.SHA3_512, New512)
-}
diff --git a/vendor/golang.org/x/crypto/sha3/sha3.go b/vendor/golang.org/x/crypto/sha3/sha3.go
index afedde5abf..6658c44479 100644
--- a/vendor/golang.org/x/crypto/sha3/sha3.go
+++ b/vendor/golang.org/x/crypto/sha3/sha3.go
@@ -4,6 +4,15 @@
package sha3
+import (
+ "crypto/subtle"
+ "encoding/binary"
+ "errors"
+ "unsafe"
+
+ "golang.org/x/sys/cpu"
+)
+
// spongeDirection indicates the direction bytes are flowing through the sponge.
type spongeDirection int
@@ -14,16 +23,13 @@ const (
spongeSqueezing
)
-const (
- // maxRate is the maximum size of the internal buffer. SHAKE-256
- // currently needs the largest buffer.
- maxRate = 168
-)
-
type state struct {
- // Generic sponge components.
- a [25]uint64 // main state of the hash
- rate int // the number of bytes of state to use
+ a [1600 / 8]byte // main state of the hash
+
+ // a[n:rate] is the buffer. If absorbing, it's the remaining space to XOR
+ // into before running the permutation. If squeezing, it's the remaining
+ // output to produce before running the permutation.
+ n, rate int
// dsbyte contains the "domain separation" bits and the first bit of
// the padding. Sections 6.1 and 6.2 of [1] separate the outputs of the
@@ -39,10 +45,6 @@ type state struct {
// Extendable-Output Functions (May 2014)"
dsbyte byte
- i, n int // storage[i:n] is the buffer, i is only used while squeezing
- storage [maxRate]byte
-
- // Specific to SHA-3 and SHAKE.
outputLen int // the default output size in bytes
state spongeDirection // whether the sponge is absorbing or squeezing
}
@@ -61,7 +63,7 @@ func (d *state) Reset() {
d.a[i] = 0
}
d.state = spongeAbsorbing
- d.i, d.n = 0, 0
+ d.n = 0
}
func (d *state) clone() *state {
@@ -69,22 +71,25 @@ func (d *state) clone() *state {
return &ret
}
-// permute applies the KeccakF-1600 permutation. It handles
-// any input-output buffering.
+// permute applies the KeccakF-1600 permutation.
func (d *state) permute() {
- switch d.state {
- case spongeAbsorbing:
- // If we're absorbing, we need to xor the input into the state
- // before applying the permutation.
- xorIn(d, d.storage[:d.rate])
- d.n = 0
- keccakF1600(&d.a)
- case spongeSqueezing:
- // If we're squeezing, we need to apply the permutation before
- // copying more output.
- keccakF1600(&d.a)
- d.i = 0
- copyOut(d, d.storage[:d.rate])
+ var a *[25]uint64
+ if cpu.IsBigEndian {
+ a = new([25]uint64)
+ for i := range a {
+ a[i] = binary.LittleEndian.Uint64(d.a[i*8:])
+ }
+ } else {
+ a = (*[25]uint64)(unsafe.Pointer(&d.a))
+ }
+
+ keccakF1600(a)
+ d.n = 0
+
+ if cpu.IsBigEndian {
+ for i := range a {
+ binary.LittleEndian.PutUint64(d.a[i*8:], a[i])
+ }
}
}
@@ -92,53 +97,36 @@ func (d *state) permute() {
// the multi-bitrate 10..1 padding rule, and permutes the state.
func (d *state) padAndPermute() {
// Pad with this instance's domain-separator bits. We know that there's
- // at least one byte of space in d.buf because, if it were full,
+ // at least one byte of space in the sponge because, if it were full,
// permute would have been called to empty it. dsbyte also contains the
// first one bit for the padding. See the comment in the state struct.
- d.storage[d.n] = d.dsbyte
- d.n++
- for d.n < d.rate {
- d.storage[d.n] = 0
- d.n++
- }
+ d.a[d.n] ^= d.dsbyte
// This adds the final one bit for the padding. Because of the way that
// bits are numbered from the LSB upwards, the final bit is the MSB of
// the last byte.
- d.storage[d.rate-1] ^= 0x80
+ d.a[d.rate-1] ^= 0x80
// Apply the permutation
d.permute()
d.state = spongeSqueezing
- d.n = d.rate
- copyOut(d, d.storage[:d.rate])
}
// Write absorbs more data into the hash's state. It panics if any
// output has already been read.
-func (d *state) Write(p []byte) (written int, err error) {
+func (d *state) Write(p []byte) (n int, err error) {
if d.state != spongeAbsorbing {
panic("sha3: Write after Read")
}
- written = len(p)
+
+ n = len(p)
for len(p) > 0 {
- if d.n == 0 && len(p) >= d.rate {
- // The fast path; absorb a full "rate" bytes of input and apply the permutation.
- xorIn(d, p[:d.rate])
- p = p[d.rate:]
- keccakF1600(&d.a)
- } else {
- // The slow path; buffer the input until we can fill the sponge, and then xor it in.
- todo := d.rate - d.n
- if todo > len(p) {
- todo = len(p)
- }
- d.n += copy(d.storage[d.n:], p[:todo])
- p = p[todo:]
-
- // If the sponge is full, apply the permutation.
- if d.n == d.rate {
- d.permute()
- }
+ x := subtle.XORBytes(d.a[d.n:d.rate], d.a[d.n:d.rate], p)
+ d.n += x
+ p = p[x:]
+
+ // If the sponge is full, apply the permutation.
+ if d.n == d.rate {
+ d.permute()
}
}
@@ -156,14 +144,14 @@ func (d *state) Read(out []byte) (n int, err error) {
// Now, do the squeezing.
for len(out) > 0 {
- n := copy(out, d.storage[d.i:d.n])
- d.i += n
- out = out[n:]
-
// Apply the permutation if we've squeezed the sponge dry.
- if d.i == d.rate {
+ if d.n == d.rate {
d.permute()
}
+
+ x := copy(out, d.a[d.n:d.rate])
+ d.n += x
+ out = out[x:]
}
return
@@ -183,3 +171,74 @@ func (d *state) Sum(in []byte) []byte {
dup.Read(hash)
return append(in, hash...)
}
+
+const (
+ magicSHA3 = "sha\x08"
+ magicShake = "sha\x09"
+ magicCShake = "sha\x0a"
+ magicKeccak = "sha\x0b"
+ // magic || rate || main state || n || sponge direction
+ marshaledSize = len(magicSHA3) + 1 + 200 + 1 + 1
+)
+
+func (d *state) MarshalBinary() ([]byte, error) {
+ return d.AppendBinary(make([]byte, 0, marshaledSize))
+}
+
+func (d *state) AppendBinary(b []byte) ([]byte, error) {
+ switch d.dsbyte {
+ case dsbyteSHA3:
+ b = append(b, magicSHA3...)
+ case dsbyteShake:
+ b = append(b, magicShake...)
+ case dsbyteCShake:
+ b = append(b, magicCShake...)
+ case dsbyteKeccak:
+ b = append(b, magicKeccak...)
+ default:
+ panic("unknown dsbyte")
+ }
+ // rate is at most 168, and n is at most rate.
+ b = append(b, byte(d.rate))
+ b = append(b, d.a[:]...)
+ b = append(b, byte(d.n), byte(d.state))
+ return b, nil
+}
+
+func (d *state) UnmarshalBinary(b []byte) error {
+ if len(b) != marshaledSize {
+ return errors.New("sha3: invalid hash state")
+ }
+
+ magic := string(b[:len(magicSHA3)])
+ b = b[len(magicSHA3):]
+ switch {
+ case magic == magicSHA3 && d.dsbyte == dsbyteSHA3:
+ case magic == magicShake && d.dsbyte == dsbyteShake:
+ case magic == magicCShake && d.dsbyte == dsbyteCShake:
+ case magic == magicKeccak && d.dsbyte == dsbyteKeccak:
+ default:
+ return errors.New("sha3: invalid hash state identifier")
+ }
+
+ rate := int(b[0])
+ b = b[1:]
+ if rate != d.rate {
+ return errors.New("sha3: invalid hash state function")
+ }
+
+ copy(d.a[:], b)
+ b = b[len(d.a):]
+
+ n, state := int(b[0]), spongeDirection(b[1])
+ if n > d.rate {
+ return errors.New("sha3: invalid hash state")
+ }
+ d.n = n
+ if state != spongeAbsorbing && state != spongeSqueezing {
+ return errors.New("sha3: invalid hash state")
+ }
+ d.state = state
+
+ return nil
+}
diff --git a/vendor/golang.org/x/crypto/sha3/shake.go b/vendor/golang.org/x/crypto/sha3/shake.go
index 1ea9275b8b..a6b3a4281f 100644
--- a/vendor/golang.org/x/crypto/sha3/shake.go
+++ b/vendor/golang.org/x/crypto/sha3/shake.go
@@ -16,9 +16,12 @@ package sha3
// [2] https://doi.org/10.6028/NIST.SP.800-185
import (
+ "bytes"
"encoding/binary"
+ "errors"
"hash"
"io"
+ "math/bits"
)
// ShakeHash defines the interface to hash functions that support
@@ -50,44 +53,36 @@ type cshakeState struct {
initBlock []byte
}
-// Consts for configuring initial SHA-3 state
-const (
- dsbyteShake = 0x1f
- dsbyteCShake = 0x04
- rate128 = 168
- rate256 = 136
-)
+func bytepad(data []byte, rate int) []byte {
+ out := make([]byte, 0, 9+len(data)+rate-1)
+ out = append(out, leftEncode(uint64(rate))...)
+ out = append(out, data...)
+ if padlen := rate - len(out)%rate; padlen < rate {
+ out = append(out, make([]byte, padlen)...)
+ }
+ return out
+}
-func bytepad(input []byte, w int) []byte {
- // leftEncode always returns max 9 bytes
- buf := make([]byte, 0, 9+len(input)+w)
- buf = append(buf, leftEncode(uint64(w))...)
- buf = append(buf, input...)
- padlen := w - (len(buf) % w)
- return append(buf, make([]byte, padlen)...)
-}
-
-func leftEncode(value uint64) []byte {
- var b [9]byte
- binary.BigEndian.PutUint64(b[1:], value)
- // Trim all but last leading zero bytes
- i := byte(1)
- for i < 8 && b[i] == 0 {
- i++
+func leftEncode(x uint64) []byte {
+ // Let n be the smallest positive integer for which 2^(8n) > x.
+ n := (bits.Len64(x) + 7) / 8
+ if n == 0 {
+ n = 1
}
- // Prepend number of encoded bytes
- b[i-1] = 9 - i
- return b[i-1:]
+ // Return n || x with n as a byte and x as n bytes in big-endian order.
+ b := make([]byte, 9)
+ binary.BigEndian.PutUint64(b[1:], x)
+ b = b[9-n-1:]
+ b[0] = byte(n)
+ return b
}
func newCShake(N, S []byte, rate, outputLen int, dsbyte byte) ShakeHash {
c := cshakeState{state: &state{rate: rate, outputLen: outputLen, dsbyte: dsbyte}}
-
- // leftEncode returns max 9 bytes
- c.initBlock = make([]byte, 0, 9*2+len(N)+len(S))
- c.initBlock = append(c.initBlock, leftEncode(uint64(len(N)*8))...)
+ c.initBlock = make([]byte, 0, 9+len(N)+9+len(S)) // leftEncode returns max 9 bytes
+ c.initBlock = append(c.initBlock, leftEncode(uint64(len(N))*8)...)
c.initBlock = append(c.initBlock, N...)
- c.initBlock = append(c.initBlock, leftEncode(uint64(len(S)*8))...)
+ c.initBlock = append(c.initBlock, leftEncode(uint64(len(S))*8)...)
c.initBlock = append(c.initBlock, S...)
c.Write(bytepad(c.initBlock, c.rate))
return &c
@@ -111,6 +106,30 @@ func (c *state) Clone() ShakeHash {
return c.clone()
}
+func (c *cshakeState) MarshalBinary() ([]byte, error) {
+ return c.AppendBinary(make([]byte, 0, marshaledSize+len(c.initBlock)))
+}
+
+func (c *cshakeState) AppendBinary(b []byte) ([]byte, error) {
+ b, err := c.state.AppendBinary(b)
+ if err != nil {
+ return nil, err
+ }
+ b = append(b, c.initBlock...)
+ return b, nil
+}
+
+func (c *cshakeState) UnmarshalBinary(b []byte) error {
+ if len(b) <= marshaledSize {
+ return errors.New("sha3: invalid hash state")
+ }
+ if err := c.state.UnmarshalBinary(b[:marshaledSize]); err != nil {
+ return err
+ }
+ c.initBlock = bytes.Clone(b[marshaledSize:])
+ return nil
+}
+
// NewShake128 creates a new SHAKE128 variable-output-length ShakeHash.
// Its generic security strength is 128 bits against all attacks if at
// least 32 bytes of its output are used.
@@ -126,11 +145,11 @@ func NewShake256() ShakeHash {
}
func newShake128Generic() *state {
- return &state{rate: rate128, outputLen: 32, dsbyte: dsbyteShake}
+ return &state{rate: rateK256, outputLen: 32, dsbyte: dsbyteShake}
}
func newShake256Generic() *state {
- return &state{rate: rate256, outputLen: 64, dsbyte: dsbyteShake}
+ return &state{rate: rateK512, outputLen: 64, dsbyte: dsbyteShake}
}
// NewCShake128 creates a new instance of cSHAKE128 variable-output-length ShakeHash,
@@ -143,7 +162,7 @@ func NewCShake128(N, S []byte) ShakeHash {
if len(N) == 0 && len(S) == 0 {
return NewShake128()
}
- return newCShake(N, S, rate128, 32, dsbyteCShake)
+ return newCShake(N, S, rateK256, 32, dsbyteCShake)
}
// NewCShake256 creates a new instance of cSHAKE256 variable-output-length ShakeHash,
@@ -156,7 +175,7 @@ func NewCShake256(N, S []byte) ShakeHash {
if len(N) == 0 && len(S) == 0 {
return NewShake256()
}
- return newCShake(N, S, rate256, 64, dsbyteCShake)
+ return newCShake(N, S, rateK512, 64, dsbyteCShake)
}
// ShakeSum128 writes an arbitrary-length digest of data into hash.
diff --git a/vendor/golang.org/x/crypto/sha3/xor.go b/vendor/golang.org/x/crypto/sha3/xor.go
deleted file mode 100644
index 6ada5c9574..0000000000
--- a/vendor/golang.org/x/crypto/sha3/xor.go
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2015 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package sha3
-
-import (
- "crypto/subtle"
- "encoding/binary"
- "unsafe"
-
- "golang.org/x/sys/cpu"
-)
-
-// xorIn xors the bytes in buf into the state.
-func xorIn(d *state, buf []byte) {
- if cpu.IsBigEndian {
- for i := 0; len(buf) >= 8; i++ {
- a := binary.LittleEndian.Uint64(buf)
- d.a[i] ^= a
- buf = buf[8:]
- }
- } else {
- ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a))
- subtle.XORBytes(ab[:], ab[:], buf)
- }
-}
-
-// copyOut copies uint64s to a byte buffer.
-func copyOut(d *state, b []byte) {
- if cpu.IsBigEndian {
- for i := 0; len(b) >= 8; i++ {
- binary.LittleEndian.PutUint64(b, d.a[i])
- b = b[8:]
- }
- } else {
- ab := (*[25 * 64 / 8]byte)(unsafe.Pointer(&d.a))
- copy(b, ab[:])
- }
-}
diff --git a/vendor/golang.org/x/net/LICENSE b/vendor/golang.org/x/net/LICENSE
index 6a66aea5ea..2a7cf70da6 100644
--- a/vendor/golang.org/x/net/LICENSE
+++ b/vendor/golang.org/x/net/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
+Copyright 2009 The Go Authors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer.
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
- * Neither the name of Google Inc. nor the names of its
+ * Neither the name of Google LLC nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/vendor/golang.org/x/net/context/context.go b/vendor/golang.org/x/net/context/context.go
index cf66309c4a..db1c95fab1 100644
--- a/vendor/golang.org/x/net/context/context.go
+++ b/vendor/golang.org/x/net/context/context.go
@@ -3,29 +3,31 @@
// license that can be found in the LICENSE file.
// Package context defines the Context type, which carries deadlines,
-// cancelation signals, and other request-scoped values across API boundaries
+// cancellation signals, and other request-scoped values across API boundaries
// and between processes.
// As of Go 1.7 this package is available in the standard library under the
-// name context. https://golang.org/pkg/context.
+// name [context], and migrating to it can be done automatically with [go fix].
//
-// Incoming requests to a server should create a Context, and outgoing calls to
-// servers should accept a Context. The chain of function calls between must
-// propagate the Context, optionally replacing it with a modified copy created
-// using WithDeadline, WithTimeout, WithCancel, or WithValue.
+// Incoming requests to a server should create a [Context], and outgoing
+// calls to servers should accept a Context. The chain of function
+// calls between them must propagate the Context, optionally replacing
+// it with a derived Context created using [WithCancel], [WithDeadline],
+// [WithTimeout], or [WithValue].
//
// Programs that use Contexts should follow these rules to keep interfaces
// consistent across packages and enable static analysis tools to check context
// propagation:
//
// Do not store Contexts inside a struct type; instead, pass a Context
-// explicitly to each function that needs it. The Context should be the first
+// explicitly to each function that needs it. This is discussed further in
+// https://go.dev/blog/context-and-structs. The Context should be the first
// parameter, typically named ctx:
//
// func DoSomething(ctx context.Context, arg Arg) error {
// // ... use ctx ...
// }
//
-// Do not pass a nil Context, even if a function permits it. Pass context.TODO
+// Do not pass a nil [Context], even if a function permits it. Pass [context.TODO]
// if you are unsure about which Context to use.
//
// Use context Values only for request-scoped data that transits processes and
@@ -34,9 +36,30 @@
// The same Context may be passed to functions running in different goroutines;
// Contexts are safe for simultaneous use by multiple goroutines.
//
-// See http://blog.golang.org/context for example code for a server that uses
+// See https://go.dev/blog/context for example code for a server that uses
// Contexts.
-package context // import "golang.org/x/net/context"
+//
+// [go fix]: https://go.dev/cmd/go#hdr-Update_packages_to_use_new_APIs
+package context
+
+import (
+ "context" // standard library's context, as of Go 1.7
+ "time"
+)
+
+// A Context carries a deadline, a cancellation signal, and other values across
+// API boundaries.
+//
+// Context's methods may be called by multiple goroutines simultaneously.
+type Context = context.Context
+
+// Canceled is the error returned by [Context.Err] when the context is canceled
+// for some reason other than its deadline passing.
+var Canceled = context.Canceled
+
+// DeadlineExceeded is the error returned by [Context.Err] when the context is canceled
+// due to its deadline passing.
+var DeadlineExceeded = context.DeadlineExceeded
// Background returns a non-nil, empty Context. It is never canceled, has no
// values, and has no deadline. It is typically used by the main function,
@@ -49,8 +72,73 @@ func Background() Context {
// TODO returns a non-nil, empty Context. Code should use context.TODO when
// it's unclear which Context to use or it is not yet available (because the
// surrounding function has not yet been extended to accept a Context
-// parameter). TODO is recognized by static analysis tools that determine
-// whether Contexts are propagated correctly in a program.
+// parameter).
func TODO() Context {
return todo
}
+
+var (
+ background = context.Background()
+ todo = context.TODO()
+)
+
+// A CancelFunc tells an operation to abandon its work.
+// A CancelFunc does not wait for the work to stop.
+// A CancelFunc may be called by multiple goroutines simultaneously.
+// After the first call, subsequent calls to a CancelFunc do nothing.
+type CancelFunc = context.CancelFunc
+
+// WithCancel returns a derived context that points to the parent context
+// but has a new Done channel. The returned context's Done channel is closed
+// when the returned cancel function is called or when the parent context's
+// Done channel is closed, whichever happens first.
+//
+// Canceling this context releases resources associated with it, so code should
+// call cancel as soon as the operations running in this [Context] complete.
+func WithCancel(parent Context) (ctx Context, cancel CancelFunc) {
+ return context.WithCancel(parent)
+}
+
+// WithDeadline returns a derived context that points to the parent context
+// but has the deadline adjusted to be no later than d. If the parent's
+// deadline is already earlier than d, WithDeadline(parent, d) is semantically
+// equivalent to parent. The returned [Context.Done] channel is closed when
+// the deadline expires, when the returned cancel function is called,
+// or when the parent context's Done channel is closed, whichever happens first.
+//
+// Canceling this context releases resources associated with it, so code should
+// call cancel as soon as the operations running in this [Context] complete.
+func WithDeadline(parent Context, d time.Time) (Context, CancelFunc) {
+ return context.WithDeadline(parent, d)
+}
+
+// WithTimeout returns WithDeadline(parent, time.Now().Add(timeout)).
+//
+// Canceling this context releases resources associated with it, so code should
+// call cancel as soon as the operations running in this [Context] complete:
+//
+// func slowOperationWithTimeout(ctx context.Context) (Result, error) {
+// ctx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
+// defer cancel() // releases resources if slowOperation completes before timeout elapses
+// return slowOperation(ctx)
+// }
+func WithTimeout(parent Context, timeout time.Duration) (Context, CancelFunc) {
+ return context.WithTimeout(parent, timeout)
+}
+
+// WithValue returns a derived context that points to the parent Context.
+// In the derived context, the value associated with key is val.
+//
+// Use context Values only for request-scoped data that transits processes and
+// APIs, not for passing optional parameters to functions.
+//
+// The provided key must be comparable and should not be of type
+// string or any other built-in type to avoid collisions between
+// packages using context. Users of WithValue should define their own
+// types for keys. To avoid allocating when assigning to an
+// interface{}, context keys often have concrete type
+// struct{}. Alternatively, exported context key variables' static
+// type should be a pointer or interface.
+func WithValue(parent Context, key, val interface{}) Context {
+ return context.WithValue(parent, key, val)
+}
diff --git a/vendor/golang.org/x/net/context/go17.go b/vendor/golang.org/x/net/context/go17.go
deleted file mode 100644
index 0c1b867937..0000000000
--- a/vendor/golang.org/x/net/context/go17.go
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.7
-
-package context
-
-import (
- "context" // standard library's context, as of Go 1.7
- "time"
-)
-
-var (
- todo = context.TODO()
- background = context.Background()
-)
-
-// Canceled is the error returned by Context.Err when the context is canceled.
-var Canceled = context.Canceled
-
-// DeadlineExceeded is the error returned by Context.Err when the context's
-// deadline passes.
-var DeadlineExceeded = context.DeadlineExceeded
-
-// WithCancel returns a copy of parent with a new Done channel. The returned
-// context's Done channel is closed when the returned cancel function is called
-// or when the parent context's Done channel is closed, whichever happens first.
-//
-// Canceling this context releases resources associated with it, so code should
-// call cancel as soon as the operations running in this Context complete.
-func WithCancel(parent Context) (ctx Context, cancel CancelFunc) {
- ctx, f := context.WithCancel(parent)
- return ctx, f
-}
-
-// WithDeadline returns a copy of the parent context with the deadline adjusted
-// to be no later than d. If the parent's deadline is already earlier than d,
-// WithDeadline(parent, d) is semantically equivalent to parent. The returned
-// context's Done channel is closed when the deadline expires, when the returned
-// cancel function is called, or when the parent context's Done channel is
-// closed, whichever happens first.
-//
-// Canceling this context releases resources associated with it, so code should
-// call cancel as soon as the operations running in this Context complete.
-func WithDeadline(parent Context, deadline time.Time) (Context, CancelFunc) {
- ctx, f := context.WithDeadline(parent, deadline)
- return ctx, f
-}
-
-// WithTimeout returns WithDeadline(parent, time.Now().Add(timeout)).
-//
-// Canceling this context releases resources associated with it, so code should
-// call cancel as soon as the operations running in this Context complete:
-//
-// func slowOperationWithTimeout(ctx context.Context) (Result, error) {
-// ctx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
-// defer cancel() // releases resources if slowOperation completes before timeout elapses
-// return slowOperation(ctx)
-// }
-func WithTimeout(parent Context, timeout time.Duration) (Context, CancelFunc) {
- return WithDeadline(parent, time.Now().Add(timeout))
-}
-
-// WithValue returns a copy of parent in which the value associated with key is
-// val.
-//
-// Use context Values only for request-scoped data that transits processes and
-// APIs, not for passing optional parameters to functions.
-func WithValue(parent Context, key interface{}, val interface{}) Context {
- return context.WithValue(parent, key, val)
-}
diff --git a/vendor/golang.org/x/net/context/go19.go b/vendor/golang.org/x/net/context/go19.go
deleted file mode 100644
index e31e35a904..0000000000
--- a/vendor/golang.org/x/net/context/go19.go
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.9
-
-package context
-
-import "context" // standard library's context, as of Go 1.7
-
-// A Context carries a deadline, a cancelation signal, and other values across
-// API boundaries.
-//
-// Context's methods may be called by multiple goroutines simultaneously.
-type Context = context.Context
-
-// A CancelFunc tells an operation to abandon its work.
-// A CancelFunc does not wait for the work to stop.
-// After the first call, subsequent calls to a CancelFunc do nothing.
-type CancelFunc = context.CancelFunc
diff --git a/vendor/golang.org/x/net/context/pre_go17.go b/vendor/golang.org/x/net/context/pre_go17.go
deleted file mode 100644
index 065ff3dfa5..0000000000
--- a/vendor/golang.org/x/net/context/pre_go17.go
+++ /dev/null
@@ -1,300 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !go1.7
-
-package context
-
-import (
- "errors"
- "fmt"
- "sync"
- "time"
-)
-
-// An emptyCtx is never canceled, has no values, and has no deadline. It is not
-// struct{}, since vars of this type must have distinct addresses.
-type emptyCtx int
-
-func (*emptyCtx) Deadline() (deadline time.Time, ok bool) {
- return
-}
-
-func (*emptyCtx) Done() <-chan struct{} {
- return nil
-}
-
-func (*emptyCtx) Err() error {
- return nil
-}
-
-func (*emptyCtx) Value(key interface{}) interface{} {
- return nil
-}
-
-func (e *emptyCtx) String() string {
- switch e {
- case background:
- return "context.Background"
- case todo:
- return "context.TODO"
- }
- return "unknown empty Context"
-}
-
-var (
- background = new(emptyCtx)
- todo = new(emptyCtx)
-)
-
-// Canceled is the error returned by Context.Err when the context is canceled.
-var Canceled = errors.New("context canceled")
-
-// DeadlineExceeded is the error returned by Context.Err when the context's
-// deadline passes.
-var DeadlineExceeded = errors.New("context deadline exceeded")
-
-// WithCancel returns a copy of parent with a new Done channel. The returned
-// context's Done channel is closed when the returned cancel function is called
-// or when the parent context's Done channel is closed, whichever happens first.
-//
-// Canceling this context releases resources associated with it, so code should
-// call cancel as soon as the operations running in this Context complete.
-func WithCancel(parent Context) (ctx Context, cancel CancelFunc) {
- c := newCancelCtx(parent)
- propagateCancel(parent, c)
- return c, func() { c.cancel(true, Canceled) }
-}
-
-// newCancelCtx returns an initialized cancelCtx.
-func newCancelCtx(parent Context) *cancelCtx {
- return &cancelCtx{
- Context: parent,
- done: make(chan struct{}),
- }
-}
-
-// propagateCancel arranges for child to be canceled when parent is.
-func propagateCancel(parent Context, child canceler) {
- if parent.Done() == nil {
- return // parent is never canceled
- }
- if p, ok := parentCancelCtx(parent); ok {
- p.mu.Lock()
- if p.err != nil {
- // parent has already been canceled
- child.cancel(false, p.err)
- } else {
- if p.children == nil {
- p.children = make(map[canceler]bool)
- }
- p.children[child] = true
- }
- p.mu.Unlock()
- } else {
- go func() {
- select {
- case <-parent.Done():
- child.cancel(false, parent.Err())
- case <-child.Done():
- }
- }()
- }
-}
-
-// parentCancelCtx follows a chain of parent references until it finds a
-// *cancelCtx. This function understands how each of the concrete types in this
-// package represents its parent.
-func parentCancelCtx(parent Context) (*cancelCtx, bool) {
- for {
- switch c := parent.(type) {
- case *cancelCtx:
- return c, true
- case *timerCtx:
- return c.cancelCtx, true
- case *valueCtx:
- parent = c.Context
- default:
- return nil, false
- }
- }
-}
-
-// removeChild removes a context from its parent.
-func removeChild(parent Context, child canceler) {
- p, ok := parentCancelCtx(parent)
- if !ok {
- return
- }
- p.mu.Lock()
- if p.children != nil {
- delete(p.children, child)
- }
- p.mu.Unlock()
-}
-
-// A canceler is a context type that can be canceled directly. The
-// implementations are *cancelCtx and *timerCtx.
-type canceler interface {
- cancel(removeFromParent bool, err error)
- Done() <-chan struct{}
-}
-
-// A cancelCtx can be canceled. When canceled, it also cancels any children
-// that implement canceler.
-type cancelCtx struct {
- Context
-
- done chan struct{} // closed by the first cancel call.
-
- mu sync.Mutex
- children map[canceler]bool // set to nil by the first cancel call
- err error // set to non-nil by the first cancel call
-}
-
-func (c *cancelCtx) Done() <-chan struct{} {
- return c.done
-}
-
-func (c *cancelCtx) Err() error {
- c.mu.Lock()
- defer c.mu.Unlock()
- return c.err
-}
-
-func (c *cancelCtx) String() string {
- return fmt.Sprintf("%v.WithCancel", c.Context)
-}
-
-// cancel closes c.done, cancels each of c's children, and, if
-// removeFromParent is true, removes c from its parent's children.
-func (c *cancelCtx) cancel(removeFromParent bool, err error) {
- if err == nil {
- panic("context: internal error: missing cancel error")
- }
- c.mu.Lock()
- if c.err != nil {
- c.mu.Unlock()
- return // already canceled
- }
- c.err = err
- close(c.done)
- for child := range c.children {
- // NOTE: acquiring the child's lock while holding parent's lock.
- child.cancel(false, err)
- }
- c.children = nil
- c.mu.Unlock()
-
- if removeFromParent {
- removeChild(c.Context, c)
- }
-}
-
-// WithDeadline returns a copy of the parent context with the deadline adjusted
-// to be no later than d. If the parent's deadline is already earlier than d,
-// WithDeadline(parent, d) is semantically equivalent to parent. The returned
-// context's Done channel is closed when the deadline expires, when the returned
-// cancel function is called, or when the parent context's Done channel is
-// closed, whichever happens first.
-//
-// Canceling this context releases resources associated with it, so code should
-// call cancel as soon as the operations running in this Context complete.
-func WithDeadline(parent Context, deadline time.Time) (Context, CancelFunc) {
- if cur, ok := parent.Deadline(); ok && cur.Before(deadline) {
- // The current deadline is already sooner than the new one.
- return WithCancel(parent)
- }
- c := &timerCtx{
- cancelCtx: newCancelCtx(parent),
- deadline: deadline,
- }
- propagateCancel(parent, c)
- d := deadline.Sub(time.Now())
- if d <= 0 {
- c.cancel(true, DeadlineExceeded) // deadline has already passed
- return c, func() { c.cancel(true, Canceled) }
- }
- c.mu.Lock()
- defer c.mu.Unlock()
- if c.err == nil {
- c.timer = time.AfterFunc(d, func() {
- c.cancel(true, DeadlineExceeded)
- })
- }
- return c, func() { c.cancel(true, Canceled) }
-}
-
-// A timerCtx carries a timer and a deadline. It embeds a cancelCtx to
-// implement Done and Err. It implements cancel by stopping its timer then
-// delegating to cancelCtx.cancel.
-type timerCtx struct {
- *cancelCtx
- timer *time.Timer // Under cancelCtx.mu.
-
- deadline time.Time
-}
-
-func (c *timerCtx) Deadline() (deadline time.Time, ok bool) {
- return c.deadline, true
-}
-
-func (c *timerCtx) String() string {
- return fmt.Sprintf("%v.WithDeadline(%s [%s])", c.cancelCtx.Context, c.deadline, c.deadline.Sub(time.Now()))
-}
-
-func (c *timerCtx) cancel(removeFromParent bool, err error) {
- c.cancelCtx.cancel(false, err)
- if removeFromParent {
- // Remove this timerCtx from its parent cancelCtx's children.
- removeChild(c.cancelCtx.Context, c)
- }
- c.mu.Lock()
- if c.timer != nil {
- c.timer.Stop()
- c.timer = nil
- }
- c.mu.Unlock()
-}
-
-// WithTimeout returns WithDeadline(parent, time.Now().Add(timeout)).
-//
-// Canceling this context releases resources associated with it, so code should
-// call cancel as soon as the operations running in this Context complete:
-//
-// func slowOperationWithTimeout(ctx context.Context) (Result, error) {
-// ctx, cancel := context.WithTimeout(ctx, 100*time.Millisecond)
-// defer cancel() // releases resources if slowOperation completes before timeout elapses
-// return slowOperation(ctx)
-// }
-func WithTimeout(parent Context, timeout time.Duration) (Context, CancelFunc) {
- return WithDeadline(parent, time.Now().Add(timeout))
-}
-
-// WithValue returns a copy of parent in which the value associated with key is
-// val.
-//
-// Use context Values only for request-scoped data that transits processes and
-// APIs, not for passing optional parameters to functions.
-func WithValue(parent Context, key interface{}, val interface{}) Context {
- return &valueCtx{parent, key, val}
-}
-
-// A valueCtx carries a key-value pair. It implements Value for that key and
-// delegates all other calls to the embedded Context.
-type valueCtx struct {
- Context
- key, val interface{}
-}
-
-func (c *valueCtx) String() string {
- return fmt.Sprintf("%v.WithValue(%#v, %#v)", c.Context, c.key, c.val)
-}
-
-func (c *valueCtx) Value(key interface{}) interface{} {
- if c.key == key {
- return c.val
- }
- return c.Context.Value(key)
-}
diff --git a/vendor/golang.org/x/net/context/pre_go19.go b/vendor/golang.org/x/net/context/pre_go19.go
deleted file mode 100644
index ec5a638033..0000000000
--- a/vendor/golang.org/x/net/context/pre_go19.go
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !go1.9
-
-package context
-
-import "time"
-
-// A Context carries a deadline, a cancelation signal, and other values across
-// API boundaries.
-//
-// Context's methods may be called by multiple goroutines simultaneously.
-type Context interface {
- // Deadline returns the time when work done on behalf of this context
- // should be canceled. Deadline returns ok==false when no deadline is
- // set. Successive calls to Deadline return the same results.
- Deadline() (deadline time.Time, ok bool)
-
- // Done returns a channel that's closed when work done on behalf of this
- // context should be canceled. Done may return nil if this context can
- // never be canceled. Successive calls to Done return the same value.
- //
- // WithCancel arranges for Done to be closed when cancel is called;
- // WithDeadline arranges for Done to be closed when the deadline
- // expires; WithTimeout arranges for Done to be closed when the timeout
- // elapses.
- //
- // Done is provided for use in select statements:
- //
- // // Stream generates values with DoSomething and sends them to out
- // // until DoSomething returns an error or ctx.Done is closed.
- // func Stream(ctx context.Context, out chan<- Value) error {
- // for {
- // v, err := DoSomething(ctx)
- // if err != nil {
- // return err
- // }
- // select {
- // case <-ctx.Done():
- // return ctx.Err()
- // case out <- v:
- // }
- // }
- // }
- //
- // See http://blog.golang.org/pipelines for more examples of how to use
- // a Done channel for cancelation.
- Done() <-chan struct{}
-
- // Err returns a non-nil error value after Done is closed. Err returns
- // Canceled if the context was canceled or DeadlineExceeded if the
- // context's deadline passed. No other values for Err are defined.
- // After Done is closed, successive calls to Err return the same value.
- Err() error
-
- // Value returns the value associated with this context for key, or nil
- // if no value is associated with key. Successive calls to Value with
- // the same key returns the same result.
- //
- // Use context values only for request-scoped data that transits
- // processes and API boundaries, not for passing optional parameters to
- // functions.
- //
- // A key identifies a specific value in a Context. Functions that wish
- // to store values in Context typically allocate a key in a global
- // variable then use that key as the argument to context.WithValue and
- // Context.Value. A key can be any type that supports equality;
- // packages should define keys as an unexported type to avoid
- // collisions.
- //
- // Packages that define a Context key should provide type-safe accessors
- // for the values stores using that key:
- //
- // // Package user defines a User type that's stored in Contexts.
- // package user
- //
- // import "golang.org/x/net/context"
- //
- // // User is the type of value stored in the Contexts.
- // type User struct {...}
- //
- // // key is an unexported type for keys defined in this package.
- // // This prevents collisions with keys defined in other packages.
- // type key int
- //
- // // userKey is the key for user.User values in Contexts. It is
- // // unexported; clients use user.NewContext and user.FromContext
- // // instead of using this key directly.
- // var userKey key = 0
- //
- // // NewContext returns a new Context that carries value u.
- // func NewContext(ctx context.Context, u *User) context.Context {
- // return context.WithValue(ctx, userKey, u)
- // }
- //
- // // FromContext returns the User value stored in ctx, if any.
- // func FromContext(ctx context.Context) (*User, bool) {
- // u, ok := ctx.Value(userKey).(*User)
- // return u, ok
- // }
- Value(key interface{}) interface{}
-}
-
-// A CancelFunc tells an operation to abandon its work.
-// A CancelFunc does not wait for the work to stop.
-// After the first call, subsequent calls to a CancelFunc do nothing.
-type CancelFunc func()
diff --git a/vendor/golang.org/x/net/html/atom/table.go b/vendor/golang.org/x/net/html/atom/table.go
index 2a938864cb..b460e6f722 100644
--- a/vendor/golang.org/x/net/html/atom/table.go
+++ b/vendor/golang.org/x/net/html/atom/table.go
@@ -11,23 +11,23 @@ const (
AcceptCharset Atom = 0x1a0e
Accesskey Atom = 0x2c09
Acronym Atom = 0xaa07
- Action Atom = 0x27206
- Address Atom = 0x6f307
+ Action Atom = 0x26506
+ Address Atom = 0x6f107
Align Atom = 0xb105
- Allowfullscreen Atom = 0x2080f
+ Allowfullscreen Atom = 0x3280f
Allowpaymentrequest Atom = 0xc113
Allowusermedia Atom = 0xdd0e
Alt Atom = 0xf303
Annotation Atom = 0x1c90a
AnnotationXml Atom = 0x1c90e
- Applet Atom = 0x31906
- Area Atom = 0x35604
- Article Atom = 0x3fc07
+ Applet Atom = 0x30806
+ Area Atom = 0x35004
+ Article Atom = 0x3f607
As Atom = 0x3c02
Aside Atom = 0x10705
Async Atom = 0xff05
Audio Atom = 0x11505
- Autocomplete Atom = 0x2780c
+ Autocomplete Atom = 0x26b0c
Autofocus Atom = 0x12109
Autoplay Atom = 0x13c08
B Atom = 0x101
@@ -43,34 +43,34 @@ const (
Br Atom = 0x202
Button Atom = 0x19106
Canvas Atom = 0x10306
- Caption Atom = 0x23107
- Center Atom = 0x22006
- Challenge Atom = 0x29b09
+ Caption Atom = 0x22407
+ Center Atom = 0x21306
+ Challenge Atom = 0x28e09
Charset Atom = 0x2107
- Checked Atom = 0x47907
+ Checked Atom = 0x5b507
Cite Atom = 0x19c04
- Class Atom = 0x56405
- Code Atom = 0x5c504
+ Class Atom = 0x55805
+ Code Atom = 0x5ee04
Col Atom = 0x1ab03
Colgroup Atom = 0x1ab08
Color Atom = 0x1bf05
Cols Atom = 0x1c404
Colspan Atom = 0x1c407
Command Atom = 0x1d707
- Content Atom = 0x58b07
- Contenteditable Atom = 0x58b0f
- Contextmenu Atom = 0x3800b
+ Content Atom = 0x57b07
+ Contenteditable Atom = 0x57b0f
+ Contextmenu Atom = 0x37a0b
Controls Atom = 0x1de08
- Coords Atom = 0x1ea06
- Crossorigin Atom = 0x1fb0b
- Data Atom = 0x4a504
- Datalist Atom = 0x4a508
- Datetime Atom = 0x2b808
- Dd Atom = 0x2d702
+ Coords Atom = 0x1f006
+ Crossorigin Atom = 0x1fa0b
+ Data Atom = 0x49904
+ Datalist Atom = 0x49908
+ Datetime Atom = 0x2ab08
+ Dd Atom = 0x2bf02
Default Atom = 0x10a07
- Defer Atom = 0x5c705
- Del Atom = 0x45203
- Desc Atom = 0x56104
+ Defer Atom = 0x5f005
+ Del Atom = 0x44c03
+ Desc Atom = 0x55504
Details Atom = 0x7207
Dfn Atom = 0x8703
Dialog Atom = 0xbb06
@@ -78,106 +78,106 @@ const (
Dirname Atom = 0x9307
Disabled Atom = 0x16408
Div Atom = 0x16b03
- Dl Atom = 0x5e602
- Download Atom = 0x46308
+ Dl Atom = 0x5d602
+ Download Atom = 0x45d08
Draggable Atom = 0x17a09
- Dropzone Atom = 0x40508
- Dt Atom = 0x64b02
+ Dropzone Atom = 0x3ff08
+ Dt Atom = 0x64002
Em Atom = 0x6e02
Embed Atom = 0x6e05
- Enctype Atom = 0x28d07
- Face Atom = 0x21e04
- Fieldset Atom = 0x22608
- Figcaption Atom = 0x22e0a
- Figure Atom = 0x24806
+ Enctype Atom = 0x28007
+ Face Atom = 0x21104
+ Fieldset Atom = 0x21908
+ Figcaption Atom = 0x2210a
+ Figure Atom = 0x23b06
Font Atom = 0x3f04
Footer Atom = 0xf606
- For Atom = 0x25403
- ForeignObject Atom = 0x2540d
- Foreignobject Atom = 0x2610d
- Form Atom = 0x26e04
- Formaction Atom = 0x26e0a
- Formenctype Atom = 0x2890b
- Formmethod Atom = 0x2a40a
- Formnovalidate Atom = 0x2ae0e
- Formtarget Atom = 0x2c00a
+ For Atom = 0x24703
+ ForeignObject Atom = 0x2470d
+ Foreignobject Atom = 0x2540d
+ Form Atom = 0x26104
+ Formaction Atom = 0x2610a
+ Formenctype Atom = 0x27c0b
+ Formmethod Atom = 0x2970a
+ Formnovalidate Atom = 0x2a10e
+ Formtarget Atom = 0x2b30a
Frame Atom = 0x8b05
Frameset Atom = 0x8b08
H1 Atom = 0x15c02
- H2 Atom = 0x2de02
- H3 Atom = 0x30d02
- H4 Atom = 0x34502
- H5 Atom = 0x34f02
- H6 Atom = 0x64d02
- Head Atom = 0x33104
- Header Atom = 0x33106
- Headers Atom = 0x33107
+ H2 Atom = 0x56102
+ H3 Atom = 0x2cd02
+ H4 Atom = 0x2fc02
+ H5 Atom = 0x33f02
+ H6 Atom = 0x34902
+ Head Atom = 0x32004
+ Header Atom = 0x32006
+ Headers Atom = 0x32007
Height Atom = 0x5206
- Hgroup Atom = 0x2ca06
- Hidden Atom = 0x2d506
- High Atom = 0x2db04
+ Hgroup Atom = 0x64206
+ Hidden Atom = 0x2bd06
+ High Atom = 0x2ca04
Hr Atom = 0x15702
- Href Atom = 0x2e004
- Hreflang Atom = 0x2e008
+ Href Atom = 0x2cf04
+ Hreflang Atom = 0x2cf08
Html Atom = 0x5604
- HttpEquiv Atom = 0x2e80a
+ HttpEquiv Atom = 0x2d70a
I Atom = 0x601
- Icon Atom = 0x58a04
+ Icon Atom = 0x57a04
Id Atom = 0x10902
- Iframe Atom = 0x2fc06
- Image Atom = 0x30205
- Img Atom = 0x30703
- Input Atom = 0x44b05
- Inputmode Atom = 0x44b09
- Ins Atom = 0x20403
- Integrity Atom = 0x23f09
+ Iframe Atom = 0x2eb06
+ Image Atom = 0x2f105
+ Img Atom = 0x2f603
+ Input Atom = 0x44505
+ Inputmode Atom = 0x44509
+ Ins Atom = 0x20303
+ Integrity Atom = 0x23209
Is Atom = 0x16502
- Isindex Atom = 0x30f07
- Ismap Atom = 0x31605
- Itemid Atom = 0x38b06
+ Isindex Atom = 0x2fe07
+ Ismap Atom = 0x30505
+ Itemid Atom = 0x38506
Itemprop Atom = 0x19d08
- Itemref Atom = 0x3cd07
- Itemscope Atom = 0x67109
- Itemtype Atom = 0x31f08
+ Itemref Atom = 0x3c707
+ Itemscope Atom = 0x66f09
+ Itemtype Atom = 0x30e08
Kbd Atom = 0xb903
Keygen Atom = 0x3206
Keytype Atom = 0xd607
Kind Atom = 0x17704
Label Atom = 0x5905
- Lang Atom = 0x2e404
+ Lang Atom = 0x2d304
Legend Atom = 0x18106
Li Atom = 0xb202
Link Atom = 0x17404
- List Atom = 0x4a904
- Listing Atom = 0x4a907
+ List Atom = 0x49d04
+ Listing Atom = 0x49d07
Loop Atom = 0x5d04
Low Atom = 0xc303
Main Atom = 0x1004
Malignmark Atom = 0xb00a
- Manifest Atom = 0x6d708
- Map Atom = 0x31803
+ Manifest Atom = 0x6d508
+ Map Atom = 0x30703
Mark Atom = 0xb604
- Marquee Atom = 0x32707
- Math Atom = 0x32e04
- Max Atom = 0x33d03
- Maxlength Atom = 0x33d09
+ Marquee Atom = 0x31607
+ Math Atom = 0x31d04
+ Max Atom = 0x33703
+ Maxlength Atom = 0x33709
Media Atom = 0xe605
Mediagroup Atom = 0xe60a
- Menu Atom = 0x38704
- Menuitem Atom = 0x38708
- Meta Atom = 0x4b804
+ Menu Atom = 0x38104
+ Menuitem Atom = 0x38108
+ Meta Atom = 0x4ac04
Meter Atom = 0x9805
- Method Atom = 0x2a806
- Mglyph Atom = 0x30806
- Mi Atom = 0x34702
- Min Atom = 0x34703
- Minlength Atom = 0x34709
- Mn Atom = 0x2b102
+ Method Atom = 0x29b06
+ Mglyph Atom = 0x2f706
+ Mi Atom = 0x34102
+ Min Atom = 0x34103
+ Minlength Atom = 0x34109
+ Mn Atom = 0x2a402
Mo Atom = 0xa402
- Ms Atom = 0x67402
- Mtext Atom = 0x35105
- Multiple Atom = 0x35f08
- Muted Atom = 0x36705
+ Ms Atom = 0x67202
+ Mtext Atom = 0x34b05
+ Multiple Atom = 0x35908
+ Muted Atom = 0x36105
Name Atom = 0x9604
Nav Atom = 0x1303
Nobr Atom = 0x3704
@@ -185,101 +185,101 @@ const (
Noframes Atom = 0x8908
Nomodule Atom = 0xa208
Nonce Atom = 0x1a605
- Noscript Atom = 0x21608
- Novalidate Atom = 0x2b20a
- Object Atom = 0x26806
+ Noscript Atom = 0x2c208
+ Novalidate Atom = 0x2a50a
+ Object Atom = 0x25b06
Ol Atom = 0x13702
Onabort Atom = 0x19507
- Onafterprint Atom = 0x2360c
- Onautocomplete Atom = 0x2760e
- Onautocompleteerror Atom = 0x27613
- Onauxclick Atom = 0x61f0a
- Onbeforeprint Atom = 0x69e0d
- Onbeforeunload Atom = 0x6e70e
- Onblur Atom = 0x56d06
+ Onafterprint Atom = 0x2290c
+ Onautocomplete Atom = 0x2690e
+ Onautocompleteerror Atom = 0x26913
+ Onauxclick Atom = 0x6140a
+ Onbeforeprint Atom = 0x69c0d
+ Onbeforeunload Atom = 0x6e50e
+ Onblur Atom = 0x1ea06
Oncancel Atom = 0x11908
Oncanplay Atom = 0x14d09
Oncanplaythrough Atom = 0x14d10
- Onchange Atom = 0x41b08
- Onclick Atom = 0x2f507
- Onclose Atom = 0x36c07
- Oncontextmenu Atom = 0x37e0d
- Oncopy Atom = 0x39106
- Oncuechange Atom = 0x3970b
- Oncut Atom = 0x3a205
- Ondblclick Atom = 0x3a70a
- Ondrag Atom = 0x3b106
- Ondragend Atom = 0x3b109
- Ondragenter Atom = 0x3ba0b
- Ondragexit Atom = 0x3c50a
- Ondragleave Atom = 0x3df0b
- Ondragover Atom = 0x3ea0a
- Ondragstart Atom = 0x3f40b
- Ondrop Atom = 0x40306
- Ondurationchange Atom = 0x41310
- Onemptied Atom = 0x40a09
- Onended Atom = 0x42307
- Onerror Atom = 0x42a07
- Onfocus Atom = 0x43107
- Onhashchange Atom = 0x43d0c
- Oninput Atom = 0x44907
- Oninvalid Atom = 0x45509
- Onkeydown Atom = 0x45e09
- Onkeypress Atom = 0x46b0a
- Onkeyup Atom = 0x48007
- Onlanguagechange Atom = 0x48d10
- Onload Atom = 0x49d06
- Onloadeddata Atom = 0x49d0c
- Onloadedmetadata Atom = 0x4b010
- Onloadend Atom = 0x4c609
- Onloadstart Atom = 0x4cf0b
- Onmessage Atom = 0x4da09
- Onmessageerror Atom = 0x4da0e
- Onmousedown Atom = 0x4e80b
- Onmouseenter Atom = 0x4f30c
- Onmouseleave Atom = 0x4ff0c
- Onmousemove Atom = 0x50b0b
- Onmouseout Atom = 0x5160a
- Onmouseover Atom = 0x5230b
- Onmouseup Atom = 0x52e09
- Onmousewheel Atom = 0x53c0c
- Onoffline Atom = 0x54809
- Ononline Atom = 0x55108
- Onpagehide Atom = 0x5590a
- Onpageshow Atom = 0x5730a
- Onpaste Atom = 0x57f07
- Onpause Atom = 0x59a07
- Onplay Atom = 0x5a406
- Onplaying Atom = 0x5a409
- Onpopstate Atom = 0x5ad0a
- Onprogress Atom = 0x5b70a
- Onratechange Atom = 0x5cc0c
- Onrejectionhandled Atom = 0x5d812
- Onreset Atom = 0x5ea07
- Onresize Atom = 0x5f108
- Onscroll Atom = 0x60008
- Onsecuritypolicyviolation Atom = 0x60819
- Onseeked Atom = 0x62908
- Onseeking Atom = 0x63109
- Onselect Atom = 0x63a08
- Onshow Atom = 0x64406
- Onsort Atom = 0x64f06
- Onstalled Atom = 0x65909
- Onstorage Atom = 0x66209
- Onsubmit Atom = 0x66b08
- Onsuspend Atom = 0x67b09
+ Onchange Atom = 0x41508
+ Onclick Atom = 0x2e407
+ Onclose Atom = 0x36607
+ Oncontextmenu Atom = 0x3780d
+ Oncopy Atom = 0x38b06
+ Oncuechange Atom = 0x3910b
+ Oncut Atom = 0x39c05
+ Ondblclick Atom = 0x3a10a
+ Ondrag Atom = 0x3ab06
+ Ondragend Atom = 0x3ab09
+ Ondragenter Atom = 0x3b40b
+ Ondragexit Atom = 0x3bf0a
+ Ondragleave Atom = 0x3d90b
+ Ondragover Atom = 0x3e40a
+ Ondragstart Atom = 0x3ee0b
+ Ondrop Atom = 0x3fd06
+ Ondurationchange Atom = 0x40d10
+ Onemptied Atom = 0x40409
+ Onended Atom = 0x41d07
+ Onerror Atom = 0x42407
+ Onfocus Atom = 0x42b07
+ Onhashchange Atom = 0x4370c
+ Oninput Atom = 0x44307
+ Oninvalid Atom = 0x44f09
+ Onkeydown Atom = 0x45809
+ Onkeypress Atom = 0x4650a
+ Onkeyup Atom = 0x47407
+ Onlanguagechange Atom = 0x48110
+ Onload Atom = 0x49106
+ Onloadeddata Atom = 0x4910c
+ Onloadedmetadata Atom = 0x4a410
+ Onloadend Atom = 0x4ba09
+ Onloadstart Atom = 0x4c30b
+ Onmessage Atom = 0x4ce09
+ Onmessageerror Atom = 0x4ce0e
+ Onmousedown Atom = 0x4dc0b
+ Onmouseenter Atom = 0x4e70c
+ Onmouseleave Atom = 0x4f30c
+ Onmousemove Atom = 0x4ff0b
+ Onmouseout Atom = 0x50a0a
+ Onmouseover Atom = 0x5170b
+ Onmouseup Atom = 0x52209
+ Onmousewheel Atom = 0x5300c
+ Onoffline Atom = 0x53c09
+ Ononline Atom = 0x54508
+ Onpagehide Atom = 0x54d0a
+ Onpageshow Atom = 0x5630a
+ Onpaste Atom = 0x56f07
+ Onpause Atom = 0x58a07
+ Onplay Atom = 0x59406
+ Onplaying Atom = 0x59409
+ Onpopstate Atom = 0x59d0a
+ Onprogress Atom = 0x5a70a
+ Onratechange Atom = 0x5bc0c
+ Onrejectionhandled Atom = 0x5c812
+ Onreset Atom = 0x5da07
+ Onresize Atom = 0x5e108
+ Onscroll Atom = 0x5f508
+ Onsecuritypolicyviolation Atom = 0x5fd19
+ Onseeked Atom = 0x61e08
+ Onseeking Atom = 0x62609
+ Onselect Atom = 0x62f08
+ Onshow Atom = 0x63906
+ Onsort Atom = 0x64d06
+ Onstalled Atom = 0x65709
+ Onstorage Atom = 0x66009
+ Onsubmit Atom = 0x66908
+ Onsuspend Atom = 0x67909
Ontimeupdate Atom = 0x400c
- Ontoggle Atom = 0x68408
- Onunhandledrejection Atom = 0x68c14
- Onunload Atom = 0x6ab08
- Onvolumechange Atom = 0x6b30e
- Onwaiting Atom = 0x6c109
- Onwheel Atom = 0x6ca07
+ Ontoggle Atom = 0x68208
+ Onunhandledrejection Atom = 0x68a14
+ Onunload Atom = 0x6a908
+ Onvolumechange Atom = 0x6b10e
+ Onwaiting Atom = 0x6bf09
+ Onwheel Atom = 0x6c807
Open Atom = 0x1a304
Optgroup Atom = 0x5f08
- Optimum Atom = 0x6d107
- Option Atom = 0x6e306
- Output Atom = 0x51d06
+ Optimum Atom = 0x6cf07
+ Option Atom = 0x6e106
+ Output Atom = 0x51106
P Atom = 0xc01
Param Atom = 0xc05
Pattern Atom = 0x6607
@@ -288,466 +288,468 @@ const (
Placeholder Atom = 0x1310b
Plaintext Atom = 0x1b209
Playsinline Atom = 0x1400b
- Poster Atom = 0x2cf06
- Pre Atom = 0x47003
- Preload Atom = 0x48607
- Progress Atom = 0x5b908
- Prompt Atom = 0x53606
- Public Atom = 0x58606
+ Poster Atom = 0x64706
+ Pre Atom = 0x46a03
+ Preload Atom = 0x47a07
+ Progress Atom = 0x5a908
+ Prompt Atom = 0x52a06
+ Public Atom = 0x57606
Q Atom = 0xcf01
Radiogroup Atom = 0x30a
Rb Atom = 0x3a02
- Readonly Atom = 0x35708
- Referrerpolicy Atom = 0x3d10e
- Rel Atom = 0x48703
- Required Atom = 0x24c08
+ Readonly Atom = 0x35108
+ Referrerpolicy Atom = 0x3cb0e
+ Rel Atom = 0x47b03
+ Required Atom = 0x23f08
Reversed Atom = 0x8008
Rows Atom = 0x9c04
Rowspan Atom = 0x9c07
- Rp Atom = 0x23c02
+ Rp Atom = 0x22f02
Rt Atom = 0x19a02
Rtc Atom = 0x19a03
Ruby Atom = 0xfb04
S Atom = 0x2501
Samp Atom = 0x7804
Sandbox Atom = 0x12907
- Scope Atom = 0x67505
- Scoped Atom = 0x67506
- Script Atom = 0x21806
- Seamless Atom = 0x37108
- Section Atom = 0x56807
- Select Atom = 0x63c06
- Selected Atom = 0x63c08
- Shape Atom = 0x1e505
- Size Atom = 0x5f504
- Sizes Atom = 0x5f505
- Slot Atom = 0x1ef04
- Small Atom = 0x20605
- Sortable Atom = 0x65108
- Sorted Atom = 0x33706
- Source Atom = 0x37806
- Spacer Atom = 0x43706
+ Scope Atom = 0x67305
+ Scoped Atom = 0x67306
+ Script Atom = 0x2c406
+ Seamless Atom = 0x36b08
+ Search Atom = 0x55c06
+ Section Atom = 0x1e507
+ Select Atom = 0x63106
+ Selected Atom = 0x63108
+ Shape Atom = 0x1f505
+ Size Atom = 0x5e504
+ Sizes Atom = 0x5e505
+ Slot Atom = 0x20504
+ Small Atom = 0x32605
+ Sortable Atom = 0x64f08
+ Sorted Atom = 0x37206
+ Source Atom = 0x43106
+ Spacer Atom = 0x46e06
Span Atom = 0x9f04
- Spellcheck Atom = 0x4740a
- Src Atom = 0x5c003
- Srcdoc Atom = 0x5c006
- Srclang Atom = 0x5f907
- Srcset Atom = 0x6f906
- Start Atom = 0x3fa05
- Step Atom = 0x58304
+ Spellcheck Atom = 0x5b00a
+ Src Atom = 0x5e903
+ Srcdoc Atom = 0x5e906
+ Srclang Atom = 0x6f707
+ Srcset Atom = 0x6fe06
+ Start Atom = 0x3f405
+ Step Atom = 0x57304
Strike Atom = 0xd206
- Strong Atom = 0x6dd06
- Style Atom = 0x6ff05
- Sub Atom = 0x66d03
- Summary Atom = 0x70407
- Sup Atom = 0x70b03
- Svg Atom = 0x70e03
- System Atom = 0x71106
- Tabindex Atom = 0x4be08
- Table Atom = 0x59505
- Target Atom = 0x2c406
+ Strong Atom = 0x6db06
+ Style Atom = 0x70405
+ Sub Atom = 0x66b03
+ Summary Atom = 0x70907
+ Sup Atom = 0x71003
+ Svg Atom = 0x71303
+ System Atom = 0x71606
+ Tabindex Atom = 0x4b208
+ Table Atom = 0x58505
+ Target Atom = 0x2b706
Tbody Atom = 0x2705
Td Atom = 0x9202
- Template Atom = 0x71408
- Textarea Atom = 0x35208
+ Template Atom = 0x71908
+ Textarea Atom = 0x34c08
Tfoot Atom = 0xf505
Th Atom = 0x15602
- Thead Atom = 0x33005
+ Thead Atom = 0x31f05
Time Atom = 0x4204
Title Atom = 0x11005
Tr Atom = 0xcc02
Track Atom = 0x1ba05
- Translate Atom = 0x1f209
+ Translate Atom = 0x20809
Tt Atom = 0x6802
Type Atom = 0xd904
- Typemustmatch Atom = 0x2900d
+ Typemustmatch Atom = 0x2830d
U Atom = 0xb01
Ul Atom = 0xa702
Updateviacache Atom = 0x460e
- Usemap Atom = 0x59e06
+ Usemap Atom = 0x58e06
Value Atom = 0x1505
Var Atom = 0x16d03
- Video Atom = 0x2f105
- Wbr Atom = 0x57c03
- Width Atom = 0x64905
- Workertype Atom = 0x71c0a
- Wrap Atom = 0x72604
+ Video Atom = 0x2e005
+ Wbr Atom = 0x56c03
+ Width Atom = 0x63e05
+ Workertype Atom = 0x7210a
+ Wrap Atom = 0x72b04
Xmp Atom = 0x12f03
)
-const hash0 = 0x81cdf10e
+const hash0 = 0x84f70e16
const maxAtomLen = 25
var table = [1 << 9]Atom{
- 0x1: 0xe60a, // mediagroup
- 0x2: 0x2e404, // lang
- 0x4: 0x2c09, // accesskey
- 0x5: 0x8b08, // frameset
- 0x7: 0x63a08, // onselect
- 0x8: 0x71106, // system
- 0xa: 0x64905, // width
- 0xc: 0x2890b, // formenctype
- 0xd: 0x13702, // ol
- 0xe: 0x3970b, // oncuechange
- 0x10: 0x14b03, // bdo
- 0x11: 0x11505, // audio
- 0x12: 0x17a09, // draggable
- 0x14: 0x2f105, // video
- 0x15: 0x2b102, // mn
- 0x16: 0x38704, // menu
- 0x17: 0x2cf06, // poster
- 0x19: 0xf606, // footer
- 0x1a: 0x2a806, // method
- 0x1b: 0x2b808, // datetime
- 0x1c: 0x19507, // onabort
- 0x1d: 0x460e, // updateviacache
- 0x1e: 0xff05, // async
- 0x1f: 0x49d06, // onload
- 0x21: 0x11908, // oncancel
- 0x22: 0x62908, // onseeked
- 0x23: 0x30205, // image
- 0x24: 0x5d812, // onrejectionhandled
- 0x26: 0x17404, // link
- 0x27: 0x51d06, // output
- 0x28: 0x33104, // head
- 0x29: 0x4ff0c, // onmouseleave
- 0x2a: 0x57f07, // onpaste
- 0x2b: 0x5a409, // onplaying
- 0x2c: 0x1c407, // colspan
- 0x2f: 0x1bf05, // color
- 0x30: 0x5f504, // size
- 0x31: 0x2e80a, // http-equiv
- 0x33: 0x601, // i
- 0x34: 0x5590a, // onpagehide
- 0x35: 0x68c14, // onunhandledrejection
- 0x37: 0x42a07, // onerror
- 0x3a: 0x3b08, // basefont
- 0x3f: 0x1303, // nav
- 0x40: 0x17704, // kind
- 0x41: 0x35708, // readonly
- 0x42: 0x30806, // mglyph
- 0x44: 0xb202, // li
- 0x46: 0x2d506, // hidden
- 0x47: 0x70e03, // svg
- 0x48: 0x58304, // step
- 0x49: 0x23f09, // integrity
- 0x4a: 0x58606, // public
- 0x4c: 0x1ab03, // col
- 0x4d: 0x1870a, // blockquote
- 0x4e: 0x34f02, // h5
- 0x50: 0x5b908, // progress
- 0x51: 0x5f505, // sizes
- 0x52: 0x34502, // h4
- 0x56: 0x33005, // thead
- 0x57: 0xd607, // keytype
- 0x58: 0x5b70a, // onprogress
- 0x59: 0x44b09, // inputmode
- 0x5a: 0x3b109, // ondragend
- 0x5d: 0x3a205, // oncut
- 0x5e: 0x43706, // spacer
- 0x5f: 0x1ab08, // colgroup
- 0x62: 0x16502, // is
- 0x65: 0x3c02, // as
- 0x66: 0x54809, // onoffline
- 0x67: 0x33706, // sorted
- 0x69: 0x48d10, // onlanguagechange
- 0x6c: 0x43d0c, // onhashchange
- 0x6d: 0x9604, // name
- 0x6e: 0xf505, // tfoot
- 0x6f: 0x56104, // desc
- 0x70: 0x33d03, // max
- 0x72: 0x1ea06, // coords
- 0x73: 0x30d02, // h3
- 0x74: 0x6e70e, // onbeforeunload
- 0x75: 0x9c04, // rows
- 0x76: 0x63c06, // select
- 0x77: 0x9805, // meter
- 0x78: 0x38b06, // itemid
- 0x79: 0x53c0c, // onmousewheel
- 0x7a: 0x5c006, // srcdoc
- 0x7d: 0x1ba05, // track
- 0x7f: 0x31f08, // itemtype
- 0x82: 0xa402, // mo
- 0x83: 0x41b08, // onchange
- 0x84: 0x33107, // headers
- 0x85: 0x5cc0c, // onratechange
- 0x86: 0x60819, // onsecuritypolicyviolation
- 0x88: 0x4a508, // datalist
- 0x89: 0x4e80b, // onmousedown
- 0x8a: 0x1ef04, // slot
- 0x8b: 0x4b010, // onloadedmetadata
- 0x8c: 0x1a06, // accept
- 0x8d: 0x26806, // object
- 0x91: 0x6b30e, // onvolumechange
- 0x92: 0x2107, // charset
- 0x93: 0x27613, // onautocompleteerror
- 0x94: 0xc113, // allowpaymentrequest
- 0x95: 0x2804, // body
- 0x96: 0x10a07, // default
- 0x97: 0x63c08, // selected
- 0x98: 0x21e04, // face
- 0x99: 0x1e505, // shape
- 0x9b: 0x68408, // ontoggle
- 0x9e: 0x64b02, // dt
- 0x9f: 0xb604, // mark
- 0xa1: 0xb01, // u
- 0xa4: 0x6ab08, // onunload
- 0xa5: 0x5d04, // loop
- 0xa6: 0x16408, // disabled
- 0xaa: 0x42307, // onended
- 0xab: 0xb00a, // malignmark
- 0xad: 0x67b09, // onsuspend
- 0xae: 0x35105, // mtext
- 0xaf: 0x64f06, // onsort
- 0xb0: 0x19d08, // itemprop
- 0xb3: 0x67109, // itemscope
- 0xb4: 0x17305, // blink
- 0xb6: 0x3b106, // ondrag
- 0xb7: 0xa702, // ul
- 0xb8: 0x26e04, // form
- 0xb9: 0x12907, // sandbox
- 0xba: 0x8b05, // frame
- 0xbb: 0x1505, // value
- 0xbc: 0x66209, // onstorage
- 0xbf: 0xaa07, // acronym
- 0xc0: 0x19a02, // rt
- 0xc2: 0x202, // br
- 0xc3: 0x22608, // fieldset
- 0xc4: 0x2900d, // typemustmatch
- 0xc5: 0xa208, // nomodule
- 0xc6: 0x6c07, // noembed
- 0xc7: 0x69e0d, // onbeforeprint
- 0xc8: 0x19106, // button
- 0xc9: 0x2f507, // onclick
- 0xca: 0x70407, // summary
- 0xcd: 0xfb04, // ruby
- 0xce: 0x56405, // class
- 0xcf: 0x3f40b, // ondragstart
- 0xd0: 0x23107, // caption
- 0xd4: 0xdd0e, // allowusermedia
- 0xd5: 0x4cf0b, // onloadstart
- 0xd9: 0x16b03, // div
- 0xda: 0x4a904, // list
- 0xdb: 0x32e04, // math
- 0xdc: 0x44b05, // input
- 0xdf: 0x3ea0a, // ondragover
- 0xe0: 0x2de02, // h2
- 0xe2: 0x1b209, // plaintext
- 0xe4: 0x4f30c, // onmouseenter
- 0xe7: 0x47907, // checked
- 0xe8: 0x47003, // pre
- 0xea: 0x35f08, // multiple
- 0xeb: 0xba03, // bdi
- 0xec: 0x33d09, // maxlength
- 0xed: 0xcf01, // q
- 0xee: 0x61f0a, // onauxclick
- 0xf0: 0x57c03, // wbr
- 0xf2: 0x3b04, // base
- 0xf3: 0x6e306, // option
- 0xf5: 0x41310, // ondurationchange
- 0xf7: 0x8908, // noframes
- 0xf9: 0x40508, // dropzone
- 0xfb: 0x67505, // scope
- 0xfc: 0x8008, // reversed
- 0xfd: 0x3ba0b, // ondragenter
- 0xfe: 0x3fa05, // start
- 0xff: 0x12f03, // xmp
- 0x100: 0x5f907, // srclang
- 0x101: 0x30703, // img
- 0x104: 0x101, // b
- 0x105: 0x25403, // for
- 0x106: 0x10705, // aside
- 0x107: 0x44907, // oninput
- 0x108: 0x35604, // area
- 0x109: 0x2a40a, // formmethod
- 0x10a: 0x72604, // wrap
- 0x10c: 0x23c02, // rp
- 0x10d: 0x46b0a, // onkeypress
- 0x10e: 0x6802, // tt
- 0x110: 0x34702, // mi
- 0x111: 0x36705, // muted
- 0x112: 0xf303, // alt
- 0x113: 0x5c504, // code
- 0x114: 0x6e02, // em
- 0x115: 0x3c50a, // ondragexit
- 0x117: 0x9f04, // span
- 0x119: 0x6d708, // manifest
- 0x11a: 0x38708, // menuitem
- 0x11b: 0x58b07, // content
- 0x11d: 0x6c109, // onwaiting
- 0x11f: 0x4c609, // onloadend
- 0x121: 0x37e0d, // oncontextmenu
- 0x123: 0x56d06, // onblur
- 0x124: 0x3fc07, // article
- 0x125: 0x9303, // dir
- 0x126: 0xef04, // ping
- 0x127: 0x24c08, // required
- 0x128: 0x45509, // oninvalid
- 0x129: 0xb105, // align
- 0x12b: 0x58a04, // icon
- 0x12c: 0x64d02, // h6
- 0x12d: 0x1c404, // cols
- 0x12e: 0x22e0a, // figcaption
- 0x12f: 0x45e09, // onkeydown
- 0x130: 0x66b08, // onsubmit
- 0x131: 0x14d09, // oncanplay
- 0x132: 0x70b03, // sup
- 0x133: 0xc01, // p
- 0x135: 0x40a09, // onemptied
- 0x136: 0x39106, // oncopy
- 0x137: 0x19c04, // cite
- 0x138: 0x3a70a, // ondblclick
- 0x13a: 0x50b0b, // onmousemove
- 0x13c: 0x66d03, // sub
- 0x13d: 0x48703, // rel
- 0x13e: 0x5f08, // optgroup
- 0x142: 0x9c07, // rowspan
- 0x143: 0x37806, // source
- 0x144: 0x21608, // noscript
- 0x145: 0x1a304, // open
- 0x146: 0x20403, // ins
- 0x147: 0x2540d, // foreignObject
- 0x148: 0x5ad0a, // onpopstate
- 0x14a: 0x28d07, // enctype
- 0x14b: 0x2760e, // onautocomplete
- 0x14c: 0x35208, // textarea
- 0x14e: 0x2780c, // autocomplete
- 0x14f: 0x15702, // hr
- 0x150: 0x1de08, // controls
- 0x151: 0x10902, // id
- 0x153: 0x2360c, // onafterprint
- 0x155: 0x2610d, // foreignobject
- 0x156: 0x32707, // marquee
- 0x157: 0x59a07, // onpause
- 0x158: 0x5e602, // dl
- 0x159: 0x5206, // height
- 0x15a: 0x34703, // min
- 0x15b: 0x9307, // dirname
- 0x15c: 0x1f209, // translate
- 0x15d: 0x5604, // html
- 0x15e: 0x34709, // minlength
- 0x15f: 0x48607, // preload
- 0x160: 0x71408, // template
- 0x161: 0x3df0b, // ondragleave
- 0x162: 0x3a02, // rb
- 0x164: 0x5c003, // src
- 0x165: 0x6dd06, // strong
- 0x167: 0x7804, // samp
- 0x168: 0x6f307, // address
- 0x169: 0x55108, // ononline
- 0x16b: 0x1310b, // placeholder
- 0x16c: 0x2c406, // target
- 0x16d: 0x20605, // small
- 0x16e: 0x6ca07, // onwheel
- 0x16f: 0x1c90a, // annotation
- 0x170: 0x4740a, // spellcheck
- 0x171: 0x7207, // details
- 0x172: 0x10306, // canvas
- 0x173: 0x12109, // autofocus
- 0x174: 0xc05, // param
- 0x176: 0x46308, // download
- 0x177: 0x45203, // del
- 0x178: 0x36c07, // onclose
- 0x179: 0xb903, // kbd
- 0x17a: 0x31906, // applet
- 0x17b: 0x2e004, // href
- 0x17c: 0x5f108, // onresize
- 0x17e: 0x49d0c, // onloadeddata
- 0x180: 0xcc02, // tr
- 0x181: 0x2c00a, // formtarget
- 0x182: 0x11005, // title
- 0x183: 0x6ff05, // style
- 0x184: 0xd206, // strike
- 0x185: 0x59e06, // usemap
- 0x186: 0x2fc06, // iframe
- 0x187: 0x1004, // main
- 0x189: 0x7b07, // picture
- 0x18c: 0x31605, // ismap
- 0x18e: 0x4a504, // data
- 0x18f: 0x5905, // label
- 0x191: 0x3d10e, // referrerpolicy
- 0x192: 0x15602, // th
- 0x194: 0x53606, // prompt
- 0x195: 0x56807, // section
- 0x197: 0x6d107, // optimum
- 0x198: 0x2db04, // high
- 0x199: 0x15c02, // h1
- 0x19a: 0x65909, // onstalled
- 0x19b: 0x16d03, // var
- 0x19c: 0x4204, // time
- 0x19e: 0x67402, // ms
- 0x19f: 0x33106, // header
- 0x1a0: 0x4da09, // onmessage
- 0x1a1: 0x1a605, // nonce
- 0x1a2: 0x26e0a, // formaction
- 0x1a3: 0x22006, // center
- 0x1a4: 0x3704, // nobr
- 0x1a5: 0x59505, // table
- 0x1a6: 0x4a907, // listing
- 0x1a7: 0x18106, // legend
- 0x1a9: 0x29b09, // challenge
- 0x1aa: 0x24806, // figure
- 0x1ab: 0xe605, // media
- 0x1ae: 0xd904, // type
- 0x1af: 0x3f04, // font
- 0x1b0: 0x4da0e, // onmessageerror
- 0x1b1: 0x37108, // seamless
- 0x1b2: 0x8703, // dfn
- 0x1b3: 0x5c705, // defer
- 0x1b4: 0xc303, // low
- 0x1b5: 0x19a03, // rtc
- 0x1b6: 0x5230b, // onmouseover
- 0x1b7: 0x2b20a, // novalidate
- 0x1b8: 0x71c0a, // workertype
- 0x1ba: 0x3cd07, // itemref
- 0x1bd: 0x1, // a
- 0x1be: 0x31803, // map
- 0x1bf: 0x400c, // ontimeupdate
- 0x1c0: 0x15e07, // bgsound
- 0x1c1: 0x3206, // keygen
- 0x1c2: 0x2705, // tbody
- 0x1c5: 0x64406, // onshow
- 0x1c7: 0x2501, // s
- 0x1c8: 0x6607, // pattern
- 0x1cc: 0x14d10, // oncanplaythrough
- 0x1ce: 0x2d702, // dd
- 0x1cf: 0x6f906, // srcset
- 0x1d0: 0x17003, // big
- 0x1d2: 0x65108, // sortable
- 0x1d3: 0x48007, // onkeyup
- 0x1d5: 0x5a406, // onplay
- 0x1d7: 0x4b804, // meta
- 0x1d8: 0x40306, // ondrop
- 0x1da: 0x60008, // onscroll
- 0x1db: 0x1fb0b, // crossorigin
- 0x1dc: 0x5730a, // onpageshow
- 0x1dd: 0x4, // abbr
- 0x1de: 0x9202, // td
- 0x1df: 0x58b0f, // contenteditable
- 0x1e0: 0x27206, // action
- 0x1e1: 0x1400b, // playsinline
- 0x1e2: 0x43107, // onfocus
- 0x1e3: 0x2e008, // hreflang
- 0x1e5: 0x5160a, // onmouseout
- 0x1e6: 0x5ea07, // onreset
- 0x1e7: 0x13c08, // autoplay
- 0x1e8: 0x63109, // onseeking
- 0x1ea: 0x67506, // scoped
- 0x1ec: 0x30a, // radiogroup
- 0x1ee: 0x3800b, // contextmenu
- 0x1ef: 0x52e09, // onmouseup
- 0x1f1: 0x2ca06, // hgroup
- 0x1f2: 0x2080f, // allowfullscreen
- 0x1f3: 0x4be08, // tabindex
- 0x1f6: 0x30f07, // isindex
- 0x1f7: 0x1a0e, // accept-charset
- 0x1f8: 0x2ae0e, // formnovalidate
- 0x1fb: 0x1c90e, // annotation-xml
- 0x1fc: 0x6e05, // embed
- 0x1fd: 0x21806, // script
- 0x1fe: 0xbb06, // dialog
- 0x1ff: 0x1d707, // command
+ 0x1: 0x3ff08, // dropzone
+ 0x2: 0x3b08, // basefont
+ 0x3: 0x23209, // integrity
+ 0x4: 0x43106, // source
+ 0x5: 0x2c09, // accesskey
+ 0x6: 0x1a06, // accept
+ 0x7: 0x6c807, // onwheel
+ 0xb: 0x47407, // onkeyup
+ 0xc: 0x32007, // headers
+ 0xd: 0x67306, // scoped
+ 0xe: 0x67909, // onsuspend
+ 0xf: 0x8908, // noframes
+ 0x10: 0x1fa0b, // crossorigin
+ 0x11: 0x2e407, // onclick
+ 0x12: 0x3f405, // start
+ 0x13: 0x37a0b, // contextmenu
+ 0x14: 0x5e903, // src
+ 0x15: 0x1c404, // cols
+ 0x16: 0xbb06, // dialog
+ 0x17: 0x47a07, // preload
+ 0x18: 0x3c707, // itemref
+ 0x1b: 0x2f105, // image
+ 0x1d: 0x4ba09, // onloadend
+ 0x1e: 0x45d08, // download
+ 0x1f: 0x46a03, // pre
+ 0x23: 0x2970a, // formmethod
+ 0x24: 0x71303, // svg
+ 0x25: 0xcf01, // q
+ 0x26: 0x64002, // dt
+ 0x27: 0x1de08, // controls
+ 0x2a: 0x2804, // body
+ 0x2b: 0xd206, // strike
+ 0x2c: 0x3910b, // oncuechange
+ 0x2d: 0x4c30b, // onloadstart
+ 0x2e: 0x2fe07, // isindex
+ 0x2f: 0xb202, // li
+ 0x30: 0x1400b, // playsinline
+ 0x31: 0x34102, // mi
+ 0x32: 0x30806, // applet
+ 0x33: 0x4ce09, // onmessage
+ 0x35: 0x13702, // ol
+ 0x36: 0x1a304, // open
+ 0x39: 0x14d09, // oncanplay
+ 0x3a: 0x6bf09, // onwaiting
+ 0x3b: 0x11908, // oncancel
+ 0x3c: 0x6a908, // onunload
+ 0x3e: 0x53c09, // onoffline
+ 0x3f: 0x1a0e, // accept-charset
+ 0x40: 0x32004, // head
+ 0x42: 0x3ab09, // ondragend
+ 0x43: 0x1310b, // placeholder
+ 0x44: 0x2b30a, // formtarget
+ 0x45: 0x2540d, // foreignobject
+ 0x47: 0x400c, // ontimeupdate
+ 0x48: 0xdd0e, // allowusermedia
+ 0x4a: 0x69c0d, // onbeforeprint
+ 0x4b: 0x5604, // html
+ 0x4c: 0x9f04, // span
+ 0x4d: 0x64206, // hgroup
+ 0x4e: 0x16408, // disabled
+ 0x4f: 0x4204, // time
+ 0x51: 0x42b07, // onfocus
+ 0x53: 0xb00a, // malignmark
+ 0x55: 0x4650a, // onkeypress
+ 0x56: 0x55805, // class
+ 0x57: 0x1ab08, // colgroup
+ 0x58: 0x33709, // maxlength
+ 0x59: 0x5a908, // progress
+ 0x5b: 0x70405, // style
+ 0x5c: 0x2a10e, // formnovalidate
+ 0x5e: 0x38b06, // oncopy
+ 0x60: 0x26104, // form
+ 0x61: 0xf606, // footer
+ 0x64: 0x30a, // radiogroup
+ 0x66: 0xfb04, // ruby
+ 0x67: 0x4ff0b, // onmousemove
+ 0x68: 0x19d08, // itemprop
+ 0x69: 0x2d70a, // http-equiv
+ 0x6a: 0x15602, // th
+ 0x6c: 0x6e02, // em
+ 0x6d: 0x38108, // menuitem
+ 0x6e: 0x63106, // select
+ 0x6f: 0x48110, // onlanguagechange
+ 0x70: 0x31f05, // thead
+ 0x71: 0x15c02, // h1
+ 0x72: 0x5e906, // srcdoc
+ 0x75: 0x9604, // name
+ 0x76: 0x19106, // button
+ 0x77: 0x55504, // desc
+ 0x78: 0x17704, // kind
+ 0x79: 0x1bf05, // color
+ 0x7c: 0x58e06, // usemap
+ 0x7d: 0x30e08, // itemtype
+ 0x7f: 0x6d508, // manifest
+ 0x81: 0x5300c, // onmousewheel
+ 0x82: 0x4dc0b, // onmousedown
+ 0x84: 0xc05, // param
+ 0x85: 0x2e005, // video
+ 0x86: 0x4910c, // onloadeddata
+ 0x87: 0x6f107, // address
+ 0x8c: 0xef04, // ping
+ 0x8d: 0x24703, // for
+ 0x8f: 0x62f08, // onselect
+ 0x90: 0x30703, // map
+ 0x92: 0xc01, // p
+ 0x93: 0x8008, // reversed
+ 0x94: 0x54d0a, // onpagehide
+ 0x95: 0x3206, // keygen
+ 0x96: 0x34109, // minlength
+ 0x97: 0x3e40a, // ondragover
+ 0x98: 0x42407, // onerror
+ 0x9a: 0x2107, // charset
+ 0x9b: 0x29b06, // method
+ 0x9c: 0x101, // b
+ 0x9d: 0x68208, // ontoggle
+ 0x9e: 0x2bd06, // hidden
+ 0xa0: 0x3f607, // article
+ 0xa2: 0x63906, // onshow
+ 0xa3: 0x64d06, // onsort
+ 0xa5: 0x57b0f, // contenteditable
+ 0xa6: 0x66908, // onsubmit
+ 0xa8: 0x44f09, // oninvalid
+ 0xaa: 0x202, // br
+ 0xab: 0x10902, // id
+ 0xac: 0x5d04, // loop
+ 0xad: 0x5630a, // onpageshow
+ 0xb0: 0x2cf04, // href
+ 0xb2: 0x2210a, // figcaption
+ 0xb3: 0x2690e, // onautocomplete
+ 0xb4: 0x49106, // onload
+ 0xb6: 0x9c04, // rows
+ 0xb7: 0x1a605, // nonce
+ 0xb8: 0x68a14, // onunhandledrejection
+ 0xbb: 0x21306, // center
+ 0xbc: 0x59406, // onplay
+ 0xbd: 0x33f02, // h5
+ 0xbe: 0x49d07, // listing
+ 0xbf: 0x57606, // public
+ 0xc2: 0x23b06, // figure
+ 0xc3: 0x57a04, // icon
+ 0xc4: 0x1ab03, // col
+ 0xc5: 0x47b03, // rel
+ 0xc6: 0xe605, // media
+ 0xc7: 0x12109, // autofocus
+ 0xc8: 0x19a02, // rt
+ 0xca: 0x2d304, // lang
+ 0xcc: 0x49908, // datalist
+ 0xce: 0x2eb06, // iframe
+ 0xcf: 0x36105, // muted
+ 0xd0: 0x6140a, // onauxclick
+ 0xd2: 0x3c02, // as
+ 0xd6: 0x3fd06, // ondrop
+ 0xd7: 0x1c90a, // annotation
+ 0xd8: 0x21908, // fieldset
+ 0xdb: 0x2cf08, // hreflang
+ 0xdc: 0x4e70c, // onmouseenter
+ 0xdd: 0x2a402, // mn
+ 0xde: 0xe60a, // mediagroup
+ 0xdf: 0x9805, // meter
+ 0xe0: 0x56c03, // wbr
+ 0xe2: 0x63e05, // width
+ 0xe3: 0x2290c, // onafterprint
+ 0xe4: 0x30505, // ismap
+ 0xe5: 0x1505, // value
+ 0xe7: 0x1303, // nav
+ 0xe8: 0x54508, // ononline
+ 0xe9: 0xb604, // mark
+ 0xea: 0xc303, // low
+ 0xeb: 0x3ee0b, // ondragstart
+ 0xef: 0x12f03, // xmp
+ 0xf0: 0x22407, // caption
+ 0xf1: 0xd904, // type
+ 0xf2: 0x70907, // summary
+ 0xf3: 0x6802, // tt
+ 0xf4: 0x20809, // translate
+ 0xf5: 0x1870a, // blockquote
+ 0xf8: 0x15702, // hr
+ 0xfa: 0x2705, // tbody
+ 0xfc: 0x7b07, // picture
+ 0xfd: 0x5206, // height
+ 0xfe: 0x19c04, // cite
+ 0xff: 0x2501, // s
+ 0x101: 0xff05, // async
+ 0x102: 0x56f07, // onpaste
+ 0x103: 0x19507, // onabort
+ 0x104: 0x2b706, // target
+ 0x105: 0x14b03, // bdo
+ 0x106: 0x1f006, // coords
+ 0x107: 0x5e108, // onresize
+ 0x108: 0x71908, // template
+ 0x10a: 0x3a02, // rb
+ 0x10b: 0x2a50a, // novalidate
+ 0x10c: 0x460e, // updateviacache
+ 0x10d: 0x71003, // sup
+ 0x10e: 0x6c07, // noembed
+ 0x10f: 0x16b03, // div
+ 0x110: 0x6f707, // srclang
+ 0x111: 0x17a09, // draggable
+ 0x112: 0x67305, // scope
+ 0x113: 0x5905, // label
+ 0x114: 0x22f02, // rp
+ 0x115: 0x23f08, // required
+ 0x116: 0x3780d, // oncontextmenu
+ 0x117: 0x5e504, // size
+ 0x118: 0x5b00a, // spellcheck
+ 0x119: 0x3f04, // font
+ 0x11a: 0x9c07, // rowspan
+ 0x11b: 0x10a07, // default
+ 0x11d: 0x44307, // oninput
+ 0x11e: 0x38506, // itemid
+ 0x11f: 0x5ee04, // code
+ 0x120: 0xaa07, // acronym
+ 0x121: 0x3b04, // base
+ 0x125: 0x2470d, // foreignObject
+ 0x126: 0x2ca04, // high
+ 0x127: 0x3cb0e, // referrerpolicy
+ 0x128: 0x33703, // max
+ 0x129: 0x59d0a, // onpopstate
+ 0x12a: 0x2fc02, // h4
+ 0x12b: 0x4ac04, // meta
+ 0x12c: 0x17305, // blink
+ 0x12e: 0x5f508, // onscroll
+ 0x12f: 0x59409, // onplaying
+ 0x130: 0xc113, // allowpaymentrequest
+ 0x131: 0x19a03, // rtc
+ 0x132: 0x72b04, // wrap
+ 0x134: 0x8b08, // frameset
+ 0x135: 0x32605, // small
+ 0x137: 0x32006, // header
+ 0x138: 0x40409, // onemptied
+ 0x139: 0x34902, // h6
+ 0x13a: 0x35908, // multiple
+ 0x13c: 0x52a06, // prompt
+ 0x13f: 0x28e09, // challenge
+ 0x141: 0x4370c, // onhashchange
+ 0x142: 0x57b07, // content
+ 0x143: 0x1c90e, // annotation-xml
+ 0x144: 0x36607, // onclose
+ 0x145: 0x14d10, // oncanplaythrough
+ 0x148: 0x5170b, // onmouseover
+ 0x149: 0x64f08, // sortable
+ 0x14a: 0xa402, // mo
+ 0x14b: 0x2cd02, // h3
+ 0x14c: 0x2c406, // script
+ 0x14d: 0x41d07, // onended
+ 0x14f: 0x64706, // poster
+ 0x150: 0x7210a, // workertype
+ 0x153: 0x1f505, // shape
+ 0x154: 0x4, // abbr
+ 0x155: 0x1, // a
+ 0x156: 0x2bf02, // dd
+ 0x157: 0x71606, // system
+ 0x158: 0x4ce0e, // onmessageerror
+ 0x159: 0x36b08, // seamless
+ 0x15a: 0x2610a, // formaction
+ 0x15b: 0x6e106, // option
+ 0x15c: 0x31d04, // math
+ 0x15d: 0x62609, // onseeking
+ 0x15e: 0x39c05, // oncut
+ 0x15f: 0x44c03, // del
+ 0x160: 0x11005, // title
+ 0x161: 0x11505, // audio
+ 0x162: 0x63108, // selected
+ 0x165: 0x3b40b, // ondragenter
+ 0x166: 0x46e06, // spacer
+ 0x167: 0x4a410, // onloadedmetadata
+ 0x168: 0x44505, // input
+ 0x16a: 0x58505, // table
+ 0x16b: 0x41508, // onchange
+ 0x16e: 0x5f005, // defer
+ 0x171: 0x50a0a, // onmouseout
+ 0x172: 0x20504, // slot
+ 0x175: 0x3704, // nobr
+ 0x177: 0x1d707, // command
+ 0x17a: 0x7207, // details
+ 0x17b: 0x38104, // menu
+ 0x17c: 0xb903, // kbd
+ 0x17d: 0x57304, // step
+ 0x17e: 0x20303, // ins
+ 0x17f: 0x13c08, // autoplay
+ 0x182: 0x34103, // min
+ 0x183: 0x17404, // link
+ 0x185: 0x40d10, // ondurationchange
+ 0x186: 0x9202, // td
+ 0x187: 0x8b05, // frame
+ 0x18a: 0x2ab08, // datetime
+ 0x18b: 0x44509, // inputmode
+ 0x18c: 0x35108, // readonly
+ 0x18d: 0x21104, // face
+ 0x18f: 0x5e505, // sizes
+ 0x191: 0x4b208, // tabindex
+ 0x192: 0x6db06, // strong
+ 0x193: 0xba03, // bdi
+ 0x194: 0x6fe06, // srcset
+ 0x196: 0x67202, // ms
+ 0x197: 0x5b507, // checked
+ 0x198: 0xb105, // align
+ 0x199: 0x1e507, // section
+ 0x19b: 0x6e05, // embed
+ 0x19d: 0x15e07, // bgsound
+ 0x1a2: 0x49d04, // list
+ 0x1a3: 0x61e08, // onseeked
+ 0x1a4: 0x66009, // onstorage
+ 0x1a5: 0x2f603, // img
+ 0x1a6: 0xf505, // tfoot
+ 0x1a9: 0x26913, // onautocompleteerror
+ 0x1aa: 0x5fd19, // onsecuritypolicyviolation
+ 0x1ad: 0x9303, // dir
+ 0x1ae: 0x9307, // dirname
+ 0x1b0: 0x5a70a, // onprogress
+ 0x1b2: 0x65709, // onstalled
+ 0x1b5: 0x66f09, // itemscope
+ 0x1b6: 0x49904, // data
+ 0x1b7: 0x3d90b, // ondragleave
+ 0x1b8: 0x56102, // h2
+ 0x1b9: 0x2f706, // mglyph
+ 0x1ba: 0x16502, // is
+ 0x1bb: 0x6e50e, // onbeforeunload
+ 0x1bc: 0x2830d, // typemustmatch
+ 0x1bd: 0x3ab06, // ondrag
+ 0x1be: 0x5da07, // onreset
+ 0x1c0: 0x51106, // output
+ 0x1c1: 0x12907, // sandbox
+ 0x1c2: 0x1b209, // plaintext
+ 0x1c4: 0x34c08, // textarea
+ 0x1c7: 0xd607, // keytype
+ 0x1c8: 0x34b05, // mtext
+ 0x1c9: 0x6b10e, // onvolumechange
+ 0x1ca: 0x1ea06, // onblur
+ 0x1cb: 0x58a07, // onpause
+ 0x1cd: 0x5bc0c, // onratechange
+ 0x1ce: 0x10705, // aside
+ 0x1cf: 0x6cf07, // optimum
+ 0x1d1: 0x45809, // onkeydown
+ 0x1d2: 0x1c407, // colspan
+ 0x1d3: 0x1004, // main
+ 0x1d4: 0x66b03, // sub
+ 0x1d5: 0x25b06, // object
+ 0x1d6: 0x55c06, // search
+ 0x1d7: 0x37206, // sorted
+ 0x1d8: 0x17003, // big
+ 0x1d9: 0xb01, // u
+ 0x1db: 0x26b0c, // autocomplete
+ 0x1dc: 0xcc02, // tr
+ 0x1dd: 0xf303, // alt
+ 0x1df: 0x7804, // samp
+ 0x1e0: 0x5c812, // onrejectionhandled
+ 0x1e1: 0x4f30c, // onmouseleave
+ 0x1e2: 0x28007, // enctype
+ 0x1e3: 0xa208, // nomodule
+ 0x1e5: 0x3280f, // allowfullscreen
+ 0x1e6: 0x5f08, // optgroup
+ 0x1e8: 0x27c0b, // formenctype
+ 0x1e9: 0x18106, // legend
+ 0x1ea: 0x10306, // canvas
+ 0x1eb: 0x6607, // pattern
+ 0x1ec: 0x2c208, // noscript
+ 0x1ed: 0x601, // i
+ 0x1ee: 0x5d602, // dl
+ 0x1ef: 0xa702, // ul
+ 0x1f2: 0x52209, // onmouseup
+ 0x1f4: 0x1ba05, // track
+ 0x1f7: 0x3a10a, // ondblclick
+ 0x1f8: 0x3bf0a, // ondragexit
+ 0x1fa: 0x8703, // dfn
+ 0x1fc: 0x26506, // action
+ 0x1fd: 0x35004, // area
+ 0x1fe: 0x31607, // marquee
+ 0x1ff: 0x16d03, // var
}
const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb" +
@@ -758,26 +760,26 @@ const atomText = "abbradiogrouparamainavalueaccept-charsetbodyaccesskeygenobrb"
"dboxmplaceholderautoplaysinlinebdoncanplaythrough1bgsoundisa" +
"bledivarbigblinkindraggablegendblockquotebuttonabortcitempro" +
"penoncecolgrouplaintextrackcolorcolspannotation-xmlcommandco" +
- "ntrolshapecoordslotranslatecrossoriginsmallowfullscreenoscri" +
- "ptfacenterfieldsetfigcaptionafterprintegrityfigurequiredfore" +
- "ignObjectforeignobjectformactionautocompleteerrorformenctype" +
- "mustmatchallengeformmethodformnovalidatetimeformtargethgroup" +
- "osterhiddenhigh2hreflanghttp-equivideonclickiframeimageimgly" +
- "ph3isindexismappletitemtypemarqueematheadersortedmaxlength4m" +
- "inlength5mtextareadonlymultiplemutedoncloseamlessourceoncont" +
- "extmenuitemidoncopyoncuechangeoncutondblclickondragendondrag" +
- "enterondragexitemreferrerpolicyondragleaveondragoverondragst" +
- "articleondropzonemptiedondurationchangeonendedonerroronfocus" +
- "paceronhashchangeoninputmodeloninvalidonkeydownloadonkeypres" +
- "spellcheckedonkeyupreloadonlanguagechangeonloadeddatalisting" +
- "onloadedmetadatabindexonloadendonloadstartonmessageerroronmo" +
- "usedownonmouseenteronmouseleaveonmousemoveonmouseoutputonmou" +
- "seoveronmouseupromptonmousewheelonofflineononlineonpagehides" +
- "classectionbluronpageshowbronpastepublicontenteditableonpaus" +
- "emaponplayingonpopstateonprogressrcdocodeferonratechangeonre" +
- "jectionhandledonresetonresizesrclangonscrollonsecuritypolicy" +
- "violationauxclickonseekedonseekingonselectedonshowidth6onsor" +
- "tableonstalledonstorageonsubmitemscopedonsuspendontoggleonun" +
- "handledrejectionbeforeprintonunloadonvolumechangeonwaitingon" +
- "wheeloptimumanifestrongoptionbeforeunloaddressrcsetstylesumm" +
- "arysupsvgsystemplateworkertypewrap"
+ "ntrolsectionblurcoordshapecrossoriginslotranslatefacenterfie" +
+ "ldsetfigcaptionafterprintegrityfigurequiredforeignObjectfore" +
+ "ignobjectformactionautocompleteerrorformenctypemustmatchalle" +
+ "ngeformmethodformnovalidatetimeformtargethiddenoscripthigh3h" +
+ "reflanghttp-equivideonclickiframeimageimglyph4isindexismappl" +
+ "etitemtypemarqueematheadersmallowfullscreenmaxlength5minleng" +
+ "th6mtextareadonlymultiplemutedoncloseamlessortedoncontextmen" +
+ "uitemidoncopyoncuechangeoncutondblclickondragendondragentero" +
+ "ndragexitemreferrerpolicyondragleaveondragoverondragstarticl" +
+ "eondropzonemptiedondurationchangeonendedonerroronfocusourceo" +
+ "nhashchangeoninputmodeloninvalidonkeydownloadonkeypresspacer" +
+ "onkeyupreloadonlanguagechangeonloadeddatalistingonloadedmeta" +
+ "databindexonloadendonloadstartonmessageerroronmousedownonmou" +
+ "seenteronmouseleaveonmousemoveonmouseoutputonmouseoveronmous" +
+ "eupromptonmousewheelonofflineononlineonpagehidesclassearch2o" +
+ "npageshowbronpastepublicontenteditableonpausemaponplayingonp" +
+ "opstateonprogresspellcheckedonratechangeonrejectionhandledon" +
+ "resetonresizesrcdocodeferonscrollonsecuritypolicyviolationau" +
+ "xclickonseekedonseekingonselectedonshowidthgrouposteronsorta" +
+ "bleonstalledonstorageonsubmitemscopedonsuspendontoggleonunha" +
+ "ndledrejectionbeforeprintonunloadonvolumechangeonwaitingonwh" +
+ "eeloptimumanifestrongoptionbeforeunloaddressrclangsrcsetstyl" +
+ "esummarysupsvgsystemplateworkertypewrap"
diff --git a/vendor/golang.org/x/net/html/doc.go b/vendor/golang.org/x/net/html/doc.go
index 3a7e5ab176..885c4c5936 100644
--- a/vendor/golang.org/x/net/html/doc.go
+++ b/vendor/golang.org/x/net/html/doc.go
@@ -78,16 +78,11 @@ example, to process each anchor node in depth-first order:
if err != nil {
// ...
}
- var f func(*html.Node)
- f = func(n *html.Node) {
+ for n := range doc.Descendants() {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
- for c := n.FirstChild; c != nil; c = c.NextSibling {
- f(c)
- }
}
- f(doc)
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
diff --git a/vendor/golang.org/x/net/html/doctype.go b/vendor/golang.org/x/net/html/doctype.go
index c484e5a94f..bca3ae9a0c 100644
--- a/vendor/golang.org/x/net/html/doctype.go
+++ b/vendor/golang.org/x/net/html/doctype.go
@@ -87,7 +87,7 @@ func parseDoctype(s string) (n *Node, quirks bool) {
}
}
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
- strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
+ strings.EqualFold(lastAttr.Val, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
quirks = true
}
}
diff --git a/vendor/golang.org/x/net/html/foreign.go b/vendor/golang.org/x/net/html/foreign.go
index 9da9e9dc42..e8515d8e88 100644
--- a/vendor/golang.org/x/net/html/foreign.go
+++ b/vendor/golang.org/x/net/html/foreign.go
@@ -40,8 +40,7 @@ func htmlIntegrationPoint(n *Node) bool {
if n.Data == "annotation-xml" {
for _, a := range n.Attr {
if a.Key == "encoding" {
- val := strings.ToLower(a.Val)
- if val == "text/html" || val == "application/xhtml+xml" {
+ if strings.EqualFold(a.Val, "text/html") || strings.EqualFold(a.Val, "application/xhtml+xml") {
return true
}
}
diff --git a/vendor/golang.org/x/net/html/iter.go b/vendor/golang.org/x/net/html/iter.go
new file mode 100644
index 0000000000..54be8fd30f
--- /dev/null
+++ b/vendor/golang.org/x/net/html/iter.go
@@ -0,0 +1,56 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.23
+
+package html
+
+import "iter"
+
+// Ancestors returns an iterator over the ancestors of n, starting with n.Parent.
+//
+// Mutating a Node or its parents while iterating may have unexpected results.
+func (n *Node) Ancestors() iter.Seq[*Node] {
+ _ = n.Parent // eager nil check
+
+ return func(yield func(*Node) bool) {
+ for p := n.Parent; p != nil && yield(p); p = p.Parent {
+ }
+ }
+}
+
+// ChildNodes returns an iterator over the immediate children of n,
+// starting with n.FirstChild.
+//
+// Mutating a Node or its children while iterating may have unexpected results.
+func (n *Node) ChildNodes() iter.Seq[*Node] {
+ _ = n.FirstChild // eager nil check
+
+ return func(yield func(*Node) bool) {
+ for c := n.FirstChild; c != nil && yield(c); c = c.NextSibling {
+ }
+ }
+
+}
+
+// Descendants returns an iterator over all nodes recursively beneath
+// n, excluding n itself. Nodes are visited in depth-first preorder.
+//
+// Mutating a Node or its descendants while iterating may have unexpected results.
+func (n *Node) Descendants() iter.Seq[*Node] {
+ _ = n.FirstChild // eager nil check
+
+ return func(yield func(*Node) bool) {
+ n.descendants(yield)
+ }
+}
+
+func (n *Node) descendants(yield func(*Node) bool) bool {
+ for c := range n.ChildNodes() {
+ if !yield(c) || !c.descendants(yield) {
+ return false
+ }
+ }
+ return true
+}
diff --git a/vendor/golang.org/x/net/html/node.go b/vendor/golang.org/x/net/html/node.go
index 1350eef22c..77741a1950 100644
--- a/vendor/golang.org/x/net/html/node.go
+++ b/vendor/golang.org/x/net/html/node.go
@@ -38,6 +38,10 @@ var scopeMarker = Node{Type: scopeMarkerNode}
// that it looks like "a".
- if z.err == nil && z.buf[z.raw.end-2] == '/' {
+ // Look for a self-closing token (e.g. <br/>).
+ //
+ // Originally, we did this by just checking that the last character of the
+ // tag (ignoring the closing bracket) was a solidus (/) character, but this
+ // is not always accurate.
+ //
+ // We need to be careful that we don't misinterpret a non-self-closing tag
+ // as self-closing, as can happen if the tag contains unquoted attribute
+ // values (i.e. <p a=/>).
+ //
+ // To avoid this, we check that the last non-bracket character of the tag
+ // (z.raw.end-2) isn't the same character as the last non-quote character of
+ // the last attribute of the tag (z.attr[nAttrs-1][1].end-1), if the tag has
+ // attributes.
+ nAttrs := len(z.attr)
+ if z.err == nil && z.buf[z.raw.end-2] == '/' && (nAttrs == 0 || z.raw.end-2 != z.attr[nAttrs-1][1].end-1) {
return SelfClosingTagToken
}
return StartTagToken
diff --git a/vendor/golang.org/x/net/http2/client_conn_pool.go b/vendor/golang.org/x/net/http2/client_conn_pool.go
index 780968d6c1..e81b73e6a7 100644
--- a/vendor/golang.org/x/net/http2/client_conn_pool.go
+++ b/vendor/golang.org/x/net/http2/client_conn_pool.go
@@ -8,8 +8,8 @@ package http2
import (
"context"
- "crypto/tls"
"errors"
+ "net"
"net/http"
"sync"
)
@@ -158,7 +158,7 @@ func (c *dialCall) dial(ctx context.Context, addr string) {
// This code decides which ones live or die.
// The return value used is whether c was used.
// c is never closed.
-func (p *clientConnPool) addConnIfNeeded(key string, t *Transport, c *tls.Conn) (used bool, err error) {
+func (p *clientConnPool) addConnIfNeeded(key string, t *Transport, c net.Conn) (used bool, err error) {
p.mu.Lock()
for _, cc := range p.conns[key] {
if cc.CanTakeNewRequest() {
@@ -194,8 +194,8 @@ type addConnCall struct {
err error
}
-func (c *addConnCall) run(t *Transport, key string, tc *tls.Conn) {
- cc, err := t.NewClientConn(tc)
+func (c *addConnCall) run(t *Transport, key string, nc net.Conn) {
+ cc, err := t.NewClientConn(nc)
p := c.p
p.mu.Lock()
diff --git a/vendor/golang.org/x/net/http2/config.go b/vendor/golang.org/x/net/http2/config.go
new file mode 100644
index 0000000000..ca645d9a1a
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/config.go
@@ -0,0 +1,122 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http2
+
+import (
+ "math"
+ "net/http"
+ "time"
+)
+
+// http2Config is a package-internal version of net/http.HTTP2Config.
+//
+// http.HTTP2Config was added in Go 1.24.
+// When running with a version of net/http that includes HTTP2Config,
+// we merge the configuration with the fields in Transport or Server
+// to produce an http2Config.
+//
+// Zero valued fields in http2Config are interpreted as in the
+// net/http.HTTP2Config documentation.
+//
+// Precedence order for reconciling configurations is:
+//
+// - Use the net/http.{Server,Transport}.HTTP2Config value, when non-zero.
+// - Otherwise use the http2.{Server,Transport} value.
+// - If the resulting value is zero or out of range, use a default.
+type http2Config struct {
+ MaxConcurrentStreams uint32
+ MaxDecoderHeaderTableSize uint32
+ MaxEncoderHeaderTableSize uint32
+ MaxReadFrameSize uint32
+ MaxUploadBufferPerConnection int32
+ MaxUploadBufferPerStream int32
+ SendPingTimeout time.Duration
+ PingTimeout time.Duration
+ WriteByteTimeout time.Duration
+ PermitProhibitedCipherSuites bool
+ CountError func(errType string)
+}
+
+// configFromServer merges configuration settings from
+// net/http.Server.HTTP2Config and http2.Server.
+func configFromServer(h1 *http.Server, h2 *Server) http2Config {
+ conf := http2Config{
+ MaxConcurrentStreams: h2.MaxConcurrentStreams,
+ MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize,
+ MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize,
+ MaxReadFrameSize: h2.MaxReadFrameSize,
+ MaxUploadBufferPerConnection: h2.MaxUploadBufferPerConnection,
+ MaxUploadBufferPerStream: h2.MaxUploadBufferPerStream,
+ SendPingTimeout: h2.ReadIdleTimeout,
+ PingTimeout: h2.PingTimeout,
+ WriteByteTimeout: h2.WriteByteTimeout,
+ PermitProhibitedCipherSuites: h2.PermitProhibitedCipherSuites,
+ CountError: h2.CountError,
+ }
+ fillNetHTTPServerConfig(&conf, h1)
+ setConfigDefaults(&conf, true)
+ return conf
+}
+
+// configFromTransport merges configuration settings from h2 and h2.t1.HTTP2
+// (the net/http Transport).
+func configFromTransport(h2 *Transport) http2Config {
+ conf := http2Config{
+ MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize,
+ MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize,
+ MaxReadFrameSize: h2.MaxReadFrameSize,
+ SendPingTimeout: h2.ReadIdleTimeout,
+ PingTimeout: h2.PingTimeout,
+ WriteByteTimeout: h2.WriteByteTimeout,
+ }
+
+ // Unlike most config fields, where out-of-range values revert to the default,
+ // Transport.MaxReadFrameSize clips.
+ if conf.MaxReadFrameSize < minMaxFrameSize {
+ conf.MaxReadFrameSize = minMaxFrameSize
+ } else if conf.MaxReadFrameSize > maxFrameSize {
+ conf.MaxReadFrameSize = maxFrameSize
+ }
+
+ if h2.t1 != nil {
+ fillNetHTTPTransportConfig(&conf, h2.t1)
+ }
+ setConfigDefaults(&conf, false)
+ return conf
+}
+
+func setDefault[T ~int | ~int32 | ~uint32 | ~int64](v *T, minval, maxval, defval T) {
+ if *v < minval || *v > maxval {
+ *v = defval
+ }
+}
+
+func setConfigDefaults(conf *http2Config, server bool) {
+ setDefault(&conf.MaxConcurrentStreams, 1, math.MaxUint32, defaultMaxStreams)
+ setDefault(&conf.MaxEncoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize)
+ setDefault(&conf.MaxDecoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize)
+ if server {
+ setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, 1<<20)
+ } else {
+ setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, transportDefaultConnFlow)
+ }
+ if server {
+ setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, 1<<20)
+ } else {
+ setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, transportDefaultStreamFlow)
+ }
+ setDefault(&conf.MaxReadFrameSize, minMaxFrameSize, maxFrameSize, defaultMaxReadFrameSize)
+ setDefault(&conf.PingTimeout, 1, math.MaxInt64, 15*time.Second)
+}
+
+// adjustHTTP1MaxHeaderSize converts a limit in bytes on the size of an HTTP/1 header
+// to an HTTP/2 MAX_HEADER_LIST_SIZE value.
+func adjustHTTP1MaxHeaderSize(n int64) int64 {
+ // http2's count is in a slightly different unit and includes 32 bytes per pair.
+ // So, take the net/http.Server value and pad it up a bit, assuming 10 headers.
+ const perFieldOverhead = 32 // per http2 spec
+ const typicalHeaders = 10 // conservative
+ return n + typicalHeaders*perFieldOverhead
+}
diff --git a/vendor/golang.org/x/net/http2/config_go124.go b/vendor/golang.org/x/net/http2/config_go124.go
new file mode 100644
index 0000000000..5b516c55ff
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/config_go124.go
@@ -0,0 +1,61 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.24
+
+package http2
+
+import "net/http"
+
+// fillNetHTTPServerConfig sets fields in conf from srv.HTTP2.
+func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) {
+ fillNetHTTPConfig(conf, srv.HTTP2)
+}
+
+// fillNetHTTPTransportConfig sets fields in conf from tr.HTTP2.
+func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) {
+ fillNetHTTPConfig(conf, tr.HTTP2)
+}
+
+func fillNetHTTPConfig(conf *http2Config, h2 *http.HTTP2Config) {
+ if h2 == nil {
+ return
+ }
+ if h2.MaxConcurrentStreams != 0 {
+ conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams)
+ }
+ if h2.MaxEncoderHeaderTableSize != 0 {
+ conf.MaxEncoderHeaderTableSize = uint32(h2.MaxEncoderHeaderTableSize)
+ }
+ if h2.MaxDecoderHeaderTableSize != 0 {
+ conf.MaxDecoderHeaderTableSize = uint32(h2.MaxDecoderHeaderTableSize)
+ }
+ if h2.MaxConcurrentStreams != 0 {
+ conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams)
+ }
+ if h2.MaxReadFrameSize != 0 {
+ conf.MaxReadFrameSize = uint32(h2.MaxReadFrameSize)
+ }
+ if h2.MaxReceiveBufferPerConnection != 0 {
+ conf.MaxUploadBufferPerConnection = int32(h2.MaxReceiveBufferPerConnection)
+ }
+ if h2.MaxReceiveBufferPerStream != 0 {
+ conf.MaxUploadBufferPerStream = int32(h2.MaxReceiveBufferPerStream)
+ }
+ if h2.SendPingTimeout != 0 {
+ conf.SendPingTimeout = h2.SendPingTimeout
+ }
+ if h2.PingTimeout != 0 {
+ conf.PingTimeout = h2.PingTimeout
+ }
+ if h2.WriteByteTimeout != 0 {
+ conf.WriteByteTimeout = h2.WriteByteTimeout
+ }
+ if h2.PermitProhibitedCipherSuites {
+ conf.PermitProhibitedCipherSuites = true
+ }
+ if h2.CountError != nil {
+ conf.CountError = h2.CountError
+ }
+}
diff --git a/vendor/golang.org/x/net/http2/config_pre_go124.go b/vendor/golang.org/x/net/http2/config_pre_go124.go
new file mode 100644
index 0000000000..060fd6c64c
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/config_pre_go124.go
@@ -0,0 +1,16 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !go1.24
+
+package http2
+
+import "net/http"
+
+// Pre-Go 1.24 fallback.
+// The Server.HTTP2 and Transport.HTTP2 config fields were added in Go 1.24.
+
+func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) {}
+
+func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) {}
diff --git a/vendor/golang.org/x/net/http2/frame.go b/vendor/golang.org/x/net/http2/frame.go
index 105c3b279c..97bd8b06f7 100644
--- a/vendor/golang.org/x/net/http2/frame.go
+++ b/vendor/golang.org/x/net/http2/frame.go
@@ -225,6 +225,11 @@ var fhBytes = sync.Pool{
},
}
+func invalidHTTP1LookingFrameHeader() FrameHeader {
+ fh, _ := readFrameHeader(make([]byte, frameHeaderLen), strings.NewReader("HTTP/1.1 "))
+ return fh
+}
+
// ReadFrameHeader reads 9 bytes from r and returns a FrameHeader.
// Most users should use Framer.ReadFrame instead.
func ReadFrameHeader(r io.Reader) (FrameHeader, error) {
@@ -503,10 +508,16 @@ func (fr *Framer) ReadFrame() (Frame, error) {
return nil, err
}
if fh.Length > fr.maxReadSize {
+ if fh == invalidHTTP1LookingFrameHeader() {
+ return nil, fmt.Errorf("http2: failed reading the frame payload: %w, note that the frame header looked like an HTTP/1.1 header", err)
+ }
return nil, ErrFrameTooLarge
}
payload := fr.getReadBuf(fh.Length)
if _, err := io.ReadFull(fr.r, payload); err != nil {
+ if fh == invalidHTTP1LookingFrameHeader() {
+ return nil, fmt.Errorf("http2: failed reading the frame payload: %w, note that the frame header looked like an HTTP/1.1 header", err)
+ }
return nil, err
}
f, err := typeFrameParser(fh.Type)(fr.frameCache, fh, fr.countError, payload)
@@ -1490,7 +1501,7 @@ func (mh *MetaHeadersFrame) checkPseudos() error {
pf := mh.PseudoFields()
for i, hf := range pf {
switch hf.Name {
- case ":method", ":path", ":scheme", ":authority":
+ case ":method", ":path", ":scheme", ":authority", ":protocol":
isRequest = true
case ":status":
isResponse = true
@@ -1498,7 +1509,7 @@ func (mh *MetaHeadersFrame) checkPseudos() error {
return pseudoHeaderError(hf.Name)
}
// Check for duplicates.
- // This would be a bad algorithm, but N is 4.
+ // This would be a bad algorithm, but N is 5.
// And this doesn't allocate.
for _, hf2 := range pf[:i] {
if hf.Name == hf2.Name {
diff --git a/vendor/golang.org/x/net/http2/http2.go b/vendor/golang.org/x/net/http2/http2.go
index 003e649f30..6c18ea230b 100644
--- a/vendor/golang.org/x/net/http2/http2.go
+++ b/vendor/golang.org/x/net/http2/http2.go
@@ -19,8 +19,9 @@ import (
"bufio"
"context"
"crypto/tls"
+ "errors"
"fmt"
- "io"
+ "net"
"net/http"
"os"
"sort"
@@ -37,6 +38,15 @@ var (
logFrameWrites bool
logFrameReads bool
inTests bool
+
+ // Enabling extended CONNECT by default causes browsers to attempt to use
+ // WebSockets-over-HTTP/2. This results in problems when the server's websocket
+ // package doesn't support extended CONNECT.
+ //
+ // Disable extended CONNECT by default for now.
+ //
+ // Issue #71128.
+ disableExtendedConnectProtocol = true
)
func init() {
@@ -49,6 +59,9 @@ func init() {
logFrameWrites = true
logFrameReads = true
}
+ if strings.Contains(e, "http2xconnect=1") {
+ disableExtendedConnectProtocol = false
+ }
}
const (
@@ -140,6 +153,10 @@ func (s Setting) Valid() error {
if s.Val < 16384 || s.Val > 1<<24-1 {
return ConnectionError(ErrCodeProtocol)
}
+ case SettingEnableConnectProtocol:
+ if s.Val != 1 && s.Val != 0 {
+ return ConnectionError(ErrCodeProtocol)
+ }
}
return nil
}
@@ -149,21 +166,23 @@ func (s Setting) Valid() error {
type SettingID uint16
const (
- SettingHeaderTableSize SettingID = 0x1
- SettingEnablePush SettingID = 0x2
- SettingMaxConcurrentStreams SettingID = 0x3
- SettingInitialWindowSize SettingID = 0x4
- SettingMaxFrameSize SettingID = 0x5
- SettingMaxHeaderListSize SettingID = 0x6
+ SettingHeaderTableSize SettingID = 0x1
+ SettingEnablePush SettingID = 0x2
+ SettingMaxConcurrentStreams SettingID = 0x3
+ SettingInitialWindowSize SettingID = 0x4
+ SettingMaxFrameSize SettingID = 0x5
+ SettingMaxHeaderListSize SettingID = 0x6
+ SettingEnableConnectProtocol SettingID = 0x8
)
var settingName = map[SettingID]string{
- SettingHeaderTableSize: "HEADER_TABLE_SIZE",
- SettingEnablePush: "ENABLE_PUSH",
- SettingMaxConcurrentStreams: "MAX_CONCURRENT_STREAMS",
- SettingInitialWindowSize: "INITIAL_WINDOW_SIZE",
- SettingMaxFrameSize: "MAX_FRAME_SIZE",
- SettingMaxHeaderListSize: "MAX_HEADER_LIST_SIZE",
+ SettingHeaderTableSize: "HEADER_TABLE_SIZE",
+ SettingEnablePush: "ENABLE_PUSH",
+ SettingMaxConcurrentStreams: "MAX_CONCURRENT_STREAMS",
+ SettingInitialWindowSize: "INITIAL_WINDOW_SIZE",
+ SettingMaxFrameSize: "MAX_FRAME_SIZE",
+ SettingMaxHeaderListSize: "MAX_HEADER_LIST_SIZE",
+ SettingEnableConnectProtocol: "ENABLE_CONNECT_PROTOCOL",
}
func (s SettingID) String() string {
@@ -237,13 +256,19 @@ func (cw closeWaiter) Wait() {
// Its buffered writer is lazily allocated as needed, to minimize
// idle memory usage with many connections.
type bufferedWriter struct {
- _ incomparable
- w io.Writer // immutable
- bw *bufio.Writer // non-nil when data is buffered
+ _ incomparable
+ group synctestGroupInterface // immutable
+ conn net.Conn // immutable
+ bw *bufio.Writer // non-nil when data is buffered
+ byteTimeout time.Duration // immutable, WriteByteTimeout
}
-func newBufferedWriter(w io.Writer) *bufferedWriter {
- return &bufferedWriter{w: w}
+func newBufferedWriter(group synctestGroupInterface, conn net.Conn, timeout time.Duration) *bufferedWriter {
+ return &bufferedWriter{
+ group: group,
+ conn: conn,
+ byteTimeout: timeout,
+ }
}
// bufWriterPoolBufferSize is the size of bufio.Writer's
@@ -270,7 +295,7 @@ func (w *bufferedWriter) Available() int {
func (w *bufferedWriter) Write(p []byte) (n int, err error) {
if w.bw == nil {
bw := bufWriterPool.Get().(*bufio.Writer)
- bw.Reset(w.w)
+ bw.Reset((*bufferedWriterTimeoutWriter)(w))
w.bw = bw
}
return w.bw.Write(p)
@@ -288,6 +313,38 @@ func (w *bufferedWriter) Flush() error {
return err
}
+type bufferedWriterTimeoutWriter bufferedWriter
+
+func (w *bufferedWriterTimeoutWriter) Write(p []byte) (n int, err error) {
+ return writeWithByteTimeout(w.group, w.conn, w.byteTimeout, p)
+}
+
+// writeWithByteTimeout writes to conn.
+// If more than timeout passes without any bytes being written to the connection,
+// the write fails.
+func writeWithByteTimeout(group synctestGroupInterface, conn net.Conn, timeout time.Duration, p []byte) (n int, err error) {
+ if timeout <= 0 {
+ return conn.Write(p)
+ }
+ for {
+ var now time.Time
+ if group == nil {
+ now = time.Now()
+ } else {
+ now = group.Now()
+ }
+ conn.SetWriteDeadline(now.Add(timeout))
+ nn, err := conn.Write(p[n:])
+ n += nn
+ if n == len(p) || nn == 0 || !errors.Is(err, os.ErrDeadlineExceeded) {
+ // Either we finished the write, made no progress, or failed with an
+ // error other than a deadline timeout. Whichever it is, we're done now.
+ conn.SetWriteDeadline(time.Time{})
+ return n, err
+ }
+ }
+}
+
func mustUint31(v int32) uint32 {
if v < 0 || v > 2147483647 {
panic("out of range")
@@ -358,23 +415,6 @@ func (s *sorter) SortStrings(ss []string) {
s.v = save
}
-// validPseudoPath reports whether v is a valid :path pseudo-header
-// value. It must be either:
-//
-// - a non-empty string starting with '/'
-// - the string '*', for OPTIONS requests.
-//
-// For now this is only used a quick check for deciding when to clean
-// up Opaque URLs before sending requests from the Transport.
-// See golang.org/issue/16847
-//
-// We used to enforce that the path also didn't start with "//", but
-// Google's GFE accepts such paths and Chrome sends them, so ignore
-// that part of the spec. See golang.org/issue/19103.
-func validPseudoPath(v string) bool {
- return (len(v) > 0 && v[0] == '/') || v == "*"
-}
-
// incomparable is a zero-width, non-comparable type. Adding it to a struct
// makes that struct also non-comparable, and generally doesn't add
// any size (as long as it's first).
diff --git a/vendor/golang.org/x/net/http2/server.go b/vendor/golang.org/x/net/http2/server.go
index 6c349f3ec6..51fca38f61 100644
--- a/vendor/golang.org/x/net/http2/server.go
+++ b/vendor/golang.org/x/net/http2/server.go
@@ -29,6 +29,7 @@ import (
"bufio"
"bytes"
"context"
+ "crypto/rand"
"crypto/tls"
"errors"
"fmt"
@@ -49,13 +50,18 @@ import (
"golang.org/x/net/http/httpguts"
"golang.org/x/net/http2/hpack"
+ "golang.org/x/net/internal/httpcommon"
)
const (
- prefaceTimeout = 10 * time.Second
- firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway
- handlerChunkWriteSize = 4 << 10
- defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to?
+ prefaceTimeout = 10 * time.Second
+ firstSettingsTimeout = 2 * time.Second // should be in-flight with preface anyway
+ handlerChunkWriteSize = 4 << 10
+ defaultMaxStreams = 250 // TODO: make this 100 as the GFE seems to?
+
+ // maxQueuedControlFrames is the maximum number of control frames like
+ // SETTINGS, PING and RST_STREAM that will be queued for writing before
+ // the connection is closed to prevent memory exhaustion attacks.
maxQueuedControlFrames = 10000
)
@@ -127,6 +133,22 @@ type Server struct {
// If zero or negative, there is no timeout.
IdleTimeout time.Duration
+ // ReadIdleTimeout is the timeout after which a health check using a ping
+ // frame will be carried out if no frame is received on the connection.
+ // If zero, no health check is performed.
+ ReadIdleTimeout time.Duration
+
+ // PingTimeout is the timeout after which the connection will be closed
+ // if a response to a ping is not received.
+ // If zero, a default of 15 seconds is used.
+ PingTimeout time.Duration
+
+ // WriteByteTimeout is the timeout after which a connection will be
+ // closed if no data can be written to it. The timeout begins when data is
+ // available to write, and is extended whenever any bytes are written.
+ // If zero or negative, there is no timeout.
+ WriteByteTimeout time.Duration
+
// MaxUploadBufferPerConnection is the size of the initial flow
// control window for each connections. The HTTP/2 spec does not
// allow this to be smaller than 65535 or larger than 2^32-1.
@@ -189,57 +211,6 @@ func (s *Server) afterFunc(d time.Duration, f func()) timer {
return timeTimer{time.AfterFunc(d, f)}
}
-func (s *Server) initialConnRecvWindowSize() int32 {
- if s.MaxUploadBufferPerConnection >= initialWindowSize {
- return s.MaxUploadBufferPerConnection
- }
- return 1 << 20
-}
-
-func (s *Server) initialStreamRecvWindowSize() int32 {
- if s.MaxUploadBufferPerStream > 0 {
- return s.MaxUploadBufferPerStream
- }
- return 1 << 20
-}
-
-func (s *Server) maxReadFrameSize() uint32 {
- if v := s.MaxReadFrameSize; v >= minMaxFrameSize && v <= maxFrameSize {
- return v
- }
- return defaultMaxReadFrameSize
-}
-
-func (s *Server) maxConcurrentStreams() uint32 {
- if v := s.MaxConcurrentStreams; v > 0 {
- return v
- }
- return defaultMaxStreams
-}
-
-func (s *Server) maxDecoderHeaderTableSize() uint32 {
- if v := s.MaxDecoderHeaderTableSize; v > 0 {
- return v
- }
- return initialHeaderTableSize
-}
-
-func (s *Server) maxEncoderHeaderTableSize() uint32 {
- if v := s.MaxEncoderHeaderTableSize; v > 0 {
- return v
- }
- return initialHeaderTableSize
-}
-
-// maxQueuedControlFrames is the maximum number of control frames like
-// SETTINGS, PING and RST_STREAM that will be queued for writing before
-// the connection is closed to prevent memory exhaustion attacks.
-func (s *Server) maxQueuedControlFrames() int {
- // TODO: if anybody asks, add a Server field, and remember to define the
- // behavior of negative values.
- return maxQueuedControlFrames
-}
-
type serverInternalState struct {
mu sync.Mutex
activeConns map[*serverConn]struct{}
@@ -336,7 +307,7 @@ func ConfigureServer(s *http.Server, conf *Server) error {
if s.TLSNextProto == nil {
s.TLSNextProto = map[string]func(*http.Server, *tls.Conn, http.Handler){}
}
- protoHandler := func(hs *http.Server, c *tls.Conn, h http.Handler) {
+ protoHandler := func(hs *http.Server, c net.Conn, h http.Handler, sawClientPreface bool) {
if testHookOnConn != nil {
testHookOnConn()
}
@@ -353,12 +324,31 @@ func ConfigureServer(s *http.Server, conf *Server) error {
ctx = bc.BaseContext()
}
conf.ServeConn(c, &ServeConnOpts{
- Context: ctx,
- Handler: h,
- BaseConfig: hs,
+ Context: ctx,
+ Handler: h,
+ BaseConfig: hs,
+ SawClientPreface: sawClientPreface,
})
}
- s.TLSNextProto[NextProtoTLS] = protoHandler
+ s.TLSNextProto[NextProtoTLS] = func(hs *http.Server, c *tls.Conn, h http.Handler) {
+ protoHandler(hs, c, h, false)
+ }
+ // The "unencrypted_http2" TLSNextProto key is used to pass off non-TLS HTTP/2 conns.
+ //
+ // A connection handed off this way has already had the HTTP/2 preface read from it.
+ s.TLSNextProto[nextProtoUnencryptedHTTP2] = func(hs *http.Server, c *tls.Conn, h http.Handler) {
+ nc, err := unencryptedNetConnFromTLSConn(c)
+ if err != nil {
+ if lg := hs.ErrorLog; lg != nil {
+ lg.Print(err)
+ } else {
+ log.Print(err)
+ }
+ go c.Close()
+ return
+ }
+ protoHandler(hs, nc, h, true)
+ }
return nil
}
@@ -440,13 +430,15 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
baseCtx, cancel := serverConnBaseContext(c, opts)
defer cancel()
+ http1srv := opts.baseConfig()
+ conf := configFromServer(http1srv, s)
sc := &serverConn{
srv: s,
- hs: opts.baseConfig(),
+ hs: http1srv,
conn: c,
baseCtx: baseCtx,
remoteAddrStr: c.RemoteAddr().String(),
- bw: newBufferedWriter(c),
+ bw: newBufferedWriter(s.group, c, conf.WriteByteTimeout),
handler: opts.handler(),
streams: make(map[uint32]*stream),
readFrameCh: make(chan readFrameResult),
@@ -456,9 +448,12 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
bodyReadCh: make(chan bodyReadMsg), // buffering doesn't matter either way
doneServing: make(chan struct{}),
clientMaxStreams: math.MaxUint32, // Section 6.5.2: "Initially, there is no limit to this value"
- advMaxStreams: s.maxConcurrentStreams(),
+ advMaxStreams: conf.MaxConcurrentStreams,
initialStreamSendWindowSize: initialWindowSize,
+ initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream,
maxFrameSize: initialMaxFrameSize,
+ pingTimeout: conf.PingTimeout,
+ countErrorFunc: conf.CountError,
serveG: newGoroutineLock(),
pushEnabled: true,
sawClientPreface: opts.SawClientPreface,
@@ -491,15 +486,15 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
sc.flow.add(initialWindowSize)
sc.inflow.init(initialWindowSize)
sc.hpackEncoder = hpack.NewEncoder(&sc.headerWriteBuf)
- sc.hpackEncoder.SetMaxDynamicTableSizeLimit(s.maxEncoderHeaderTableSize())
+ sc.hpackEncoder.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize)
fr := NewFramer(sc.bw, c)
- if s.CountError != nil {
- fr.countError = s.CountError
+ if conf.CountError != nil {
+ fr.countError = conf.CountError
}
- fr.ReadMetaHeaders = hpack.NewDecoder(s.maxDecoderHeaderTableSize(), nil)
+ fr.ReadMetaHeaders = hpack.NewDecoder(conf.MaxDecoderHeaderTableSize, nil)
fr.MaxHeaderListSize = sc.maxHeaderListSize()
- fr.SetMaxReadFrameSize(s.maxReadFrameSize())
+ fr.SetMaxReadFrameSize(conf.MaxReadFrameSize)
sc.framer = fr
if tc, ok := c.(connectionStater); ok {
@@ -532,7 +527,7 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
// So for now, do nothing here again.
}
- if !s.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) {
+ if !conf.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) {
// "Endpoints MAY choose to generate a connection error
// (Section 5.4.1) of type INADEQUATE_SECURITY if one of
// the prohibited cipher suites are negotiated."
@@ -569,7 +564,7 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
opts.UpgradeRequest = nil
}
- sc.serve()
+ sc.serve(conf)
}
func serverConnBaseContext(c net.Conn, opts *ServeConnOpts) (ctx context.Context, cancel func()) {
@@ -609,6 +604,7 @@ type serverConn struct {
tlsState *tls.ConnectionState // shared by all handlers, like net/http
remoteAddrStr string
writeSched WriteScheduler
+ countErrorFunc func(errType string)
// Everything following is owned by the serve loop; use serveG.check():
serveG goroutineLock // used to verify funcs are on serve()
@@ -628,6 +624,7 @@ type serverConn struct {
streams map[uint32]*stream
unstartedHandlers []unstartedHandler
initialStreamSendWindowSize int32
+ initialStreamRecvWindowSize int32
maxFrameSize int32
peerMaxHeaderListSize uint32 // zero means unknown (default)
canonHeader map[string]string // http2-lower-case -> Go-Canonical-Case
@@ -638,9 +635,14 @@ type serverConn struct {
inGoAway bool // we've started to or sent GOAWAY
inFrameScheduleLoop bool // whether we're in the scheduleFrameWrite loop
needToSendGoAway bool // we need to schedule a GOAWAY frame write
+ pingSent bool
+ sentPingData [8]byte
goAwayCode ErrCode
shutdownTimer timer // nil until used
idleTimer timer // nil if unused
+ readIdleTimeout time.Duration
+ pingTimeout time.Duration
+ readIdleTimer timer // nil if unused
// Owned by the writeFrameAsync goroutine:
headerWriteBuf bytes.Buffer
@@ -655,11 +657,7 @@ func (sc *serverConn) maxHeaderListSize() uint32 {
if n <= 0 {
n = http.DefaultMaxHeaderBytes
}
- // http2's count is in a slightly different unit and includes 32 bytes per pair.
- // So, take the net/http.Server value and pad it up a bit, assuming 10 headers.
- const perFieldOverhead = 32 // per http2 spec
- const typicalHeaders = 10 // conservative
- return uint32(n + typicalHeaders*perFieldOverhead)
+ return uint32(adjustHTTP1MaxHeaderSize(int64(n)))
}
func (sc *serverConn) curOpenStreams() uint32 {
@@ -815,8 +813,7 @@ const maxCachedCanonicalHeadersKeysSize = 2048
func (sc *serverConn) canonicalHeader(v string) string {
sc.serveG.check()
- buildCommonHeaderMapsOnce()
- cv, ok := commonCanonHeader[v]
+ cv, ok := httpcommon.CachedCanonicalHeader(v)
if ok {
return cv
}
@@ -923,7 +920,7 @@ func (sc *serverConn) notePanic() {
}
}
-func (sc *serverConn) serve() {
+func (sc *serverConn) serve(conf http2Config) {
sc.serveG.check()
defer sc.notePanic()
defer sc.conn.Close()
@@ -935,20 +932,24 @@ func (sc *serverConn) serve() {
sc.vlogf("http2: server connection from %v on %p", sc.conn.RemoteAddr(), sc.hs)
}
+ settings := writeSettings{
+ {SettingMaxFrameSize, conf.MaxReadFrameSize},
+ {SettingMaxConcurrentStreams, sc.advMaxStreams},
+ {SettingMaxHeaderListSize, sc.maxHeaderListSize()},
+ {SettingHeaderTableSize, conf.MaxDecoderHeaderTableSize},
+ {SettingInitialWindowSize, uint32(sc.initialStreamRecvWindowSize)},
+ }
+ if !disableExtendedConnectProtocol {
+ settings = append(settings, Setting{SettingEnableConnectProtocol, 1})
+ }
sc.writeFrame(FrameWriteRequest{
- write: writeSettings{
- {SettingMaxFrameSize, sc.srv.maxReadFrameSize()},
- {SettingMaxConcurrentStreams, sc.advMaxStreams},
- {SettingMaxHeaderListSize, sc.maxHeaderListSize()},
- {SettingHeaderTableSize, sc.srv.maxDecoderHeaderTableSize()},
- {SettingInitialWindowSize, uint32(sc.srv.initialStreamRecvWindowSize())},
- },
+ write: settings,
})
sc.unackedSettings++
// Each connection starts with initialWindowSize inflow tokens.
// If a higher value is configured, we add more tokens.
- if diff := sc.srv.initialConnRecvWindowSize() - initialWindowSize; diff > 0 {
+ if diff := conf.MaxUploadBufferPerConnection - initialWindowSize; diff > 0 {
sc.sendWindowUpdate(nil, int(diff))
}
@@ -968,11 +969,18 @@ func (sc *serverConn) serve() {
defer sc.idleTimer.Stop()
}
+ if conf.SendPingTimeout > 0 {
+ sc.readIdleTimeout = conf.SendPingTimeout
+ sc.readIdleTimer = sc.srv.afterFunc(conf.SendPingTimeout, sc.onReadIdleTimer)
+ defer sc.readIdleTimer.Stop()
+ }
+
go sc.readFrames() // closed by defer sc.conn.Close above
settingsTimer := sc.srv.afterFunc(firstSettingsTimeout, sc.onSettingsTimer)
defer settingsTimer.Stop()
+ lastFrameTime := sc.srv.now()
loopNum := 0
for {
loopNum++
@@ -986,6 +994,7 @@ func (sc *serverConn) serve() {
case res := <-sc.wroteFrameCh:
sc.wroteFrame(res)
case res := <-sc.readFrameCh:
+ lastFrameTime = sc.srv.now()
// Process any written frames before reading new frames from the client since a
// written frame could have triggered a new stream to be started.
if sc.writingFrameAsync {
@@ -1017,6 +1026,8 @@ func (sc *serverConn) serve() {
case idleTimerMsg:
sc.vlogf("connection is idle")
sc.goAway(ErrCodeNo)
+ case readIdleTimerMsg:
+ sc.handlePingTimer(lastFrameTime)
case shutdownTimerMsg:
sc.vlogf("GOAWAY close timer fired; closing conn from %v", sc.conn.RemoteAddr())
return
@@ -1039,7 +1050,7 @@ func (sc *serverConn) serve() {
// If the peer is causing us to generate a lot of control frames,
// but not reading them from us, assume they are trying to make us
// run out of memory.
- if sc.queuedControlFrames > sc.srv.maxQueuedControlFrames() {
+ if sc.queuedControlFrames > maxQueuedControlFrames {
sc.vlogf("http2: too many control frames in send queue, closing connection")
return
}
@@ -1055,12 +1066,42 @@ func (sc *serverConn) serve() {
}
}
+func (sc *serverConn) handlePingTimer(lastFrameReadTime time.Time) {
+ if sc.pingSent {
+ sc.logf("timeout waiting for PING response")
+ if f := sc.countErrorFunc; f != nil {
+ f("conn_close_lost_ping")
+ }
+ sc.conn.Close()
+ return
+ }
+
+ pingAt := lastFrameReadTime.Add(sc.readIdleTimeout)
+ now := sc.srv.now()
+ if pingAt.After(now) {
+ // We received frames since arming the ping timer.
+ // Reset it for the next possible timeout.
+ sc.readIdleTimer.Reset(pingAt.Sub(now))
+ return
+ }
+
+ sc.pingSent = true
+ // Ignore crypto/rand.Read errors: It generally can't fail, and the worst case if it does
+ // is that we send a PING frame containing 0s.
+ _, _ = rand.Read(sc.sentPingData[:])
+ sc.writeFrame(FrameWriteRequest{
+ write: &writePing{data: sc.sentPingData},
+ })
+ sc.readIdleTimer.Reset(sc.pingTimeout)
+}
+
type serverMessage int
// Message values sent to serveMsgCh.
var (
settingsTimerMsg = new(serverMessage)
idleTimerMsg = new(serverMessage)
+ readIdleTimerMsg = new(serverMessage)
shutdownTimerMsg = new(serverMessage)
gracefulShutdownMsg = new(serverMessage)
handlerDoneMsg = new(serverMessage)
@@ -1068,6 +1109,7 @@ var (
func (sc *serverConn) onSettingsTimer() { sc.sendServeMsg(settingsTimerMsg) }
func (sc *serverConn) onIdleTimer() { sc.sendServeMsg(idleTimerMsg) }
+func (sc *serverConn) onReadIdleTimer() { sc.sendServeMsg(readIdleTimerMsg) }
func (sc *serverConn) onShutdownTimer() { sc.sendServeMsg(shutdownTimerMsg) }
func (sc *serverConn) sendServeMsg(msg interface{}) {
@@ -1320,6 +1362,10 @@ func (sc *serverConn) wroteFrame(res frameWriteResult) {
sc.writingFrame = false
sc.writingFrameAsync = false
+ if res.err != nil {
+ sc.conn.Close()
+ }
+
wr := res.wr
if writeEndsStream(wr.write) {
@@ -1594,6 +1640,11 @@ func (sc *serverConn) processFrame(f Frame) error {
func (sc *serverConn) processPing(f *PingFrame) error {
sc.serveG.check()
if f.IsAck() {
+ if sc.pingSent && sc.sentPingData == f.Data {
+ // This is a response to a PING we sent.
+ sc.pingSent = false
+ sc.readIdleTimer.Reset(sc.readIdleTimeout)
+ }
// 6.7 PING: " An endpoint MUST NOT respond to PING frames
// containing this flag."
return nil
@@ -1757,6 +1808,9 @@ func (sc *serverConn) processSetting(s Setting) error {
sc.maxFrameSize = int32(s.Val) // the maximum valid s.Val is < 2^31
case SettingMaxHeaderListSize:
sc.peerMaxHeaderListSize = s.Val
+ case SettingEnableConnectProtocol:
+ // Receipt of this parameter by a server does not
+ // have any impact
default:
// Unknown setting: "An endpoint that receives a SETTINGS
// frame with any unknown or unsupported identifier MUST
@@ -2160,7 +2214,7 @@ func (sc *serverConn) newStream(id, pusherID uint32, state streamState) *stream
st.cw.Init()
st.flow.conn = &sc.flow // link to conn-level counter
st.flow.add(sc.initialStreamSendWindowSize)
- st.inflow.init(sc.srv.initialStreamRecvWindowSize())
+ st.inflow.init(sc.initialStreamRecvWindowSize)
if sc.hs.WriteTimeout > 0 {
st.writeDeadline = sc.srv.afterFunc(sc.hs.WriteTimeout, st.onWriteTimeout)
}
@@ -2182,19 +2236,25 @@ func (sc *serverConn) newStream(id, pusherID uint32, state streamState) *stream
func (sc *serverConn) newWriterAndRequest(st *stream, f *MetaHeadersFrame) (*responseWriter, *http.Request, error) {
sc.serveG.check()
- rp := requestParam{
- method: f.PseudoValue("method"),
- scheme: f.PseudoValue("scheme"),
- authority: f.PseudoValue("authority"),
- path: f.PseudoValue("path"),
+ rp := httpcommon.ServerRequestParam{
+ Method: f.PseudoValue("method"),
+ Scheme: f.PseudoValue("scheme"),
+ Authority: f.PseudoValue("authority"),
+ Path: f.PseudoValue("path"),
+ Protocol: f.PseudoValue("protocol"),
}
- isConnect := rp.method == "CONNECT"
+ // extended connect is disabled, so we should not see :protocol
+ if disableExtendedConnectProtocol && rp.Protocol != "" {
+ return nil, nil, sc.countError("bad_connect", streamError(f.StreamID, ErrCodeProtocol))
+ }
+
+ isConnect := rp.Method == "CONNECT"
if isConnect {
- if rp.path != "" || rp.scheme != "" || rp.authority == "" {
+ if rp.Protocol == "" && (rp.Path != "" || rp.Scheme != "" || rp.Authority == "") {
return nil, nil, sc.countError("bad_connect", streamError(f.StreamID, ErrCodeProtocol))
}
- } else if rp.method == "" || rp.path == "" || (rp.scheme != "https" && rp.scheme != "http") {
+ } else if rp.Method == "" || rp.Path == "" || (rp.Scheme != "https" && rp.Scheme != "http") {
// See 8.1.2.6 Malformed Requests and Responses:
//
// Malformed requests or responses that are detected
@@ -2208,12 +2268,16 @@ func (sc *serverConn) newWriterAndRequest(st *stream, f *MetaHeadersFrame) (*res
return nil, nil, sc.countError("bad_path_method", streamError(f.StreamID, ErrCodeProtocol))
}
- rp.header = make(http.Header)
+ header := make(http.Header)
+ rp.Header = header
for _, hf := range f.RegularFields() {
- rp.header.Add(sc.canonicalHeader(hf.Name), hf.Value)
+ header.Add(sc.canonicalHeader(hf.Name), hf.Value)
+ }
+ if rp.Authority == "" {
+ rp.Authority = header.Get("Host")
}
- if rp.authority == "" {
- rp.authority = rp.header.Get("Host")
+ if rp.Protocol != "" {
+ header.Set(":protocol", rp.Protocol)
}
rw, req, err := sc.newWriterAndRequestNoBody(st, rp)
@@ -2222,7 +2286,7 @@ func (sc *serverConn) newWriterAndRequest(st *stream, f *MetaHeadersFrame) (*res
}
bodyOpen := !f.StreamEnded()
if bodyOpen {
- if vv, ok := rp.header["Content-Length"]; ok {
+ if vv, ok := rp.Header["Content-Length"]; ok {
if cl, err := strconv.ParseUint(vv[0], 10, 63); err == nil {
req.ContentLength = int64(cl)
} else {
@@ -2238,83 +2302,38 @@ func (sc *serverConn) newWriterAndRequest(st *stream, f *MetaHeadersFrame) (*res
return rw, req, nil
}
-type requestParam struct {
- method string
- scheme, authority, path string
- header http.Header
-}
-
-func (sc *serverConn) newWriterAndRequestNoBody(st *stream, rp requestParam) (*responseWriter, *http.Request, error) {
+func (sc *serverConn) newWriterAndRequestNoBody(st *stream, rp httpcommon.ServerRequestParam) (*responseWriter, *http.Request, error) {
sc.serveG.check()
var tlsState *tls.ConnectionState // nil if not scheme https
- if rp.scheme == "https" {
+ if rp.Scheme == "https" {
tlsState = sc.tlsState
}
- needsContinue := httpguts.HeaderValuesContainsToken(rp.header["Expect"], "100-continue")
- if needsContinue {
- rp.header.Del("Expect")
- }
- // Merge Cookie headers into one "; "-delimited value.
- if cookies := rp.header["Cookie"]; len(cookies) > 1 {
- rp.header.Set("Cookie", strings.Join(cookies, "; "))
- }
-
- // Setup Trailers
- var trailer http.Header
- for _, v := range rp.header["Trailer"] {
- for _, key := range strings.Split(v, ",") {
- key = http.CanonicalHeaderKey(textproto.TrimString(key))
- switch key {
- case "Transfer-Encoding", "Trailer", "Content-Length":
- // Bogus. (copy of http1 rules)
- // Ignore.
- default:
- if trailer == nil {
- trailer = make(http.Header)
- }
- trailer[key] = nil
- }
- }
- }
- delete(rp.header, "Trailer")
-
- var url_ *url.URL
- var requestURI string
- if rp.method == "CONNECT" {
- url_ = &url.URL{Host: rp.authority}
- requestURI = rp.authority // mimic HTTP/1 server behavior
- } else {
- var err error
- url_, err = url.ParseRequestURI(rp.path)
- if err != nil {
- return nil, nil, sc.countError("bad_path", streamError(st.id, ErrCodeProtocol))
- }
- requestURI = rp.path
+ res := httpcommon.NewServerRequest(rp)
+ if res.InvalidReason != "" {
+ return nil, nil, sc.countError(res.InvalidReason, streamError(st.id, ErrCodeProtocol))
}
body := &requestBody{
conn: sc,
stream: st,
- needsContinue: needsContinue,
+ needsContinue: res.NeedsContinue,
}
- req := &http.Request{
- Method: rp.method,
- URL: url_,
+ req := (&http.Request{
+ Method: rp.Method,
+ URL: res.URL,
RemoteAddr: sc.remoteAddrStr,
- Header: rp.header,
- RequestURI: requestURI,
+ Header: rp.Header,
+ RequestURI: res.RequestURI,
Proto: "HTTP/2.0",
ProtoMajor: 2,
ProtoMinor: 0,
TLS: tlsState,
- Host: rp.authority,
+ Host: rp.Authority,
Body: body,
- Trailer: trailer,
- }
- req = req.WithContext(st.ctx)
-
+ Trailer: res.Trailer,
+ }).WithContext(st.ctx)
rw := sc.newResponseWriter(st, req)
return rw, req, nil
}
@@ -2855,6 +2874,11 @@ func (w *responseWriter) SetWriteDeadline(deadline time.Time) error {
return nil
}
+func (w *responseWriter) EnableFullDuplex() error {
+ // We always support full duplex responses, so this is a no-op.
+ return nil
+}
+
func (w *responseWriter) Flush() {
w.FlushError()
}
@@ -3204,12 +3228,12 @@ func (sc *serverConn) startPush(msg *startPushRequest) {
// we start in "half closed (remote)" for simplicity.
// See further comments at the definition of stateHalfClosedRemote.
promised := sc.newStream(promisedID, msg.parent.id, stateHalfClosedRemote)
- rw, req, err := sc.newWriterAndRequestNoBody(promised, requestParam{
- method: msg.method,
- scheme: msg.url.Scheme,
- authority: msg.url.Host,
- path: msg.url.RequestURI(),
- header: cloneHeader(msg.header), // clone since handler runs concurrently with writing the PUSH_PROMISE
+ rw, req, err := sc.newWriterAndRequestNoBody(promised, httpcommon.ServerRequestParam{
+ Method: msg.method,
+ Scheme: msg.url.Scheme,
+ Authority: msg.url.Host,
+ Path: msg.url.RequestURI(),
+ Header: cloneHeader(msg.header), // clone since handler runs concurrently with writing the PUSH_PROMISE
})
if err != nil {
// Should not happen, since we've already validated msg.url.
@@ -3301,7 +3325,7 @@ func (sc *serverConn) countError(name string, err error) error {
if sc == nil || sc.srv == nil {
return err
}
- f := sc.srv.CountError
+ f := sc.countErrorFunc
if f == nil {
return err
}
diff --git a/vendor/golang.org/x/net/http2/transport.go b/vendor/golang.org/x/net/http2/transport.go
index 98a49c6b6e..f26356b9cd 100644
--- a/vendor/golang.org/x/net/http2/transport.go
+++ b/vendor/golang.org/x/net/http2/transport.go
@@ -25,8 +25,6 @@ import (
"net/http"
"net/http/httptrace"
"net/textproto"
- "os"
- "sort"
"strconv"
"strings"
"sync"
@@ -36,6 +34,7 @@ import (
"golang.org/x/net/http/httpguts"
"golang.org/x/net/http2/hpack"
"golang.org/x/net/idna"
+ "golang.org/x/net/internal/httpcommon"
)
const (
@@ -203,6 +202,20 @@ func (t *Transport) markNewGoroutine() {
}
}
+func (t *Transport) now() time.Time {
+ if t != nil && t.transportTestHooks != nil {
+ return t.transportTestHooks.group.Now()
+ }
+ return time.Now()
+}
+
+func (t *Transport) timeSince(when time.Time) time.Duration {
+ if t != nil && t.transportTestHooks != nil {
+ return t.now().Sub(when)
+ }
+ return time.Since(when)
+}
+
// newTimer creates a new time.Timer, or a synthetic timer in tests.
func (t *Transport) newTimer(d time.Duration) timer {
if t.transportTestHooks != nil {
@@ -227,40 +240,26 @@ func (t *Transport) contextWithTimeout(ctx context.Context, d time.Duration) (co
}
func (t *Transport) maxHeaderListSize() uint32 {
- if t.MaxHeaderListSize == 0 {
+ n := int64(t.MaxHeaderListSize)
+ if t.t1 != nil && t.t1.MaxResponseHeaderBytes != 0 {
+ n = t.t1.MaxResponseHeaderBytes
+ if n > 0 {
+ n = adjustHTTP1MaxHeaderSize(n)
+ }
+ }
+ if n <= 0 {
return 10 << 20
}
- if t.MaxHeaderListSize == 0xffffffff {
+ if n >= 0xffffffff {
return 0
}
- return t.MaxHeaderListSize
-}
-
-func (t *Transport) maxFrameReadSize() uint32 {
- if t.MaxReadFrameSize == 0 {
- return 0 // use the default provided by the peer
- }
- if t.MaxReadFrameSize < minMaxFrameSize {
- return minMaxFrameSize
- }
- if t.MaxReadFrameSize > maxFrameSize {
- return maxFrameSize
- }
- return t.MaxReadFrameSize
+ return uint32(n)
}
func (t *Transport) disableCompression() bool {
return t.DisableCompression || (t.t1 != nil && t.t1.DisableCompression)
}
-func (t *Transport) pingTimeout() time.Duration {
- if t.PingTimeout == 0 {
- return 15 * time.Second
- }
- return t.PingTimeout
-
-}
-
// ConfigureTransport configures a net/http HTTP/1 Transport to use HTTP/2.
// It returns an error if t1 has already been HTTP/2-enabled.
//
@@ -296,8 +295,8 @@ func configureTransports(t1 *http.Transport) (*Transport, error) {
if !strSliceContains(t1.TLSClientConfig.NextProtos, "http/1.1") {
t1.TLSClientConfig.NextProtos = append(t1.TLSClientConfig.NextProtos, "http/1.1")
}
- upgradeFn := func(authority string, c *tls.Conn) http.RoundTripper {
- addr := authorityAddr("https", authority)
+ upgradeFn := func(scheme, authority string, c net.Conn) http.RoundTripper {
+ addr := authorityAddr(scheme, authority)
if used, err := connPool.addConnIfNeeded(addr, t2, c); err != nil {
go c.Close()
return erringRoundTripper{err}
@@ -308,18 +307,37 @@ func configureTransports(t1 *http.Transport) (*Transport, error) {
// was unknown)
go c.Close()
}
+ if scheme == "http" {
+ return (*unencryptedTransport)(t2)
+ }
return t2
}
- if m := t1.TLSNextProto; len(m) == 0 {
- t1.TLSNextProto = map[string]func(string, *tls.Conn) http.RoundTripper{
- "h2": upgradeFn,
+ if t1.TLSNextProto == nil {
+ t1.TLSNextProto = make(map[string]func(string, *tls.Conn) http.RoundTripper)
+ }
+ t1.TLSNextProto[NextProtoTLS] = func(authority string, c *tls.Conn) http.RoundTripper {
+ return upgradeFn("https", authority, c)
+ }
+ // The "unencrypted_http2" TLSNextProto key is used to pass off non-TLS HTTP/2 conns.
+ t1.TLSNextProto[nextProtoUnencryptedHTTP2] = func(authority string, c *tls.Conn) http.RoundTripper {
+ nc, err := unencryptedNetConnFromTLSConn(c)
+ if err != nil {
+ go c.Close()
+ return erringRoundTripper{err}
}
- } else {
- m["h2"] = upgradeFn
+ return upgradeFn("http", authority, nc)
}
return t2, nil
}
+// unencryptedTransport is a Transport with a RoundTrip method that
+// always permits http:// URLs.
+type unencryptedTransport Transport
+
+func (t *unencryptedTransport) RoundTrip(req *http.Request) (*http.Response, error) {
+ return (*Transport)(t).RoundTripOpt(req, RoundTripOpt{allowHTTP: true})
+}
+
func (t *Transport) connPool() ClientConnPool {
t.connPoolOnce.Do(t.initConnPool)
return t.connPoolOrDef
@@ -339,7 +357,7 @@ type ClientConn struct {
t *Transport
tconn net.Conn // usually *tls.Conn, except specialized impls
tlsState *tls.ConnectionState // nil only for specialized impls
- reused uint32 // whether conn is being reused; atomic
+ atomicReused uint32 // whether conn is being reused; atomic
singleUse bool // whether being used for a single http.Request
getConnCalled bool // used by clientConnPool
@@ -350,31 +368,55 @@ type ClientConn struct {
idleTimeout time.Duration // or 0 for never
idleTimer timer
- mu sync.Mutex // guards following
- cond *sync.Cond // hold mu; broadcast on flow/closed changes
- flow outflow // our conn-level flow control quota (cs.outflow is per stream)
- inflow inflow // peer's conn-level flow control
- doNotReuse bool // whether conn is marked to not be reused for any future requests
- closing bool
- closed bool
- seenSettings bool // true if we've seen a settings frame, false otherwise
- wantSettingsAck bool // we sent a SETTINGS frame and haven't heard back
- goAway *GoAwayFrame // if non-nil, the GoAwayFrame we received
- goAwayDebug string // goAway frame's debug data, retained as a string
- streams map[uint32]*clientStream // client-initiated
- streamsReserved int // incr by ReserveNewRequest; decr on RoundTrip
- nextStreamID uint32
- pendingRequests int // requests blocked and waiting to be sent because len(streams) == maxConcurrentStreams
- pings map[[8]byte]chan struct{} // in flight ping data to notification channel
- br *bufio.Reader
- lastActive time.Time
- lastIdle time.Time // time last idle
+ mu sync.Mutex // guards following
+ cond *sync.Cond // hold mu; broadcast on flow/closed changes
+ flow outflow // our conn-level flow control quota (cs.outflow is per stream)
+ inflow inflow // peer's conn-level flow control
+ doNotReuse bool // whether conn is marked to not be reused for any future requests
+ closing bool
+ closed bool
+ closedOnIdle bool // true if conn was closed for idleness
+ seenSettings bool // true if we've seen a settings frame, false otherwise
+ seenSettingsChan chan struct{} // closed when seenSettings is true or frame reading fails
+ wantSettingsAck bool // we sent a SETTINGS frame and haven't heard back
+ goAway *GoAwayFrame // if non-nil, the GoAwayFrame we received
+ goAwayDebug string // goAway frame's debug data, retained as a string
+ streams map[uint32]*clientStream // client-initiated
+ streamsReserved int // incr by ReserveNewRequest; decr on RoundTrip
+ nextStreamID uint32
+ pendingRequests int // requests blocked and waiting to be sent because len(streams) == maxConcurrentStreams
+ pings map[[8]byte]chan struct{} // in flight ping data to notification channel
+ br *bufio.Reader
+ lastActive time.Time
+ lastIdle time.Time // time last idle
// Settings from peer: (also guarded by wmu)
- maxFrameSize uint32
- maxConcurrentStreams uint32
- peerMaxHeaderListSize uint64
- peerMaxHeaderTableSize uint32
- initialWindowSize uint32
+ maxFrameSize uint32
+ maxConcurrentStreams uint32
+ peerMaxHeaderListSize uint64
+ peerMaxHeaderTableSize uint32
+ initialWindowSize uint32
+ initialStreamRecvWindowSize int32
+ readIdleTimeout time.Duration
+ pingTimeout time.Duration
+ extendedConnectAllowed bool
+
+ // rstStreamPingsBlocked works around an unfortunate gRPC behavior.
+ // gRPC strictly limits the number of PING frames that it will receive.
+ // The default is two pings per two hours, but the limit resets every time
+ // the gRPC endpoint sends a HEADERS or DATA frame. See golang/go#70575.
+ //
+ // rstStreamPingsBlocked is set after receiving a response to a PING frame
+ // bundled with an RST_STREAM (see pendingResets below), and cleared after
+ // receiving a HEADERS or DATA frame.
+ rstStreamPingsBlocked bool
+
+ // pendingResets is the number of RST_STREAM frames we have sent to the peer,
+ // without confirming that the peer has received them. When we send a RST_STREAM,
+ // we bundle it with a PING frame, unless a PING is already in flight. We count
+ // the reset stream against the connection's concurrency limit until we get
+ // a PING response. This limits the number of requests we'll try to send to a
+ // completely unresponsive connection.
+ pendingResets int
// reqHeaderMu is a 1-element semaphore channel controlling access to sending new requests.
// Write to reqHeaderMu to lock it, read from it to unlock.
@@ -432,12 +474,12 @@ type clientStream struct {
sentHeaders bool
// owned by clientConnReadLoop:
- firstByte bool // got the first response byte
- pastHeaders bool // got first MetaHeadersFrame (actual headers)
- pastTrailers bool // got optional second MetaHeadersFrame (trailers)
- num1xx uint8 // number of 1xx responses seen
- readClosed bool // peer sent an END_STREAM flag
- readAborted bool // read loop reset the stream
+ firstByte bool // got the first response byte
+ pastHeaders bool // got first MetaHeadersFrame (actual headers)
+ pastTrailers bool // got optional second MetaHeadersFrame (trailers)
+ readClosed bool // peer sent an END_STREAM flag
+ readAborted bool // read loop reset the stream
+ totalHeaderSize int64 // total size of 1xx headers seen
trailer http.Header // accumulated trailers
resTrailer *http.Header // client's Response.Trailer
@@ -499,6 +541,7 @@ func (cs *clientStream) closeReqBodyLocked() {
}
type stickyErrWriter struct {
+ group synctestGroupInterface
conn net.Conn
timeout time.Duration
err *error
@@ -508,22 +551,9 @@ func (sew stickyErrWriter) Write(p []byte) (n int, err error) {
if *sew.err != nil {
return 0, *sew.err
}
- for {
- if sew.timeout != 0 {
- sew.conn.SetWriteDeadline(time.Now().Add(sew.timeout))
- }
- nn, err := sew.conn.Write(p[n:])
- n += nn
- if n < len(p) && nn > 0 && errors.Is(err, os.ErrDeadlineExceeded) {
- // Keep extending the deadline so long as we're making progress.
- continue
- }
- if sew.timeout != 0 {
- sew.conn.SetWriteDeadline(time.Time{})
- }
- *sew.err = err
- return n, err
- }
+ n, err = writeWithByteTimeout(sew.group, sew.conn, sew.timeout, p)
+ *sew.err = err
+ return n, err
}
// noCachedConnError is the concrete type of ErrNoCachedConn, which
@@ -554,6 +584,8 @@ type RoundTripOpt struct {
// no cached connection is available, RoundTripOpt
// will return ErrNoCachedConn.
OnlyCachedConn bool
+
+ allowHTTP bool // allow http:// URLs
}
func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
@@ -586,7 +618,14 @@ func authorityAddr(scheme string, authority string) (addr string) {
// RoundTripOpt is like RoundTrip, but takes options.
func (t *Transport) RoundTripOpt(req *http.Request, opt RoundTripOpt) (*http.Response, error) {
- if !(req.URL.Scheme == "https" || (req.URL.Scheme == "http" && t.AllowHTTP)) {
+ switch req.URL.Scheme {
+ case "https":
+ // Always okay.
+ case "http":
+ if !t.AllowHTTP && !opt.allowHTTP {
+ return nil, errors.New("http2: unencrypted HTTP/2 not enabled")
+ }
+ default:
return nil, errors.New("http2: unsupported scheme")
}
@@ -597,7 +636,7 @@ func (t *Transport) RoundTripOpt(req *http.Request, opt RoundTripOpt) (*http.Res
t.vlogf("http2: Transport failed to get client conn for %s: %v", addr, err)
return nil, err
}
- reused := !atomic.CompareAndSwapUint32(&cc.reused, 0, 1)
+ reused := !atomic.CompareAndSwapUint32(&cc.atomicReused, 0, 1)
traceGotConn(req, cc, reused)
res, err := cc.RoundTrip(req)
if err != nil && retry <= 6 {
@@ -622,6 +661,22 @@ func (t *Transport) RoundTripOpt(req *http.Request, opt RoundTripOpt) (*http.Res
}
}
}
+ if err == errClientConnNotEstablished {
+ // This ClientConn was created recently,
+ // this is the first request to use it,
+ // and the connection is closed and not usable.
+ //
+ // In this state, cc.idleTimer will remove the conn from the pool
+ // when it fires. Stop the timer and remove it here so future requests
+ // won't try to use this connection.
+ //
+ // If the timer has already fired and we're racing it, the redundant
+ // call to MarkDead is harmless.
+ if cc.idleTimer != nil {
+ cc.idleTimer.Stop()
+ }
+ t.connPool().MarkDead(cc)
+ }
if err != nil {
t.vlogf("RoundTrip failure: %v", err)
return nil, err
@@ -640,9 +695,10 @@ func (t *Transport) CloseIdleConnections() {
}
var (
- errClientConnClosed = errors.New("http2: client conn is closed")
- errClientConnUnusable = errors.New("http2: client conn not usable")
- errClientConnGotGoAway = errors.New("http2: Transport received Server's graceful shutdown GOAWAY")
+ errClientConnClosed = errors.New("http2: client conn is closed")
+ errClientConnUnusable = errors.New("http2: client conn not usable")
+ errClientConnNotEstablished = errors.New("http2: client conn could not be established")
+ errClientConnGotGoAway = errors.New("http2: Transport received Server's graceful shutdown GOAWAY")
)
// shouldRetryRequest is called by RoundTrip when a request fails to get
@@ -758,44 +814,38 @@ func (t *Transport) expectContinueTimeout() time.Duration {
return t.t1.ExpectContinueTimeout
}
-func (t *Transport) maxDecoderHeaderTableSize() uint32 {
- if v := t.MaxDecoderHeaderTableSize; v > 0 {
- return v
- }
- return initialHeaderTableSize
-}
-
-func (t *Transport) maxEncoderHeaderTableSize() uint32 {
- if v := t.MaxEncoderHeaderTableSize; v > 0 {
- return v
- }
- return initialHeaderTableSize
-}
-
func (t *Transport) NewClientConn(c net.Conn) (*ClientConn, error) {
return t.newClientConn(c, t.disableKeepAlives())
}
func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, error) {
+ conf := configFromTransport(t)
cc := &ClientConn{
- t: t,
- tconn: c,
- readerDone: make(chan struct{}),
- nextStreamID: 1,
- maxFrameSize: 16 << 10, // spec default
- initialWindowSize: 65535, // spec default
- maxConcurrentStreams: initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings.
- peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead.
- streams: make(map[uint32]*clientStream),
- singleUse: singleUse,
- wantSettingsAck: true,
- pings: make(map[[8]byte]chan struct{}),
- reqHeaderMu: make(chan struct{}, 1),
- }
+ t: t,
+ tconn: c,
+ readerDone: make(chan struct{}),
+ nextStreamID: 1,
+ maxFrameSize: 16 << 10, // spec default
+ initialWindowSize: 65535, // spec default
+ initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream,
+ maxConcurrentStreams: initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings.
+ peerMaxHeaderListSize: 0xffffffffffffffff, // "infinite", per spec. Use 2^64-1 instead.
+ streams: make(map[uint32]*clientStream),
+ singleUse: singleUse,
+ seenSettingsChan: make(chan struct{}),
+ wantSettingsAck: true,
+ readIdleTimeout: conf.SendPingTimeout,
+ pingTimeout: conf.PingTimeout,
+ pings: make(map[[8]byte]chan struct{}),
+ reqHeaderMu: make(chan struct{}, 1),
+ lastActive: t.now(),
+ }
+ var group synctestGroupInterface
if t.transportTestHooks != nil {
t.markNewGoroutine()
t.transportTestHooks.newclientconn(cc)
c = cc.tconn
+ group = t.group
}
if VerboseLogs {
t.vlogf("http2: Transport creating client conn %p to %v", cc, c.RemoteAddr())
@@ -807,30 +857,25 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
// TODO: adjust this writer size to account for frame size +
// MTU + crypto/tls record padding.
cc.bw = bufio.NewWriter(stickyErrWriter{
+ group: group,
conn: c,
- timeout: t.WriteByteTimeout,
+ timeout: conf.WriteByteTimeout,
err: &cc.werr,
})
cc.br = bufio.NewReader(c)
cc.fr = NewFramer(cc.bw, cc.br)
- if t.maxFrameReadSize() != 0 {
- cc.fr.SetMaxReadFrameSize(t.maxFrameReadSize())
- }
+ cc.fr.SetMaxReadFrameSize(conf.MaxReadFrameSize)
if t.CountError != nil {
cc.fr.countError = t.CountError
}
- maxHeaderTableSize := t.maxDecoderHeaderTableSize()
+ maxHeaderTableSize := conf.MaxDecoderHeaderTableSize
cc.fr.ReadMetaHeaders = hpack.NewDecoder(maxHeaderTableSize, nil)
cc.fr.MaxHeaderListSize = t.maxHeaderListSize()
cc.henc = hpack.NewEncoder(&cc.hbuf)
- cc.henc.SetMaxDynamicTableSizeLimit(t.maxEncoderHeaderTableSize())
+ cc.henc.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize)
cc.peerMaxHeaderTableSize = initialHeaderTableSize
- if t.AllowHTTP {
- cc.nextStreamID = 3
- }
-
if cs, ok := c.(connectionStater); ok {
state := cs.ConnectionState()
cc.tlsState = &state
@@ -838,11 +883,9 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
initialSettings := []Setting{
{ID: SettingEnablePush, Val: 0},
- {ID: SettingInitialWindowSize, Val: transportDefaultStreamFlow},
- }
- if max := t.maxFrameReadSize(); max != 0 {
- initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: max})
+ {ID: SettingInitialWindowSize, Val: uint32(cc.initialStreamRecvWindowSize)},
}
+ initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: conf.MaxReadFrameSize})
if max := t.maxHeaderListSize(); max != 0 {
initialSettings = append(initialSettings, Setting{ID: SettingMaxHeaderListSize, Val: max})
}
@@ -852,8 +895,8 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
cc.bw.Write(clientPreface)
cc.fr.WriteSettings(initialSettings...)
- cc.fr.WriteWindowUpdate(0, transportDefaultConnFlow)
- cc.inflow.init(transportDefaultConnFlow + initialWindowSize)
+ cc.fr.WriteWindowUpdate(0, uint32(conf.MaxUploadBufferPerConnection))
+ cc.inflow.init(conf.MaxUploadBufferPerConnection + initialWindowSize)
cc.bw.Flush()
if cc.werr != nil {
cc.Close()
@@ -871,7 +914,7 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
}
func (cc *ClientConn) healthCheck() {
- pingTimeout := cc.t.pingTimeout()
+ pingTimeout := cc.pingTimeout
// We don't need to periodically ping in the health check, because the readLoop of ClientConn will
// trigger the healthCheck again if there is no frame received.
ctx, cancel := cc.t.contextWithTimeout(context.Background(), pingTimeout)
@@ -999,7 +1042,7 @@ func (cc *ClientConn) State() ClientConnState {
return ClientConnState{
Closed: cc.closed,
Closing: cc.closing || cc.singleUse || cc.doNotReuse || cc.goAway != nil,
- StreamsActive: len(cc.streams),
+ StreamsActive: len(cc.streams) + cc.pendingResets,
StreamsReserved: cc.streamsReserved,
StreamsPending: cc.pendingRequests,
LastIdle: cc.lastIdle,
@@ -1031,16 +1074,40 @@ func (cc *ClientConn) idleStateLocked() (st clientConnIdleState) {
// writing it.
maxConcurrentOkay = true
} else {
- maxConcurrentOkay = int64(len(cc.streams)+cc.streamsReserved+1) <= int64(cc.maxConcurrentStreams)
+ // We can take a new request if the total of
+ // - active streams;
+ // - reservation slots for new streams; and
+ // - streams for which we have sent a RST_STREAM and a PING,
+ // but received no subsequent frame
+ // is less than the concurrency limit.
+ maxConcurrentOkay = cc.currentRequestCountLocked() < int(cc.maxConcurrentStreams)
}
st.canTakeNewRequest = cc.goAway == nil && !cc.closed && !cc.closing && maxConcurrentOkay &&
!cc.doNotReuse &&
int64(cc.nextStreamID)+2*int64(cc.pendingRequests) < math.MaxInt32 &&
!cc.tooIdleLocked()
+
+ // If this connection has never been used for a request and is closed,
+ // then let it take a request (which will fail).
+ // If the conn was closed for idleness, we're racing the idle timer;
+ // don't try to use the conn. (Issue #70515.)
+ //
+ // This avoids a situation where an error early in a connection's lifetime
+ // goes unreported.
+ if cc.nextStreamID == 1 && cc.streamsReserved == 0 && cc.closed && !cc.closedOnIdle {
+ st.canTakeNewRequest = true
+ }
+
return
}
+// currentRequestCountLocked reports the number of concurrency slots currently in use:
+// active streams, reserved slots, and resets still awaiting a PING response (pendingResets).
+func (cc *ClientConn) currentRequestCountLocked() int {
+ return len(cc.streams) + cc.streamsReserved + cc.pendingResets
+}
+
func (cc *ClientConn) canTakeNewRequestLocked() bool {
st := cc.idleStateLocked()
return st.canTakeNewRequest
@@ -1053,7 +1120,7 @@ func (cc *ClientConn) tooIdleLocked() bool {
// times are compared based on their wall time. We don't want
// to reuse a connection that's been sitting idle during
// VM/laptop suspend if monotonic time was also frozen.
- return cc.idleTimeout != 0 && !cc.lastIdle.IsZero() && time.Since(cc.lastIdle.Round(0)) > cc.idleTimeout
+ return cc.idleTimeout != 0 && !cc.lastIdle.IsZero() && cc.t.timeSince(cc.lastIdle.Round(0)) > cc.idleTimeout
}
// onIdleTimeout is called from a time.AfterFunc goroutine. It will
@@ -1091,6 +1158,7 @@ func (cc *ClientConn) closeIfIdle() {
return
}
cc.closed = true
+ cc.closedOnIdle = true
nextID := cc.nextStreamID
// TODO: do clients send GOAWAY too? maybe? Just Close:
cc.mu.Unlock()
@@ -1207,23 +1275,6 @@ func (cc *ClientConn) closeForLostPing() {
// exported. At least they'll be DeepEqual for h1-vs-h2 comparisons tests.
var errRequestCanceled = errors.New("net/http: request canceled")
-func commaSeparatedTrailers(req *http.Request) (string, error) {
- keys := make([]string, 0, len(req.Trailer))
- for k := range req.Trailer {
- k = canonicalHeader(k)
- switch k {
- case "Transfer-Encoding", "Trailer", "Content-Length":
- return "", fmt.Errorf("invalid Trailer key %q", k)
- }
- keys = append(keys, k)
- }
- if len(keys) > 0 {
- sort.Strings(keys)
- return strings.Join(keys, ","), nil
- }
- return "", nil
-}
-
func (cc *ClientConn) responseHeaderTimeout() time.Duration {
if cc.t.t1 != nil {
return cc.t.t1.ResponseHeaderTimeout
@@ -1235,22 +1286,6 @@ func (cc *ClientConn) responseHeaderTimeout() time.Duration {
return 0
}
-// checkConnHeaders checks whether req has any invalid connection-level headers.
-// per RFC 7540 section 8.1.2.2: Connection-Specific Header Fields.
-// Certain headers are special-cased as okay but not transmitted later.
-func checkConnHeaders(req *http.Request) error {
- if v := req.Header.Get("Upgrade"); v != "" {
- return fmt.Errorf("http2: invalid Upgrade request header: %q", req.Header["Upgrade"])
- }
- if vv := req.Header["Transfer-Encoding"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && vv[0] != "chunked") {
- return fmt.Errorf("http2: invalid Transfer-Encoding request header: %q", vv)
- }
- if vv := req.Header["Connection"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && !asciiEqualFold(vv[0], "close") && !asciiEqualFold(vv[0], "keep-alive")) {
- return fmt.Errorf("http2: invalid Connection request header: %q", vv)
- }
- return nil
-}
-
// actualContentLength returns a sanitized version of
// req.ContentLength, where 0 actually means zero (not unknown) and -1
// means unknown.
@@ -1296,25 +1331,7 @@ func (cc *ClientConn) roundTrip(req *http.Request, streamf func(*clientStream))
donec: make(chan struct{}),
}
- // TODO(bradfitz): this is a copy of the logic in net/http. Unify somewhere?
- if !cc.t.disableCompression() &&
- req.Header.Get("Accept-Encoding") == "" &&
- req.Header.Get("Range") == "" &&
- !cs.isHead {
- // Request gzip only, not deflate. Deflate is ambiguous and
- // not as universally supported anyway.
- // See: https://zlib.net/zlib_faq.html#faq39
- //
- // Note that we don't request this for HEAD requests,
- // due to a bug in nginx:
- // http://trac.nginx.org/nginx/ticket/358
- // https://golang.org/issue/5522
- //
- // We don't request gzip if the request is for a range, since
- // auto-decoding a portion of a gzipped document will just fail
- // anyway. See https://golang.org/issue/8923
- cs.requestedGzip = true
- }
+ cs.requestedGzip = httpcommon.IsRequestGzip(req.Method, req.Header, cc.t.disableCompression())
go cs.doRequest(req, streamf)
@@ -1415,6 +1432,8 @@ func (cs *clientStream) doRequest(req *http.Request, streamf func(*clientStream)
cs.cleanupWriteRequest(err)
}
+var errExtendedConnectNotSupported = errors.New("net/http: extended connect not supported by peer") // returned for an extended CONNECT request when cc.extendedConnectAllowed is false
+
// writeRequest sends a request.
//
// It returns nil after the request is written, the response read,
@@ -1426,8 +1445,11 @@ func (cs *clientStream) writeRequest(req *http.Request, streamf func(*clientStre
cc := cs.cc
ctx := cs.ctx
- if err := checkConnHeaders(req); err != nil {
- return err
+ // Extended CONNECT requests wait (below) for the peer's first SETTINGS frame.
+ // A server can change the setting later, but we only wait for the first frame.
+ var isExtendedConnect bool
+ if req.Method == "CONNECT" && req.Header.Get(":protocol") != "" {
+ isExtendedConnect = true
}
// Acquire the new-request lock by writing to reqHeaderMu.
@@ -1436,6 +1458,18 @@ func (cs *clientStream) writeRequest(req *http.Request, streamf func(*clientStre
if cc.reqHeaderMu == nil {
panic("RoundTrip on uninitialized ClientConn") // for tests
}
+ if isExtendedConnect {
+ select {
+ case <-cs.reqCancel:
+ return errRequestCanceled
+ case <-ctx.Done():
+ return ctx.Err()
+ case <-cc.seenSettingsChan:
+ if !cc.extendedConnectAllowed {
+ return errExtendedConnectNotSupported
+ }
+ }
+ }
select {
case cc.reqHeaderMu <- struct{}{}:
case <-cs.reqCancel:
@@ -1574,26 +1608,39 @@ func (cs *clientStream) encodeAndWriteHeaders(req *http.Request) error {
// we send: HEADERS{1}, CONTINUATION{0,} + DATA{0,} (DATA is
// sent by writeRequestBody below, along with any Trailers,
// again in form HEADERS{1}, CONTINUATION{0,})
- trailers, err := commaSeparatedTrailers(req)
- if err != nil {
- return err
- }
- hasTrailers := trailers != ""
- contentLen := actualContentLength(req)
- hasBody := contentLen != 0
- hdrs, err := cc.encodeHeaders(req, cs.requestedGzip, trailers, contentLen)
+ cc.hbuf.Reset()
+ res, err := encodeRequestHeaders(req, cs.requestedGzip, cc.peerMaxHeaderListSize, func(name, value string) {
+ cc.writeHeader(name, value)
+ })
if err != nil {
- return err
+ return fmt.Errorf("http2: %w", err)
}
+ hdrs := cc.hbuf.Bytes()
// Write the request.
- endStream := !hasBody && !hasTrailers
+ endStream := !res.HasBody && !res.HasTrailers
cs.sentHeaders = true
err = cc.writeHeaders(cs.ID, endStream, int(cc.maxFrameSize), hdrs)
traceWroteHeaders(cs.trace)
return err
}
+func encodeRequestHeaders(req *http.Request, addGzipHeader bool, peerMaxHeaderListSize uint64, headerf func(name, value string)) (httpcommon.EncodeHeadersResult, error) {
+ return httpcommon.EncodeHeaders(req.Context(), httpcommon.EncodeHeadersParam{
+ Request: httpcommon.Request{ // the subset of req that is handed to httpcommon
+ Header: req.Header,
+ Trailer: req.Trailer,
+ URL: req.URL,
+ Host: req.Host,
+ Method: req.Method,
+ ActualContentLength: actualContentLength(req), // sanitized length: 0 means zero, -1 means unknown
+ },
+ AddGzipHeader: addGzipHeader,
+ PeerMaxHeaderListSize: peerMaxHeaderListSize,
+ DefaultUserAgent: defaultUserAgent,
+ }, headerf)
+}
+
// cleanupWriteRequest performs post-request tasks.
//
// If err (the result of writeRequest) is non-nil and the stream is not closed,
@@ -1617,6 +1664,7 @@ func (cs *clientStream) cleanupWriteRequest(err error) {
cs.reqBodyClosed = make(chan struct{})
}
bodyClosed := cs.reqBodyClosed
+ closeOnIdle := cc.singleUse || cc.doNotReuse || cc.t.disableKeepAlives() || cc.goAway != nil
cc.mu.Unlock()
if mustCloseBody {
cs.reqBody.Close()
@@ -1641,16 +1689,44 @@ func (cs *clientStream) cleanupWriteRequest(err error) {
if cs.sentHeaders {
if se, ok := err.(StreamError); ok {
if se.Cause != errFromPeer {
- cc.writeStreamReset(cs.ID, se.Code, err)
+ cc.writeStreamReset(cs.ID, se.Code, false, err)
}
} else {
- cc.writeStreamReset(cs.ID, ErrCodeCancel, err)
+ // We're cancelling an in-flight request.
+ //
+ // This could be due to the server becoming unresponsive.
+ // To avoid sending too many requests on a dead connection,
+ // we let the request continue to consume a concurrency slot
+ // until we can confirm the server is still responding.
+ // We do this by sending a PING frame along with the RST_STREAM
+ // (unless a ping is already in flight).
+ //
+ // For simplicity, we don't bother tracking the PING payload:
+ // We reset cc.pendingResets any time we receive a PING ACK.
+ //
+ // We skip this if the conn is going to be closed on idle,
+ // because it's short lived and will probably be closed before
+ // we get the ping response.
+ ping := false
+ if !closeOnIdle {
+ cc.mu.Lock()
+ // rstStreamPingsBlocked works around a gRPC behavior:
+ // see comment on the field for details.
+ if !cc.rstStreamPingsBlocked {
+ if cc.pendingResets == 0 {
+ ping = true
+ }
+ cc.pendingResets++
+ }
+ cc.mu.Unlock()
+ }
+ cc.writeStreamReset(cs.ID, ErrCodeCancel, ping, err)
}
}
cs.bufPipe.CloseWithError(err) // no-op if already closed
} else {
if cs.sentHeaders && !cs.sentEndStream {
- cc.writeStreamReset(cs.ID, ErrCodeNo, nil)
+ cc.writeStreamReset(cs.ID, ErrCodeNo, false, nil)
}
cs.bufPipe.CloseWithError(errRequestCanceled)
}
@@ -1672,12 +1748,17 @@ func (cs *clientStream) cleanupWriteRequest(err error) {
// Must hold cc.mu.
func (cc *ClientConn) awaitOpenSlotForStreamLocked(cs *clientStream) error {
for {
- cc.lastActive = time.Now()
+ if cc.closed && cc.nextStreamID == 1 && cc.streamsReserved == 0 {
+ // This is the very first request sent to this connection.
+ // Return a fatal error which aborts the retry loop.
+ return errClientConnNotEstablished
+ }
+ cc.lastActive = cc.t.now()
if cc.closed || !cc.canTakeNewRequestLocked() {
return errClientConnUnusable
}
cc.lastIdle = time.Time{}
- if int64(len(cc.streams)) < int64(cc.maxConcurrentStreams) {
+ if cc.currentRequestCountLocked() < int(cc.maxConcurrentStreams) {
return nil
}
cc.pendingRequests++
@@ -1947,214 +2028,6 @@ func (cs *clientStream) awaitFlowControl(maxBytes int) (taken int32, err error)
}
}
-func validateHeaders(hdrs http.Header) string {
- for k, vv := range hdrs {
- if !httpguts.ValidHeaderFieldName(k) {
- return fmt.Sprintf("name %q", k)
- }
- for _, v := range vv {
- if !httpguts.ValidHeaderFieldValue(v) {
- // Don't include the value in the error,
- // because it may be sensitive.
- return fmt.Sprintf("value for header %q", k)
- }
- }
- }
- return ""
-}
-
-var errNilRequestURL = errors.New("http2: Request.URI is nil")
-
-// requires cc.wmu be held.
-func (cc *ClientConn) encodeHeaders(req *http.Request, addGzipHeader bool, trailers string, contentLength int64) ([]byte, error) {
- cc.hbuf.Reset()
- if req.URL == nil {
- return nil, errNilRequestURL
- }
-
- host := req.Host
- if host == "" {
- host = req.URL.Host
- }
- host, err := httpguts.PunycodeHostPort(host)
- if err != nil {
- return nil, err
- }
- if !httpguts.ValidHostHeader(host) {
- return nil, errors.New("http2: invalid Host header")
- }
-
- var path string
- if req.Method != "CONNECT" {
- path = req.URL.RequestURI()
- if !validPseudoPath(path) {
- orig := path
- path = strings.TrimPrefix(path, req.URL.Scheme+"://"+host)
- if !validPseudoPath(path) {
- if req.URL.Opaque != "" {
- return nil, fmt.Errorf("invalid request :path %q from URL.Opaque = %q", orig, req.URL.Opaque)
- } else {
- return nil, fmt.Errorf("invalid request :path %q", orig)
- }
- }
- }
- }
-
- // Check for any invalid headers+trailers and return an error before we
- // potentially pollute our hpack state. (We want to be able to
- // continue to reuse the hpack encoder for future requests)
- if err := validateHeaders(req.Header); err != "" {
- return nil, fmt.Errorf("invalid HTTP header %s", err)
- }
- if err := validateHeaders(req.Trailer); err != "" {
- return nil, fmt.Errorf("invalid HTTP trailer %s", err)
- }
-
- enumerateHeaders := func(f func(name, value string)) {
- // 8.1.2.3 Request Pseudo-Header Fields
- // The :path pseudo-header field includes the path and query parts of the
- // target URI (the path-absolute production and optionally a '?' character
- // followed by the query production, see Sections 3.3 and 3.4 of
- // [RFC3986]).
- f(":authority", host)
- m := req.Method
- if m == "" {
- m = http.MethodGet
- }
- f(":method", m)
- if req.Method != "CONNECT" {
- f(":path", path)
- f(":scheme", req.URL.Scheme)
- }
- if trailers != "" {
- f("trailer", trailers)
- }
-
- var didUA bool
- for k, vv := range req.Header {
- if asciiEqualFold(k, "host") || asciiEqualFold(k, "content-length") {
- // Host is :authority, already sent.
- // Content-Length is automatic, set below.
- continue
- } else if asciiEqualFold(k, "connection") ||
- asciiEqualFold(k, "proxy-connection") ||
- asciiEqualFold(k, "transfer-encoding") ||
- asciiEqualFold(k, "upgrade") ||
- asciiEqualFold(k, "keep-alive") {
- // Per 8.1.2.2 Connection-Specific Header
- // Fields, don't send connection-specific
- // fields. We have already checked if any
- // are error-worthy so just ignore the rest.
- continue
- } else if asciiEqualFold(k, "user-agent") {
- // Match Go's http1 behavior: at most one
- // User-Agent. If set to nil or empty string,
- // then omit it. Otherwise if not mentioned,
- // include the default (below).
- didUA = true
- if len(vv) < 1 {
- continue
- }
- vv = vv[:1]
- if vv[0] == "" {
- continue
- }
- } else if asciiEqualFold(k, "cookie") {
- // Per 8.1.2.5 To allow for better compression efficiency, the
- // Cookie header field MAY be split into separate header fields,
- // each with one or more cookie-pairs.
- for _, v := range vv {
- for {
- p := strings.IndexByte(v, ';')
- if p < 0 {
- break
- }
- f("cookie", v[:p])
- p++
- // strip space after semicolon if any.
- for p+1 <= len(v) && v[p] == ' ' {
- p++
- }
- v = v[p:]
- }
- if len(v) > 0 {
- f("cookie", v)
- }
- }
- continue
- }
-
- for _, v := range vv {
- f(k, v)
- }
- }
- if shouldSendReqContentLength(req.Method, contentLength) {
- f("content-length", strconv.FormatInt(contentLength, 10))
- }
- if addGzipHeader {
- f("accept-encoding", "gzip")
- }
- if !didUA {
- f("user-agent", defaultUserAgent)
- }
- }
-
- // Do a first pass over the headers counting bytes to ensure
- // we don't exceed cc.peerMaxHeaderListSize. This is done as a
- // separate pass before encoding the headers to prevent
- // modifying the hpack state.
- hlSize := uint64(0)
- enumerateHeaders(func(name, value string) {
- hf := hpack.HeaderField{Name: name, Value: value}
- hlSize += uint64(hf.Size())
- })
-
- if hlSize > cc.peerMaxHeaderListSize {
- return nil, errRequestHeaderListSize
- }
-
- trace := httptrace.ContextClientTrace(req.Context())
- traceHeaders := traceHasWroteHeaderField(trace)
-
- // Header list size is ok. Write the headers.
- enumerateHeaders(func(name, value string) {
- name, ascii := lowerHeader(name)
- if !ascii {
- // Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
- // field names have to be ASCII characters (just as in HTTP/1.x).
- return
- }
- cc.writeHeader(name, value)
- if traceHeaders {
- traceWroteHeaderField(trace, name, value)
- }
- })
-
- return cc.hbuf.Bytes(), nil
-}
-
-// shouldSendReqContentLength reports whether the http2.Transport should send
-// a "content-length" request header. This logic is basically a copy of the net/http
-// transferWriter.shouldSendContentLength.
-// The contentLength is the corrected contentLength (so 0 means actually 0, not unknown).
-// -1 means unknown.
-func shouldSendReqContentLength(method string, contentLength int64) bool {
- if contentLength > 0 {
- return true
- }
- if contentLength < 0 {
- return false
- }
- // For zero bodies, whether we send a content-length depends on the method.
- // It also kinda doesn't matter for http2 either way, with END_STREAM.
- switch method {
- case "POST", "PUT", "PATCH":
- return true
- default:
- return false
- }
-}
-
// requires cc.wmu be held.
func (cc *ClientConn) encodeTrailers(trailer http.Header) ([]byte, error) {
cc.hbuf.Reset()
@@ -2171,7 +2044,7 @@ func (cc *ClientConn) encodeTrailers(trailer http.Header) ([]byte, error) {
}
for k, vv := range trailer {
- lowKey, ascii := lowerHeader(k)
+ lowKey, ascii := httpcommon.LowerHeader(k)
if !ascii {
// Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
// field names have to be ASCII characters (just as in HTTP/1.x).
@@ -2203,7 +2076,7 @@ type resAndError struct {
func (cc *ClientConn) addStreamLocked(cs *clientStream) {
cs.flow.add(int32(cc.initialWindowSize))
cs.flow.setConnFlow(&cc.flow)
- cs.inflow.init(transportDefaultStreamFlow)
+ cs.inflow.init(cc.initialStreamRecvWindowSize)
cs.ID = cc.nextStreamID
cc.nextStreamID += 2
cc.streams[cs.ID] = cs
@@ -2219,10 +2092,10 @@ func (cc *ClientConn) forgetStreamID(id uint32) {
if len(cc.streams) != slen-1 {
panic("forgetting unknown stream id")
}
- cc.lastActive = time.Now()
+ cc.lastActive = cc.t.now()
if len(cc.streams) == 0 && cc.idleTimer != nil {
cc.idleTimer.Reset(cc.idleTimeout)
- cc.lastIdle = time.Now()
+ cc.lastIdle = cc.t.now()
}
// Wake up writeRequestBody via clientStream.awaitFlowControl and
// wake up RoundTrip if there is a pending request.
@@ -2282,7 +2155,6 @@ func isEOFOrNetReadError(err error) bool {
func (rl *clientConnReadLoop) cleanup() {
cc := rl.cc
- cc.t.connPool().MarkDead(cc)
defer cc.closeConn()
defer close(cc.readerDone)
@@ -2306,6 +2178,27 @@ func (rl *clientConnReadLoop) cleanup() {
}
cc.closed = true
+ // If the connection has never been used, and has been open for only a short time,
+ // leave it in the connection pool for a little while.
+ //
+ // This avoids a situation where new connections are constantly created,
+ // added to the pool, fail, and are removed from the pool, without any error
+ // being surfaced to the user.
+ unusedWaitTime := 5 * time.Second
+ if cc.idleTimeout > 0 && unusedWaitTime > cc.idleTimeout {
+ unusedWaitTime = cc.idleTimeout
+ }
+ idleTime := cc.t.now().Sub(cc.lastActive)
+ if atomic.LoadUint32(&cc.atomicReused) == 0 && idleTime < unusedWaitTime && !cc.closedOnIdle {
+ cc.idleTimer = cc.t.afterFunc(unusedWaitTime-idleTime, func() {
+ cc.t.connPool().MarkDead(cc)
+ })
+ } else {
+ cc.mu.Unlock() // avoid any deadlocks in MarkDead
+ cc.t.connPool().MarkDead(cc)
+ cc.mu.Lock()
+ }
+
for _, cs := range cc.streams {
select {
case <-cs.peerClosed:
@@ -2317,6 +2210,13 @@ func (rl *clientConnReadLoop) cleanup() {
}
cc.cond.Broadcast()
cc.mu.Unlock()
+
+ if !cc.seenSettings {
+ // If we have a pending request that wants extended CONNECT,
+ // let it continue and fail with the connection error.
+ cc.extendedConnectAllowed = true
+ close(cc.seenSettingsChan)
+ }
}
// countReadFrameError calls Transport.CountError with a string
@@ -2349,7 +2249,7 @@ func (cc *ClientConn) countReadFrameError(err error) {
func (rl *clientConnReadLoop) run() error {
cc := rl.cc
gotSettings := false
- readIdleTimeout := cc.t.ReadIdleTimeout
+ readIdleTimeout := cc.readIdleTimeout
var t timer
if readIdleTimeout != 0 {
t = cc.t.afterFunc(readIdleTimeout, cc.healthCheck)
@@ -2363,7 +2263,7 @@ func (rl *clientConnReadLoop) run() error {
cc.vlogf("http2: Transport readFrame error on conn %p: (%T) %v", cc, err, err)
}
if se, ok := err.(StreamError); ok {
- if cs := rl.streamByID(se.StreamID); cs != nil {
+ if cs := rl.streamByID(se.StreamID, notHeaderOrDataFrame); cs != nil {
if se.Cause == nil {
se.Cause = cc.fr.errDetail
}
@@ -2415,7 +2315,7 @@ func (rl *clientConnReadLoop) run() error {
}
func (rl *clientConnReadLoop) processHeaders(f *MetaHeadersFrame) error {
- cs := rl.streamByID(f.StreamID)
+ cs := rl.streamByID(f.StreamID, headerOrDataFrame)
if cs == nil {
// We'd get here if we canceled a request while the
// server had its response still in flight. So if this
@@ -2503,7 +2403,7 @@ func (rl *clientConnReadLoop) handleResponse(cs *clientStream, f *MetaHeadersFra
Status: status + " " + http.StatusText(statusCode),
}
for _, hf := range regularFields {
- key := canonicalHeader(hf.Name)
+ key := httpcommon.CanonicalHeader(hf.Name)
if key == "Trailer" {
t := res.Trailer
if t == nil {
@@ -2511,7 +2411,7 @@ func (rl *clientConnReadLoop) handleResponse(cs *clientStream, f *MetaHeadersFra
res.Trailer = t
}
foreachHeaderElement(hf.Value, func(v string) {
- t[canonicalHeader(v)] = nil
+ t[httpcommon.CanonicalHeader(v)] = nil
})
} else {
vv := header[key]
@@ -2533,15 +2433,34 @@ func (rl *clientConnReadLoop) handleResponse(cs *clientStream, f *MetaHeadersFra
if f.StreamEnded() {
return nil, errors.New("1xx informational response with END_STREAM flag")
}
- cs.num1xx++
- const max1xxResponses = 5 // arbitrary bound on number of informational responses, same as net/http
- if cs.num1xx > max1xxResponses {
- return nil, errors.New("http2: too many 1xx informational responses")
- }
if fn := cs.get1xxTraceFunc(); fn != nil {
+ // If the 1xx response is being delivered to the user,
+ // then they're responsible for limiting the number
+ // of responses.
if err := fn(statusCode, textproto.MIMEHeader(header)); err != nil {
return nil, err
}
+ } else {
+ // If the user didn't examine the 1xx response, then we
+ // limit the size of all 1xx headers.
+ //
+ // This differs a bit from the HTTP/1 implementation, which
+ // limits the size of all 1xx headers plus the final response.
+ // Use the larger limit of MaxHeaderListSize and
+ // net/http.Transport.MaxResponseHeaderBytes.
+ limit := int64(cs.cc.t.maxHeaderListSize())
+ if t1 := cs.cc.t.t1; t1 != nil && t1.MaxResponseHeaderBytes > limit {
+ limit = t1.MaxResponseHeaderBytes
+ }
+ for _, h := range f.Fields {
+ cs.totalHeaderSize += int64(h.Size())
+ }
+ if cs.totalHeaderSize > limit {
+ if VerboseLogs {
+ log.Printf("http2: 1xx informational responses too large")
+ }
+ return nil, errors.New("header list too large")
+ }
}
if statusCode == 100 {
traceGot100Continue(cs.trace)
@@ -2616,7 +2535,7 @@ func (rl *clientConnReadLoop) processTrailers(cs *clientStream, f *MetaHeadersFr
trailer := make(http.Header)
for _, hf := range f.RegularFields() {
- key := canonicalHeader(hf.Name)
+ key := httpcommon.CanonicalHeader(hf.Name)
trailer[key] = append(trailer[key], hf.Value)
}
cs.trailer = trailer
@@ -2725,7 +2644,7 @@ func (b transportResponseBody) Close() error {
func (rl *clientConnReadLoop) processData(f *DataFrame) error {
cc := rl.cc
- cs := rl.streamByID(f.StreamID)
+ cs := rl.streamByID(f.StreamID, headerOrDataFrame)
data := f.Data()
if cs == nil {
cc.mu.Lock()
@@ -2860,9 +2779,22 @@ func (rl *clientConnReadLoop) endStreamError(cs *clientStream, err error) {
cs.abortStream(err)
}
-func (rl *clientConnReadLoop) streamByID(id uint32) *clientStream {
+// Constants passed to streamByID for documentation purposes.
+const (
+ headerOrDataFrame = true
+ notHeaderOrDataFrame = false
+)
+
+// streamByID returns the stream with the given id, or nil if no stream has that id.
+// If headerOrData is true, it clears the connection's rstStreamPingsBlocked flag.
+func (rl *clientConnReadLoop) streamByID(id uint32, headerOrData bool) *clientStream {
rl.cc.mu.Lock()
defer rl.cc.mu.Unlock()
+ if headerOrData {
+ // Work around an unfortunate gRPC behavior.
+ // See comment on ClientConn.rstStreamPingsBlocked for details.
+ rl.cc.rstStreamPingsBlocked = false
+ }
cs := rl.cc.streams[id]
if cs != nil && !cs.readAborted {
return cs
@@ -2956,6 +2888,21 @@ func (rl *clientConnReadLoop) processSettingsNoWrite(f *SettingsFrame) error {
case SettingHeaderTableSize:
cc.henc.SetMaxDynamicTableSize(s.Val)
cc.peerMaxHeaderTableSize = s.Val
+ case SettingEnableConnectProtocol:
+ if err := s.Valid(); err != nil {
+ return err
+ }
+ // If the peer wants to send us SETTINGS_ENABLE_CONNECT_PROTOCOL,
+ // we require that it do so in the first SETTINGS frame.
+ //
+ // When we attempt to use extended CONNECT, we wait for the first
+ // SETTINGS frame to see if the server supports it. If we let the
+ // server enable the feature with a later SETTINGS frame, then
+ // users will see inconsistent results depending on whether we've
+ // seen that frame or not.
+ if !cc.seenSettings {
+ cc.extendedConnectAllowed = s.Val == 1
+ }
default:
cc.vlogf("Unhandled Setting: %v", s)
}
@@ -2973,6 +2920,7 @@ func (rl *clientConnReadLoop) processSettingsNoWrite(f *SettingsFrame) error {
// connection can establish to our default.
cc.maxConcurrentStreams = defaultMaxConcurrentStreams
}
+ close(cc.seenSettingsChan)
cc.seenSettings = true
}
@@ -2981,7 +2929,7 @@ func (rl *clientConnReadLoop) processSettingsNoWrite(f *SettingsFrame) error {
func (rl *clientConnReadLoop) processWindowUpdate(f *WindowUpdateFrame) error {
cc := rl.cc
- cs := rl.streamByID(f.StreamID)
+ cs := rl.streamByID(f.StreamID, notHeaderOrDataFrame)
if f.StreamID != 0 && cs == nil {
return nil
}
@@ -3010,7 +2958,7 @@ func (rl *clientConnReadLoop) processWindowUpdate(f *WindowUpdateFrame) error {
}
func (rl *clientConnReadLoop) processResetStream(f *RSTStreamFrame) error {
- cs := rl.streamByID(f.StreamID)
+ cs := rl.streamByID(f.StreamID, notHeaderOrDataFrame)
if cs == nil {
// TODO: return error if server tries to RST_STREAM an idle stream
return nil
@@ -3085,6 +3033,12 @@ func (rl *clientConnReadLoop) processPing(f *PingFrame) error {
close(c)
delete(cc.pings, f.Data)
}
+ if cc.pendingResets > 0 {
+ // See clientStream.cleanupWriteRequest.
+ cc.pendingResets = 0
+ cc.rstStreamPingsBlocked = true
+ cc.cond.Broadcast()
+ }
return nil
}
cc := rl.cc
@@ -3107,20 +3061,27 @@ func (rl *clientConnReadLoop) processPushPromise(f *PushPromiseFrame) error {
return ConnectionError(ErrCodeProtocol)
}
-func (cc *ClientConn) writeStreamReset(streamID uint32, code ErrCode, err error) {
+// writeStreamReset sends a RST_STREAM frame.
+// When ping is true, it also sends a PING frame with a random payload.
+func (cc *ClientConn) writeStreamReset(streamID uint32, code ErrCode, ping bool, err error) {
// TODO: map err to more interesting error codes, once the
// HTTP community comes up with some. But currently for
// RST_STREAM there's no equivalent to GOAWAY frame's debug
// data, and the error codes are all pretty vague ("cancel").
cc.wmu.Lock()
cc.fr.WriteRSTStream(streamID, code)
+ if ping {
+ var payload [8]byte
+ rand.Read(payload[:])
+ cc.fr.WritePing(false, payload)
+ }
cc.bw.Flush()
cc.wmu.Unlock()
}
var (
errResponseHeaderListSize = errors.New("http2: response header list larger than advertised limit")
- errRequestHeaderListSize = errors.New("http2: request header list larger than peer's advertised limit")
+ errRequestHeaderListSize = httpcommon.ErrRequestHeaderListSize
)
func (cc *ClientConn) logf(format string, args ...interface{}) {
@@ -3267,7 +3228,7 @@ func traceGotConn(req *http.Request, cc *ClientConn, reused bool) {
cc.mu.Lock()
ci.WasIdle = len(cc.streams) == 0 && reused
if ci.WasIdle && !cc.lastActive.IsZero() {
- ci.IdleTime = time.Since(cc.lastActive)
+ ci.IdleTime = cc.t.timeSince(cc.lastActive)
}
cc.mu.Unlock()
@@ -3304,16 +3265,6 @@ func traceFirstResponseByte(trace *httptrace.ClientTrace) {
}
}
-func traceHasWroteHeaderField(trace *httptrace.ClientTrace) bool {
- return trace != nil && trace.WroteHeaderField != nil
-}
-
-func traceWroteHeaderField(trace *httptrace.ClientTrace, k, v string) {
- if trace != nil && trace.WroteHeaderField != nil {
- trace.WroteHeaderField(k, []string{v})
- }
-}
-
func traceGot1xxResponseFunc(trace *httptrace.ClientTrace) func(int, textproto.MIMEHeader) error {
if trace != nil {
return trace.Got1xxResponse
diff --git a/vendor/golang.org/x/net/http2/unencrypted.go b/vendor/golang.org/x/net/http2/unencrypted.go
new file mode 100644
index 0000000000..b2de211613
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/unencrypted.go
@@ -0,0 +1,32 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http2
+
+import (
+ "crypto/tls"
+ "errors"
+ "net"
+)
+
+const nextProtoUnencryptedHTTP2 = "unencrypted_http2"
+
+// unencryptedNetConnFromTLSConn retrieves a net.Conn wrapped in a *tls.Conn.
+//
+// TLSNextProto functions accept a *tls.Conn.
+//
+// When passing an unencrypted HTTP/2 connection to a TLSNextProto function,
+// we pass a *tls.Conn with an underlying net.Conn containing the unencrypted connection.
+// To be extra careful about mistakes (accidentally dropping TLS encryption in a place
+// where we want it), the tls.Conn contains a net.Conn with an UnencryptedNetConn method
+// that returns the actual connection we want to use.
+func unencryptedNetConnFromTLSConn(tc *tls.Conn) (net.Conn, error) {
+ conner, ok := tc.NetConn().(interface {
+ UnencryptedNetConn() net.Conn
+ })
+ if !ok {
+ return nil, errors.New("http2: TLS conn unexpectedly found in unencrypted handoff")
+ }
+ return conner.UnencryptedNetConn(), nil
+}
diff --git a/vendor/golang.org/x/net/http2/write.go b/vendor/golang.org/x/net/http2/write.go
index 33f61398a1..fdb35b9477 100644
--- a/vendor/golang.org/x/net/http2/write.go
+++ b/vendor/golang.org/x/net/http2/write.go
@@ -13,6 +13,7 @@ import (
"golang.org/x/net/http/httpguts"
"golang.org/x/net/http2/hpack"
+ "golang.org/x/net/internal/httpcommon"
)
// writeFramer is implemented by any type that is used to write frames.
@@ -131,6 +132,16 @@ func (se StreamError) writeFrame(ctx writeContext) error {
func (se StreamError) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
+type writePing struct {
+ data [8]byte
+}
+
+func (w writePing) writeFrame(ctx writeContext) error {
+ return ctx.Framer().WritePing(false, w.data)
+}
+
+func (w writePing) staysWithinBuffer(max int) bool { return frameHeaderLen+len(w.data) <= max }
+
type writePingAck struct{ pf *PingFrame }
func (w writePingAck) writeFrame(ctx writeContext) error {
@@ -341,7 +352,7 @@ func encodeHeaders(enc *hpack.Encoder, h http.Header, keys []string) {
}
for _, k := range keys {
vv := h[k]
- k, ascii := lowerHeader(k)
+ k, ascii := httpcommon.LowerHeader(k)
if !ascii {
// Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
// field names have to be ASCII characters (just as in HTTP/1.x).
diff --git a/vendor/golang.org/x/net/internal/httpcommon/ascii.go b/vendor/golang.org/x/net/internal/httpcommon/ascii.go
new file mode 100644
index 0000000000..ed14da5afc
--- /dev/null
+++ b/vendor/golang.org/x/net/internal/httpcommon/ascii.go
@@ -0,0 +1,53 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package httpcommon
+
+import "strings"
+
+// The HTTP protocols are defined in terms of ASCII, not Unicode. This file
+// contains helper functions which may use Unicode-aware functions which would
+// otherwise be unsafe and could introduce vulnerabilities if used improperly.
+
+// asciiEqualFold is strings.EqualFold, ASCII only. It reports whether s and t
+// are equal, ASCII-case-insensitively.
+func asciiEqualFold(s, t string) bool {
+ if len(s) != len(t) {
+ return false
+ }
+ for i := 0; i < len(s); i++ {
+ if lower(s[i]) != lower(t[i]) {
+ return false
+ }
+ }
+ return true
+}
+
+// lower returns the ASCII lowercase version of b.
+func lower(b byte) byte {
+ if 'A' <= b && b <= 'Z' {
+ return b + ('a' - 'A')
+ }
+ return b
+}
+
+// isASCIIPrint returns whether s is ASCII and printable according to
+// https://tools.ietf.org/html/rfc20#section-4.2.
+func isASCIIPrint(s string) bool {
+ for i := 0; i < len(s); i++ {
+ if s[i] < ' ' || s[i] > '~' {
+ return false
+ }
+ }
+ return true
+}
+
+// asciiToLower returns the lowercase version of s if s is ASCII and printable,
+// and whether or not it was.
+func asciiToLower(s string) (lower string, ok bool) {
+ if !isASCIIPrint(s) {
+ return "", false
+ }
+ return strings.ToLower(s), true
+}
diff --git a/vendor/golang.org/x/net/http2/headermap.go b/vendor/golang.org/x/net/internal/httpcommon/headermap.go
similarity index 74%
rename from vendor/golang.org/x/net/http2/headermap.go
rename to vendor/golang.org/x/net/internal/httpcommon/headermap.go
index 149b3dd20e..92483d8e41 100644
--- a/vendor/golang.org/x/net/http2/headermap.go
+++ b/vendor/golang.org/x/net/internal/httpcommon/headermap.go
@@ -1,11 +1,11 @@
-// Copyright 2014 The Go Authors. All rights reserved.
+// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-package http2
+package httpcommon
import (
- "net/http"
+ "net/textproto"
"sync"
)
@@ -82,13 +82,15 @@ func buildCommonHeaderMaps() {
commonLowerHeader = make(map[string]string, len(common))
commonCanonHeader = make(map[string]string, len(common))
for _, v := range common {
- chk := http.CanonicalHeaderKey(v)
+ chk := textproto.CanonicalMIMEHeaderKey(v)
commonLowerHeader[chk] = v
commonCanonHeader[v] = chk
}
}
-func lowerHeader(v string) (lower string, ascii bool) {
+// LowerHeader returns the lowercase form of a header name,
+// used on the wire for HTTP/2 and HTTP/3 requests.
+func LowerHeader(v string) (lower string, ascii bool) {
buildCommonHeaderMapsOnce()
if s, ok := commonLowerHeader[v]; ok {
return s, true
@@ -96,10 +98,18 @@ func lowerHeader(v string) (lower string, ascii bool) {
return asciiToLower(v)
}
-func canonicalHeader(v string) string {
+// CanonicalHeader canonicalizes a header name. (For example, "host" becomes "Host".)
+func CanonicalHeader(v string) string {
buildCommonHeaderMapsOnce()
if s, ok := commonCanonHeader[v]; ok {
return s
}
- return http.CanonicalHeaderKey(v)
+ return textproto.CanonicalMIMEHeaderKey(v)
+}
+
+// CachedCanonicalHeader returns the canonical form of a well-known header name.
+func CachedCanonicalHeader(v string) (string, bool) {
+ buildCommonHeaderMapsOnce()
+ s, ok := commonCanonHeader[v]
+ return s, ok
}
diff --git a/vendor/golang.org/x/net/internal/httpcommon/request.go b/vendor/golang.org/x/net/internal/httpcommon/request.go
new file mode 100644
index 0000000000..4b70553179
--- /dev/null
+++ b/vendor/golang.org/x/net/internal/httpcommon/request.go
@@ -0,0 +1,467 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package httpcommon
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "net/http/httptrace"
+ "net/textproto"
+ "net/url"
+ "sort"
+ "strconv"
+ "strings"
+
+ "golang.org/x/net/http/httpguts"
+ "golang.org/x/net/http2/hpack"
+)
+
+var (
+ ErrRequestHeaderListSize = errors.New("request header list larger than peer's advertised limit")
+)
+
+// Request is a subset of http.Request.
+// It'd be simpler to pass an *http.Request, of course, but we can't depend on net/http
+// without creating a dependency cycle.
+type Request struct {
+ URL *url.URL
+ Method string
+ Host string
+ Header map[string][]string
+ Trailer map[string][]string
+ ActualContentLength int64 // 0 means 0, -1 means unknown
+}
+
+// EncodeHeadersParam is parameters to EncodeHeaders.
+type EncodeHeadersParam struct {
+ Request Request
+
+ // AddGzipHeader indicates that an "accept-encoding: gzip" header should be
+ // added to the request.
+ AddGzipHeader bool
+
+ // PeerMaxHeaderListSize, when non-zero, is the peer's MAX_HEADER_LIST_SIZE setting.
+ PeerMaxHeaderListSize uint64
+
+ // DefaultUserAgent is the User-Agent header to send when the request
+ // neither contains a User-Agent nor disables it.
+ DefaultUserAgent string
+}
+
+// EncodeHeadersResult is the result of EncodeHeaders.
+type EncodeHeadersResult struct {
+ HasBody bool
+ HasTrailers bool
+}
+
+// EncodeHeaders constructs request headers common to HTTP/2 and HTTP/3.
+// It validates a request and calls headerf with each pseudo-header and header
+// for the request.
+// The headerf function is called with the validated, canonicalized header name.
+func EncodeHeaders(ctx context.Context, param EncodeHeadersParam, headerf func(name, value string)) (res EncodeHeadersResult, _ error) {
+ req := param.Request
+
+ // Check for invalid connection-level headers.
+ if err := checkConnHeaders(req.Header); err != nil {
+ return res, err
+ }
+
+ if req.URL == nil {
+ return res, errors.New("Request.URL is nil")
+ }
+
+ host := req.Host
+ if host == "" {
+ host = req.URL.Host
+ }
+ host, err := httpguts.PunycodeHostPort(host)
+ if err != nil {
+ return res, err
+ }
+ if !httpguts.ValidHostHeader(host) {
+ return res, errors.New("invalid Host header")
+ }
+
+ // isNormalConnect is true if this is a non-extended CONNECT request.
+ isNormalConnect := false
+ var protocol string
+ if vv := req.Header[":protocol"]; len(vv) > 0 {
+ protocol = vv[0]
+ }
+ if req.Method == "CONNECT" && protocol == "" {
+ isNormalConnect = true
+ } else if protocol != "" && req.Method != "CONNECT" {
+ return res, errors.New("invalid :protocol header in non-CONNECT request")
+ }
+
+ // Validate the path, except for non-extended CONNECT requests which have no path.
+ var path string
+ if !isNormalConnect {
+ path = req.URL.RequestURI()
+ if !validPseudoPath(path) {
+ orig := path
+ path = strings.TrimPrefix(path, req.URL.Scheme+"://"+host)
+ if !validPseudoPath(path) {
+ if req.URL.Opaque != "" {
+ return res, fmt.Errorf("invalid request :path %q from URL.Opaque = %q", orig, req.URL.Opaque)
+ } else {
+ return res, fmt.Errorf("invalid request :path %q", orig)
+ }
+ }
+ }
+ }
+
+ // Check for any invalid headers+trailers and return an error before we
+ // potentially pollute our hpack state. (We want to be able to
+ // continue to reuse the hpack encoder for future requests)
+ if err := validateHeaders(req.Header); err != "" {
+ return res, fmt.Errorf("invalid HTTP header %s", err)
+ }
+ if err := validateHeaders(req.Trailer); err != "" {
+ return res, fmt.Errorf("invalid HTTP trailer %s", err)
+ }
+
+ trailers, err := commaSeparatedTrailers(req.Trailer)
+ if err != nil {
+ return res, err
+ }
+
+ enumerateHeaders := func(f func(name, value string)) {
+ // 8.1.2.3 Request Pseudo-Header Fields
+ // The :path pseudo-header field includes the path and query parts of the
+ // target URI (the path-absolute production and optionally a '?' character
+ // followed by the query production, see Sections 3.3 and 3.4 of
+ // [RFC3986]).
+ f(":authority", host)
+ m := req.Method
+ if m == "" {
+ m = "GET"
+ }
+ f(":method", m)
+ if !isNormalConnect {
+ f(":path", path)
+ f(":scheme", req.URL.Scheme)
+ }
+ if protocol != "" {
+ f(":protocol", protocol)
+ }
+ if trailers != "" {
+ f("trailer", trailers)
+ }
+
+ var didUA bool
+ for k, vv := range req.Header {
+ if asciiEqualFold(k, "host") || asciiEqualFold(k, "content-length") {
+ // Host is :authority, already sent.
+ // Content-Length is automatic, set below.
+ continue
+ } else if asciiEqualFold(k, "connection") ||
+ asciiEqualFold(k, "proxy-connection") ||
+ asciiEqualFold(k, "transfer-encoding") ||
+ asciiEqualFold(k, "upgrade") ||
+ asciiEqualFold(k, "keep-alive") {
+ // Per 8.1.2.2 Connection-Specific Header
+ // Fields, don't send connection-specific
+ // fields. We have already checked if any
+ // are error-worthy so just ignore the rest.
+ continue
+ } else if asciiEqualFold(k, "user-agent") {
+ // Match Go's http1 behavior: at most one
+ // User-Agent. If set to nil or empty string,
+ // then omit it. Otherwise if not mentioned,
+ // include the default (below).
+ didUA = true
+ if len(vv) < 1 {
+ continue
+ }
+ vv = vv[:1]
+ if vv[0] == "" {
+ continue
+ }
+ } else if asciiEqualFold(k, "cookie") {
+ // Per 8.1.2.5 To allow for better compression efficiency, the
+ // Cookie header field MAY be split into separate header fields,
+ // each with one or more cookie-pairs.
+ for _, v := range vv {
+ for {
+ p := strings.IndexByte(v, ';')
+ if p < 0 {
+ break
+ }
+ f("cookie", v[:p])
+ p++
+ // strip space after semicolon if any.
+ for p+1 <= len(v) && v[p] == ' ' {
+ p++
+ }
+ v = v[p:]
+ }
+ if len(v) > 0 {
+ f("cookie", v)
+ }
+ }
+ continue
+ } else if k == ":protocol" {
+ // :protocol pseudo-header was already sent above.
+ continue
+ }
+
+ for _, v := range vv {
+ f(k, v)
+ }
+ }
+ if shouldSendReqContentLength(req.Method, req.ActualContentLength) {
+ f("content-length", strconv.FormatInt(req.ActualContentLength, 10))
+ }
+ if param.AddGzipHeader {
+ f("accept-encoding", "gzip")
+ }
+ if !didUA {
+ f("user-agent", param.DefaultUserAgent)
+ }
+ }
+
+ // Do a first pass over the headers counting bytes to ensure
+ // we don't exceed param.PeerMaxHeaderListSize. This is done as a
+ // separate pass before encoding the headers to prevent
+ // modifying the hpack state.
+ if param.PeerMaxHeaderListSize > 0 {
+ hlSize := uint64(0)
+ enumerateHeaders(func(name, value string) {
+ hf := hpack.HeaderField{Name: name, Value: value}
+ hlSize += uint64(hf.Size())
+ })
+
+ if hlSize > param.PeerMaxHeaderListSize {
+ return res, ErrRequestHeaderListSize
+ }
+ }
+
+ trace := httptrace.ContextClientTrace(ctx)
+
+ // Header list size is ok. Write the headers.
+ enumerateHeaders(func(name, value string) {
+ name, ascii := LowerHeader(name)
+ if !ascii {
+ // Skip writing invalid headers. Per RFC 7540, Section 8.1.2, header
+ // field names have to be ASCII characters (just as in HTTP/1.x).
+ return
+ }
+
+ headerf(name, value)
+
+ if trace != nil && trace.WroteHeaderField != nil {
+ trace.WroteHeaderField(name, []string{value})
+ }
+ })
+
+ res.HasBody = req.ActualContentLength != 0
+ res.HasTrailers = trailers != ""
+ return res, nil
+}
+
+// IsRequestGzip reports whether we should add an Accept-Encoding: gzip header
+// for a request.
+func IsRequestGzip(method string, header map[string][]string, disableCompression bool) bool {
+ // TODO(bradfitz): this is a copy of the logic in net/http. Unify somewhere?
+ if !disableCompression &&
+ len(header["Accept-Encoding"]) == 0 &&
+ len(header["Range"]) == 0 &&
+ method != "HEAD" {
+ // Request gzip only, not deflate. Deflate is ambiguous and
+ // not as universally supported anyway.
+ // See: https://zlib.net/zlib_faq.html#faq39
+ //
+ // Note that we don't request this for HEAD requests,
+ // due to a bug in nginx:
+ // http://trac.nginx.org/nginx/ticket/358
+ // https://golang.org/issue/5522
+ //
+ // We don't request gzip if the request is for a range, since
+ // auto-decoding a portion of a gzipped document will just fail
+ // anyway. See https://golang.org/issue/8923
+ return true
+ }
+ return false
+}
+
+// checkConnHeaders checks whether h has any invalid connection-level headers.
+//
+// https://www.rfc-editor.org/rfc/rfc9114.html#section-4.2-3
+// https://www.rfc-editor.org/rfc/rfc9113.html#section-8.2.2-1
+//
+// Certain headers are special-cased as okay but not transmitted later.
+// For example, we allow "Transfer-Encoding: chunked", but drop the header when encoding.
+func checkConnHeaders(h map[string][]string) error {
+ if vv := h["Upgrade"]; len(vv) > 0 && (vv[0] != "" && vv[0] != "chunked") {
+ return fmt.Errorf("invalid Upgrade request header: %q", vv)
+ }
+ if vv := h["Transfer-Encoding"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && vv[0] != "chunked") {
+ return fmt.Errorf("invalid Transfer-Encoding request header: %q", vv)
+ }
+ if vv := h["Connection"]; len(vv) > 0 && (len(vv) > 1 || vv[0] != "" && !asciiEqualFold(vv[0], "close") && !asciiEqualFold(vv[0], "keep-alive")) {
+ return fmt.Errorf("invalid Connection request header: %q", vv)
+ }
+ return nil
+}
+
+func commaSeparatedTrailers(trailer map[string][]string) (string, error) {
+ keys := make([]string, 0, len(trailer))
+ for k := range trailer {
+ k = CanonicalHeader(k)
+ switch k {
+ case "Transfer-Encoding", "Trailer", "Content-Length":
+ return "", fmt.Errorf("invalid Trailer key %q", k)
+ }
+ keys = append(keys, k)
+ }
+ if len(keys) > 0 {
+ sort.Strings(keys)
+ return strings.Join(keys, ","), nil
+ }
+ return "", nil
+}
+
+// validPseudoPath reports whether v is a valid :path pseudo-header
+// value. It must be either:
+//
+// - a non-empty string starting with '/'
+// - the string '*', for OPTIONS requests.
+//
+// For now this is only used as a quick check for deciding when to clean
+// up Opaque URLs before sending requests from the Transport.
+// See golang.org/issue/16847
+//
+// We used to enforce that the path also didn't start with "//", but
+// Google's GFE accepts such paths and Chrome sends them, so ignore
+// that part of the spec. See golang.org/issue/19103.
+func validPseudoPath(v string) bool {
+ return (len(v) > 0 && v[0] == '/') || v == "*"
+}
+
+func validateHeaders(hdrs map[string][]string) string {
+ for k, vv := range hdrs {
+ if !httpguts.ValidHeaderFieldName(k) && k != ":protocol" {
+ return fmt.Sprintf("name %q", k)
+ }
+ for _, v := range vv {
+ if !httpguts.ValidHeaderFieldValue(v) {
+ // Don't include the value in the error,
+ // because it may be sensitive.
+ return fmt.Sprintf("value for header %q", k)
+ }
+ }
+ }
+ return ""
+}
+
+// shouldSendReqContentLength reports whether we should send
+// a "content-length" request header. This logic is basically a copy of the net/http
+// transferWriter.shouldSendContentLength.
+// The contentLength is the corrected contentLength (so 0 means actually 0, not unknown).
+// -1 means unknown.
+func shouldSendReqContentLength(method string, contentLength int64) bool {
+ if contentLength > 0 {
+ return true
+ }
+ if contentLength < 0 {
+ return false
+ }
+ // For zero bodies, whether we send a content-length depends on the method.
+ // It also kinda doesn't matter for http2 either way, with END_STREAM.
+ switch method {
+ case "POST", "PUT", "PATCH":
+ return true
+ default:
+ return false
+ }
+}
+
+// ServerRequestParam is parameters to NewServerRequest.
+type ServerRequestParam struct {
+ Method string
+ Scheme, Authority, Path string
+ Protocol string
+ Header map[string][]string
+}
+
+// ServerRequestResult is the result of NewServerRequest.
+type ServerRequestResult struct {
+ // Various http.Request fields.
+ URL *url.URL
+ RequestURI string
+ Trailer map[string][]string
+
+ NeedsContinue bool // client provided an "Expect: 100-continue" header
+
+ // If the request should be rejected, this is a short string suitable for passing
+ // to the http2 package's CountError function.
+ // It might be a bit odd to return errors this way rather than returning an error,
+ // but this ensures we don't forget to include a CountError reason.
+ InvalidReason string
+}
+
+func NewServerRequest(rp ServerRequestParam) ServerRequestResult {
+ needsContinue := httpguts.HeaderValuesContainsToken(rp.Header["Expect"], "100-continue")
+ if needsContinue {
+ delete(rp.Header, "Expect")
+ }
+ // Merge Cookie headers into one "; "-delimited value.
+ if cookies := rp.Header["Cookie"]; len(cookies) > 1 {
+ rp.Header["Cookie"] = []string{strings.Join(cookies, "; ")}
+ }
+
+ // Setup Trailers
+ var trailer map[string][]string
+ for _, v := range rp.Header["Trailer"] {
+ for _, key := range strings.Split(v, ",") {
+ key = textproto.CanonicalMIMEHeaderKey(textproto.TrimString(key))
+ switch key {
+ case "Transfer-Encoding", "Trailer", "Content-Length":
+ // Bogus. (copy of http1 rules)
+ // Ignore.
+ default:
+ if trailer == nil {
+ trailer = make(map[string][]string)
+ }
+ trailer[key] = nil
+ }
+ }
+ }
+ delete(rp.Header, "Trailer")
+
+ // "':authority' MUST NOT include the deprecated userinfo subcomponent
+ // for "http" or "https" schemed URIs."
+ // https://www.rfc-editor.org/rfc/rfc9113.html#section-8.3.1-2.3.8
+ if strings.IndexByte(rp.Authority, '@') != -1 && (rp.Scheme == "http" || rp.Scheme == "https") {
+ return ServerRequestResult{
+ InvalidReason: "userinfo_in_authority",
+ }
+ }
+
+ var url_ *url.URL
+ var requestURI string
+ if rp.Method == "CONNECT" && rp.Protocol == "" {
+ url_ = &url.URL{Host: rp.Authority}
+ requestURI = rp.Authority // mimic HTTP/1 server behavior
+ } else {
+ var err error
+ url_, err = url.ParseRequestURI(rp.Path)
+ if err != nil {
+ return ServerRequestResult{
+ InvalidReason: "bad_path",
+ }
+ }
+ requestURI = rp.Path
+ }
+
+ return ServerRequestResult{
+ URL: url_,
+ NeedsContinue: needsContinue,
+ RequestURI: requestURI,
+ Trailer: trailer,
+ }
+}
diff --git a/vendor/golang.org/x/net/proxy/per_host.go b/vendor/golang.org/x/net/proxy/per_host.go
index d7d4b8b6e3..32bdf435ec 100644
--- a/vendor/golang.org/x/net/proxy/per_host.go
+++ b/vendor/golang.org/x/net/proxy/per_host.go
@@ -7,6 +7,7 @@ package proxy
import (
"context"
"net"
+ "net/netip"
"strings"
)
@@ -57,7 +58,8 @@ func (p *PerHost) DialContext(ctx context.Context, network, addr string) (c net.
}
func (p *PerHost) dialerForRequest(host string) Dialer {
- if ip := net.ParseIP(host); ip != nil {
+ if nip, err := netip.ParseAddr(host); err == nil {
+ ip := net.IP(nip.AsSlice())
for _, net := range p.bypassNetworks {
if net.Contains(ip) {
return p.bypass
@@ -108,8 +110,8 @@ func (p *PerHost) AddFromString(s string) {
}
continue
}
- if ip := net.ParseIP(host); ip != nil {
- p.AddIP(ip)
+ if nip, err := netip.ParseAddr(host); err == nil {
+ p.AddIP(net.IP(nip.AsSlice()))
continue
}
if strings.HasPrefix(host, "*.") {
diff --git a/vendor/golang.org/x/net/websocket/websocket.go b/vendor/golang.org/x/net/websocket/websocket.go
index 923a5780ec..3448d20395 100644
--- a/vendor/golang.org/x/net/websocket/websocket.go
+++ b/vendor/golang.org/x/net/websocket/websocket.go
@@ -6,9 +6,10 @@
// as specified in RFC 6455.
//
// This package currently lacks some features found in an alternative
-// and more actively maintained WebSocket package:
+// and more actively maintained WebSocket packages:
//
-// https://pkg.go.dev/nhooyr.io/websocket
+// - [github.com/gorilla/websocket]
+// - [github.com/coder/websocket]
package websocket // import "golang.org/x/net/websocket"
import (
diff --git a/vendor/golang.org/x/sync/LICENSE b/vendor/golang.org/x/sync/LICENSE
index 6a66aea5ea..2a7cf70da6 100644
--- a/vendor/golang.org/x/sync/LICENSE
+++ b/vendor/golang.org/x/sync/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
+Copyright 2009 The Go Authors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer.
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
- * Neither the name of Google Inc. nor the names of its
+ * Neither the name of Google LLC nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go
index 948a3ee63d..a4ea5d14f1 100644
--- a/vendor/golang.org/x/sync/errgroup/errgroup.go
+++ b/vendor/golang.org/x/sync/errgroup/errgroup.go
@@ -46,7 +46,7 @@ func (g *Group) done() {
// returns a non-nil error or the first time Wait returns, whichever occurs
// first.
func WithContext(ctx context.Context) (*Group, context.Context) {
- ctx, cancel := withCancelCause(ctx)
+ ctx, cancel := context.WithCancelCause(ctx)
return &Group{cancel: cancel}, ctx
}
@@ -118,6 +118,7 @@ func (g *Group) TryGo(f func() error) bool {
// SetLimit limits the number of active goroutines in this group to at most n.
// A negative value indicates no limit.
+// A limit of zero will prevent any new goroutines from being added.
//
// Any subsequent call to the Go method will block until it can add an active
// goroutine without exceeding the configured limit.
diff --git a/vendor/golang.org/x/sync/errgroup/go120.go b/vendor/golang.org/x/sync/errgroup/go120.go
deleted file mode 100644
index f93c740b63..0000000000
--- a/vendor/golang.org/x/sync/errgroup/go120.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.20
-
-package errgroup
-
-import "context"
-
-func withCancelCause(parent context.Context) (context.Context, func(error)) {
- return context.WithCancelCause(parent)
-}
diff --git a/vendor/golang.org/x/sync/errgroup/pre_go120.go b/vendor/golang.org/x/sync/errgroup/pre_go120.go
deleted file mode 100644
index 88ce33434e..0000000000
--- a/vendor/golang.org/x/sync/errgroup/pre_go120.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build !go1.20
-
-package errgroup
-
-import "context"
-
-func withCancelCause(parent context.Context) (context.Context, func(error)) {
- ctx, cancel := context.WithCancel(parent)
- return ctx, func(error) { cancel() }
-}
diff --git a/vendor/golang.org/x/sys/LICENSE b/vendor/golang.org/x/sys/LICENSE
index 6a66aea5ea..2a7cf70da6 100644
--- a/vendor/golang.org/x/sys/LICENSE
+++ b/vendor/golang.org/x/sys/LICENSE
@@ -1,4 +1,4 @@
-Copyright (c) 2009 The Go Authors. All rights reserved.
+Copyright 2009 The Go Authors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -10,7 +10,7 @@ notice, this list of conditions and the following disclaimer.
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
- * Neither the name of Google Inc. nor the names of its
+ * Neither the name of Google LLC nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
diff --git a/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s
new file mode 100644
index 0000000000..ec2acfe540
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s
@@ -0,0 +1,17 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin && amd64 && gc
+
+#include "textflag.h"
+
+TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_sysctl(SB)
+GLOBL ·libc_sysctl_trampoline_addr(SB), RODATA, $8
+DATA ·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB)
+
+TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0
+ JMP libc_sysctlbyname(SB)
+GLOBL ·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8
+DATA ·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB)
diff --git a/vendor/golang.org/x/sys/cpu/cpu.go b/vendor/golang.org/x/sys/cpu/cpu.go
index 8fa707aa4b..9c105f23af 100644
--- a/vendor/golang.org/x/sys/cpu/cpu.go
+++ b/vendor/golang.org/x/sys/cpu/cpu.go
@@ -72,6 +72,9 @@ var X86 struct {
HasSSSE3 bool // Supplemental streaming SIMD extension 3
HasSSE41 bool // Streaming SIMD extension 4 and 4.1
HasSSE42 bool // Streaming SIMD extension 4 and 4.2
+ HasAVXIFMA bool // Advanced vector extension Integer Fused Multiply Add
+ HasAVXVNNI bool // Advanced vector extension Vector Neural Network Instructions
+ HasAVXVNNIInt8 bool // Advanced vector extension Vector Neural Network Int8 instructions
_ CacheLinePad
}
@@ -105,6 +108,8 @@ var ARM64 struct {
HasSVE bool // Scalable Vector Extensions
HasSVE2 bool // Scalable Vector Extensions 2
HasASIMDFHM bool // Advanced SIMD multiplication FP16 to FP32
+ HasDIT bool // Data Independent Timing support
+ HasI8MM bool // Advanced SIMD Int8 matrix multiplication instructions
_ CacheLinePad
}
@@ -199,6 +204,25 @@ var S390X struct {
_ CacheLinePad
}
+// RISCV64 contains the supported CPU features and performance characteristics for riscv64
+// platforms. The booleans in RISCV64, with the exception of HasFastMisaligned, indicate
+// the presence of RISC-V extensions.
+//
+// It is safe to assume that all the RV64G extensions are supported and so they are omitted from
+// this structure. As riscv64 Go programs require at least RV64G, the code that populates
+// this structure cannot run successfully if some of the RV64G extensions are missing.
+// The struct is padded to avoid false sharing.
+var RISCV64 struct {
+ _ CacheLinePad
+ HasFastMisaligned bool // Fast misaligned accesses
+ HasC bool // Compressed instruction-set extension
+ HasV bool // Vector extension compatible with RVV 1.0
+ HasZba bool // Address generation instructions extension
+ HasZbb bool // Basic bit-manipulation extension
+ HasZbs bool // Single-bit instructions extension
+ _ CacheLinePad
+}
+
func init() {
archInit()
initOptions()
diff --git a/vendor/golang.org/x/sys/cpu/cpu_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
index 0e27a21e1f..af2aa99f9f 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_arm64.go
@@ -38,6 +38,8 @@ func initOptions() {
{Name: "dcpop", Feature: &ARM64.HasDCPOP},
{Name: "asimddp", Feature: &ARM64.HasASIMDDP},
{Name: "asimdfhm", Feature: &ARM64.HasASIMDFHM},
+ {Name: "dit", Feature: &ARM64.HasDIT},
+ {Name: "i8mm", Feature: &ARM64.HasI8MM},
}
}
@@ -145,6 +147,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
ARM64.HasLRCPC = true
}
+ switch extractBits(isar1, 52, 55) {
+ case 1:
+ ARM64.HasI8MM = true
+ }
+
// ID_AA64PFR0_EL1
switch extractBits(pfr0, 16, 19) {
case 0:
@@ -168,6 +175,11 @@ func parseARM64SystemRegisters(isar0, isar1, pfr0 uint64) {
parseARM64SVERegister(getzfr0())
}
+
+ switch extractBits(pfr0, 48, 51) {
+ case 1:
+ ARM64.HasDIT = true
+ }
}
func parseARM64SVERegister(zfr0 uint64) {
diff --git a/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go
new file mode 100644
index 0000000000..b838cb9e95
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go
@@ -0,0 +1,61 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin && amd64 && gc
+
+package cpu
+
+// darwinSupportsAVX512 checks Darwin kernel for AVX512 support via sysctl
+// call (see issue 43089). It also restricts AVX512 support for Darwin to
+// kernel version 21.3.0 (MacOS 12.2.0) or later (see issue 49233).
+//
+// Background:
+// Darwin implements a special mechanism to economize on thread state when
+// AVX512 specific registers are not in use. This scheme minimizes state when
+// preempting threads that haven't yet used any AVX512 instructions, but adds
+// special requirements to check for AVX512 hardware support at runtime (e.g.
+// via sysctl call or commpage inspection). See issue 43089 and link below for
+// full background:
+// https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.1.10/osfmk/i386/fpu.c#L214-L240
+//
+// Additionally, all versions of the Darwin kernel from 19.6.0 through 21.2.0
+// (corresponding to MacOS 10.15.6 - 12.1) have a bug that can cause corruption
+// of the AVX512 mask registers (K0-K7) upon signal return. For this reason
+// AVX512 is considered unsafe to use on Darwin for kernel versions prior to
+// 21.3.0, where a fix has been confirmed. See issue 49233 for full background.
+func darwinSupportsAVX512() bool {
+ return darwinSysctlEnabled([]byte("hw.optional.avx512f\x00")) && darwinKernelVersionCheck(21, 3, 0)
+}
+
+// Ensure Darwin kernel version is at least major.minor.patch, avoiding dependencies
+func darwinKernelVersionCheck(major, minor, patch int) bool {
+ var release [256]byte
+ err := darwinOSRelease(&release)
+ if err != nil {
+ return false
+ }
+
+ var mmp [3]int
+ c := 0
+Loop:
+ for _, b := range release[:] {
+ switch {
+ case b >= '0' && b <= '9':
+ mmp[c] = 10*mmp[c] + int(b-'0')
+ case b == '.':
+ c++
+ if c > 2 {
+ return false
+ }
+ case b == 0:
+ break Loop
+ default:
+ return false
+ }
+ }
+ if c != 2 {
+ return false
+ }
+ return mmp[0] > major || mmp[0] == major && (mmp[1] > minor || mmp[1] == minor && mmp[2] >= patch)
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
index 910728fb16..32a44514e2 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
@@ -6,10 +6,10 @@
package cpu
-// cpuid is implemented in cpu_x86.s for gc compiler
+// cpuid is implemented in cpu_gc_x86.s for gc compiler
// and in cpu_gccgo.c for gccgo.
func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
-// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
+// xgetbv with ecx = 0 is implemented in cpu_gc_x86.s for gc compiler
// and in cpu_gccgo.c for gccgo.
func xgetbv() (eax, edx uint32)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s
similarity index 94%
rename from vendor/golang.org/x/sys/cpu/cpu_x86.s
rename to vendor/golang.org/x/sys/cpu/cpu_gc_x86.s
index 7d7ba33efb..ce208ce6d6 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s
@@ -18,7 +18,7 @@ TEXT ·cpuid(SB), NOSPLIT, $0-24
RET
// func xgetbv() (eax, edx uint32)
-TEXT ·xgetbv(SB),NOSPLIT,$0-8
+TEXT ·xgetbv(SB), NOSPLIT, $0-8
MOVL $0, CX
XGETBV
MOVL AX, eax+0(FP)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
index 99c60fe9f9..170d21ddfd 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
@@ -23,9 +23,3 @@ func xgetbv() (eax, edx uint32) {
gccgoXgetbv(&a, &d)
return a, d
}
-
-// gccgo doesn't build on Darwin, per:
-// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
-func darwinSupportsAVX512() bool {
- return false
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
index 3d386d0fc2..f1caf0f78e 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
@@ -35,8 +35,10 @@ const (
hwcap_SHA512 = 1 << 21
hwcap_SVE = 1 << 22
hwcap_ASIMDFHM = 1 << 23
+ hwcap_DIT = 1 << 24
hwcap2_SVE2 = 1 << 1
+ hwcap2_I8MM = 1 << 13
)
// linuxKernelCanEmulateCPUID reports whether we're running
@@ -106,9 +108,11 @@ func doinit() {
ARM64.HasSHA512 = isSet(hwCap, hwcap_SHA512)
ARM64.HasSVE = isSet(hwCap, hwcap_SVE)
ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
+ ARM64.HasDIT = isSet(hwCap, hwcap_DIT)
// HWCAP2 feature bits
ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2)
+ ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM)
}
func isSet(hwc uint, value uint) bool {
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
index cd63e73355..7d902b6847 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_noinit.go
@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x
+//go:build linux && !arm && !arm64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !s390x && !riscv64
package cpu
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
new file mode 100644
index 0000000000..cb4a0c5728
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_riscv64.go
@@ -0,0 +1,137 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cpu
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+// RISC-V extension discovery code for Linux. The approach here is to first try the riscv_hwprobe
+// syscall falling back to HWCAP to check for the C extension if riscv_hwprobe is not available.
+//
+// A note on detection of the Vector extension using HWCAP.
+//
+// Support for the Vector extension version 1.0 was added to the Linux kernel in release 6.5.
+// Support for the riscv_hwprobe syscall was added in 6.4. It follows that if the riscv_hwprobe
+// syscall is not available then neither is the Vector extension (which needs kernel support).
+// The riscv_hwprobe syscall should then be all we need to detect the Vector extension.
+// However, some RISC-V board manufacturers ship boards with an older kernel on top of which
+// they have back-ported various versions of the Vector extension patches but not the riscv_hwprobe
+// patches. These kernels advertise support for the Vector extension using HWCAP. Falling
+// back to HWCAP to detect the Vector extension, if riscv_hwprobe is not available, or simply not
+// bothering with riscv_hwprobe at all and just using HWCAP may then seem like an attractive option.
+//
+// Unfortunately, simply checking the 'V' bit in AT_HWCAP will not work as this bit is used by
+// RISC-V board and cloud instance providers to mean different things. The Lichee Pi 4A board
+// and the Scaleway RV1 cloud instances use the 'V' bit to advertise their support for the unratified
+// 0.7.1 version of the Vector Specification. The Banana Pi BPI-F3 and the CanMV-K230 board use
+// it to advertise support for 1.0 of the Vector extension. Versions 0.7.1 and 1.0 of the Vector
+// extension are binary incompatible. HWCAP can then not be used in isolation to populate the
+// HasV field as this field indicates that the underlying CPU is compatible with RVV 1.0.
+//
+// There is a way at runtime to distinguish between versions 0.7.1 and 1.0 of the Vector
+// specification by issuing a RVV 1.0 vsetvli instruction and checking the vill bit of the vtype
+// register. This check would allow us to safely detect version 1.0 of the Vector extension
+// with HWCAP, if riscv_hwprobe were not available. However, the check cannot
+// be added until the assembler supports the Vector instructions.
+//
+// Note the riscv_hwprobe syscall does not suffer from these ambiguities by design as all of the
+// extensions it advertises support for are explicitly versioned. It's also worth noting that
+// the riscv_hwprobe syscall is the only way to detect multi-letter RISC-V extensions, e.g., Zba.
+// These cannot be detected using HWCAP and so riscv_hwprobe must be used to detect the majority
+// of RISC-V extensions.
+//
+// Please see https://docs.kernel.org/arch/riscv/hwprobe.html for more information.
+
+// golang.org/x/sys/cpu is not allowed to depend on golang.org/x/sys/unix so we must
+// reproduce the constants, types and functions needed to make the riscv_hwprobe syscall
+// here.
+
+const (
+ // Copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+ riscv_HWPROBE_KEY_IMA_EXT_0 = 0x4
+ riscv_HWPROBE_IMA_C = 0x2
+ riscv_HWPROBE_IMA_V = 0x4
+ riscv_HWPROBE_EXT_ZBA = 0x8
+ riscv_HWPROBE_EXT_ZBB = 0x10
+ riscv_HWPROBE_EXT_ZBS = 0x20
+ riscv_HWPROBE_KEY_CPUPERF_0 = 0x5
+ riscv_HWPROBE_MISALIGNED_FAST = 0x3
+ riscv_HWPROBE_MISALIGNED_MASK = 0x7
+)
+
+const (
+ // sys_RISCV_HWPROBE is copied from golang.org/x/sys/unix/zsysnum_linux_riscv64.go.
+ sys_RISCV_HWPROBE = 258
+)
+
+// riscvHWProbePairs is copied from golang.org/x/sys/unix/ztypes_linux_riscv64.go.
+type riscvHWProbePairs struct {
+ key int64
+ value uint64
+}
+
+const (
+ // CPU features
+ hwcap_RISCV_ISA_C = 1 << ('C' - 'A')
+)
+
+func doinit() {
+ // A slice of key/value pair structures is passed to the RISCVHWProbe syscall. The key
+ // field should be initialised with one of the key constants defined above, e.g.,
+ // RISCV_HWPROBE_KEY_IMA_EXT_0. The syscall will set the value field to the appropriate value.
+ // If the kernel does not recognise a key it will set the key field to -1 and the value field to 0.
+
+ pairs := []riscvHWProbePairs{
+ {riscv_HWPROBE_KEY_IMA_EXT_0, 0},
+ {riscv_HWPROBE_KEY_CPUPERF_0, 0},
+ }
+
+ // This call only indicates that extensions are supported if they are implemented on all cores.
+ if riscvHWProbe(pairs, 0) {
+ if pairs[0].key != -1 {
+ v := uint(pairs[0].value)
+ RISCV64.HasC = isSet(v, riscv_HWPROBE_IMA_C)
+ RISCV64.HasV = isSet(v, riscv_HWPROBE_IMA_V)
+ RISCV64.HasZba = isSet(v, riscv_HWPROBE_EXT_ZBA)
+ RISCV64.HasZbb = isSet(v, riscv_HWPROBE_EXT_ZBB)
+ RISCV64.HasZbs = isSet(v, riscv_HWPROBE_EXT_ZBS)
+ }
+ if pairs[1].key != -1 {
+ v := pairs[1].value & riscv_HWPROBE_MISALIGNED_MASK
+ RISCV64.HasFastMisaligned = v == riscv_HWPROBE_MISALIGNED_FAST
+ }
+ }
+
+ // Let's double check with HWCAP if the C extension does not appear to be supported.
+ // This may happen if we're running on a kernel older than 6.4.
+
+ if !RISCV64.HasC {
+ RISCV64.HasC = isSet(hwCap, hwcap_RISCV_ISA_C)
+ }
+}
+
+func isSet(hwc uint, value uint) bool {
+ return hwc&value != 0
+}
+
+// riscvHWProbe is a simplified version of the generated wrapper function found in
+// golang.org/x/sys/unix/zsyscall_linux_riscv64.go. We simplify it by removing the
+// cpuCount and cpus parameters which we do not need. We always want to pass 0 for
+// these parameters here so the kernel only reports the extensions that are present
+// on all cores.
+func riscvHWProbe(pairs []riscvHWProbePairs, flags uint) bool {
+ var _zero uintptr
+ var p0 unsafe.Pointer
+ if len(pairs) > 0 {
+ p0 = unsafe.Pointer(&pairs[0])
+ } else {
+ p0 = unsafe.Pointer(&_zero)
+ }
+
+ _, _, e1 := syscall.Syscall6(sys_RISCV_HWPROBE, uintptr(p0), uintptr(len(pairs)), uintptr(0), uintptr(0), uintptr(flags), 0)
+ return e1 == 0
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_x86.go b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go
new file mode 100644
index 0000000000..a0fd7e2f75
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go
@@ -0,0 +1,11 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64p32 || (amd64 && (!darwin || !gc))
+
+package cpu
+
+func darwinSupportsAVX512() bool {
+ panic("only implemented for gc && amd64 && darwin")
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
index 7f0c79c004..aca3199c91 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_riscv64.go
@@ -8,4 +8,13 @@ package cpu
const cacheLineSize = 64
-func initOptions() {}
+func initOptions() {
+ options = []option{
+ {Name: "fastmisaligned", Feature: &RISCV64.HasFastMisaligned},
+ {Name: "c", Feature: &RISCV64.HasC},
+ {Name: "v", Feature: &RISCV64.HasV},
+ {Name: "zba", Feature: &RISCV64.HasZba},
+ {Name: "zbb", Feature: &RISCV64.HasZbb},
+ {Name: "zbs", Feature: &RISCV64.HasZbs},
+ }
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go
index c29f5e4c5a..1e642f3304 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go
@@ -53,6 +53,9 @@ func initOptions() {
{Name: "sse41", Feature: &X86.HasSSE41},
{Name: "sse42", Feature: &X86.HasSSE42},
{Name: "ssse3", Feature: &X86.HasSSSE3},
+ {Name: "avxifma", Feature: &X86.HasAVXIFMA},
+ {Name: "avxvnni", Feature: &X86.HasAVXVNNI},
+ {Name: "avxvnniint8", Feature: &X86.HasAVXVNNIInt8},
// These capabilities should always be enabled on amd64:
{Name: "sse2", Feature: &X86.HasSSE2, Required: runtime.GOARCH == "amd64"},
@@ -92,10 +95,8 @@ func archInit() {
osSupportsAVX = isSet(1, eax) && isSet(2, eax)
if runtime.GOOS == "darwin" {
- // Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers.
- // Since users can't rely on mask register contents, let's not advertise AVX-512 support.
- // See issue 49233.
- osSupportsAVX512 = false
+ // Darwin requires special AVX512 checks, see cpu_darwin_x86.go
+ osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512()
} else {
// Check if OPMASK and ZMM registers have OS support.
osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
@@ -108,7 +109,7 @@ func archInit() {
return
}
- _, ebx7, ecx7, edx7 := cpuid(7, 0)
+ eax7, ebx7, ecx7, edx7 := cpuid(7, 0)
X86.HasBMI1 = isSet(3, ebx7)
X86.HasAVX2 = isSet(5, ebx7) && osSupportsAVX
X86.HasBMI2 = isSet(8, ebx7)
@@ -136,14 +137,24 @@ func archInit() {
X86.HasAVX512VAES = isSet(9, ecx7)
X86.HasAVX512VBMI2 = isSet(6, ecx7)
X86.HasAVX512BITALG = isSet(12, ecx7)
-
- eax71, _, _, _ := cpuid(7, 1)
- X86.HasAVX512BF16 = isSet(5, eax71)
}
X86.HasAMXTile = isSet(24, edx7)
X86.HasAMXInt8 = isSet(25, edx7)
X86.HasAMXBF16 = isSet(22, edx7)
+
+ // These features depend on the second level of extended features.
+ if eax7 >= 1 {
+ eax71, _, _, edx71 := cpuid(7, 1)
+ if X86.HasAVX512 {
+ X86.HasAVX512BF16 = isSet(5, eax71)
+ }
+ if X86.HasAVX {
+ X86.HasAVXIFMA = isSet(23, eax71)
+ X86.HasAVXVNNI = isSet(4, eax71)
+ X86.HasAVXVNNIInt8 = isSet(4, edx71)
+ }
+ }
}
func isSet(bitpos uint, value uint32) bool {
diff --git a/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go
new file mode 100644
index 0000000000..4d0888b0c0
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go
@@ -0,0 +1,98 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Minimal copy of x/sys/unix so the cpu package can make a
+// system call on Darwin without depending on x/sys/unix.
+
+//go:build darwin && amd64 && gc
+
+package cpu
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+type _C_int int32
+
+// adapted from unix.Uname() at x/sys/unix/syscall_darwin.go L419
+func darwinOSRelease(release *[256]byte) error {
+ // from x/sys/unix/zerrors_openbsd_amd64.go
+ const (
+ CTL_KERN = 0x1
+ KERN_OSRELEASE = 0x2
+ )
+
+ mib := []_C_int{CTL_KERN, KERN_OSRELEASE}
+ n := unsafe.Sizeof(*release)
+
+ return sysctl(mib, &release[0], &n, nil, 0)
+}
+
+type Errno = syscall.Errno
+
+var _zero uintptr // Single-word zero for use when we need a valid pointer to 0 bytes.
+
+// from x/sys/unix/zsyscall_darwin_amd64.go L791-807
+func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error {
+ var _p0 unsafe.Pointer
+ if len(mib) > 0 {
+ _p0 = unsafe.Pointer(&mib[0])
+ } else {
+ _p0 = unsafe.Pointer(&_zero)
+ }
+ if _, _, err := syscall_syscall6(
+ libc_sysctl_trampoline_addr,
+ uintptr(_p0),
+ uintptr(len(mib)),
+ uintptr(unsafe.Pointer(old)),
+ uintptr(unsafe.Pointer(oldlen)),
+ uintptr(unsafe.Pointer(new)),
+ uintptr(newlen),
+ ); err != 0 {
+ return err
+ }
+
+ return nil
+}
+
+var libc_sysctl_trampoline_addr uintptr
+
+// adapted from internal/cpu/cpu_arm64_darwin.go
+func darwinSysctlEnabled(name []byte) bool {
+ out := int32(0)
+ nout := unsafe.Sizeof(out)
+ if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil {
+ return false
+ }
+ return out > 0
+}
+
+//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib"
+
+var libc_sysctlbyname_trampoline_addr uintptr
+
+// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix
+func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error {
+ if _, _, err := syscall_syscall6(
+ libc_sysctlbyname_trampoline_addr,
+ uintptr(unsafe.Pointer(name)),
+ uintptr(unsafe.Pointer(old)),
+ uintptr(unsafe.Pointer(oldlen)),
+ uintptr(unsafe.Pointer(new)),
+ uintptr(newlen),
+ 0,
+ ); err != 0 {
+ return err
+ }
+
+ return nil
+}
+
+//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib"
+
+// Implemented in the runtime package (runtime/sys_darwin.go)
+func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno)
+
+//go:linkname syscall_syscall6 syscall.syscall6
diff --git a/vendor/golang.org/x/sys/unix/README.md b/vendor/golang.org/x/sys/unix/README.md
index 7d3c060e12..6e08a76a71 100644
--- a/vendor/golang.org/x/sys/unix/README.md
+++ b/vendor/golang.org/x/sys/unix/README.md
@@ -156,7 +156,7 @@ from the generated architecture-specific files listed below, and merge these
into a common file for each OS.
The merge is performed in the following steps:
-1. Construct the set of common code that is idential in all architecture-specific files.
+1. Construct the set of common code that is identical in all architecture-specific files.
2. Write this common code to the merged file.
3. Remove the common code from all architecture-specific files.
diff --git a/vendor/golang.org/x/sys/unix/auxv.go b/vendor/golang.org/x/sys/unix/auxv.go
new file mode 100644
index 0000000000..37a82528f5
--- /dev/null
+++ b/vendor/golang.org/x/sys/unix/auxv.go
@@ -0,0 +1,36 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.21 && (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos)
+
+package unix
+
+import (
+ "syscall"
+ "unsafe"
+)
+
+//go:linkname runtime_getAuxv runtime.getAuxv
+func runtime_getAuxv() []uintptr
+
+// Auxv returns the ELF auxiliary vector as a sequence of key/value pairs.
+// The returned slice is always a fresh copy, owned by the caller.
+// It returns an error on non-ELF platforms, or if the auxiliary vector cannot be accessed,
+// which happens in some locked-down environments and build modes.
+func Auxv() ([][2]uintptr, error) {
+ vec := runtime_getAuxv()
+ vecLen := len(vec)
+
+ if vecLen == 0 {
+ return nil, syscall.ENOENT
+ }
+
+ if vecLen%2 != 0 {
+ return nil, syscall.EINVAL
+ }
+
+ result := make([]uintptr, vecLen)
+ copy(result, vec)
+ return unsafe.Slice((*[2]uintptr)(unsafe.Pointer(&result[0])), vecLen/2), nil
+}
diff --git a/vendor/golang.org/x/sys/unix/auxv_unsupported.go b/vendor/golang.org/x/sys/unix/auxv_unsupported.go
new file mode 100644
index 0000000000..1200487f2e
--- /dev/null
+++ b/vendor/golang.org/x/sys/unix/auxv_unsupported.go
@@ -0,0 +1,13 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !go1.21 && (aix || darwin || dragonfly || freebsd || linux || netbsd || openbsd || solaris || zos)
+
+package unix
+
+import "syscall"
+
+func Auxv() ([][2]uintptr, error) {
+ return nil, syscall.ENOTSUP
+}
diff --git a/vendor/golang.org/x/sys/unix/ioctl_linux.go b/vendor/golang.org/x/sys/unix/ioctl_linux.go
index dbe680eab8..7ca4fa12aa 100644
--- a/vendor/golang.org/x/sys/unix/ioctl_linux.go
+++ b/vendor/golang.org/x/sys/unix/ioctl_linux.go
@@ -58,6 +58,102 @@ func IoctlGetEthtoolDrvinfo(fd int, ifname string) (*EthtoolDrvinfo, error) {
return &value, err
}
+// IoctlGetEthtoolTsInfo fetches ethtool timestamping and PHC
+// association for the network device specified by ifname.
+func IoctlGetEthtoolTsInfo(fd int, ifname string) (*EthtoolTsInfo, error) {
+ ifr, err := NewIfreq(ifname)
+ if err != nil {
+ return nil, err
+ }
+
+ value := EthtoolTsInfo{Cmd: ETHTOOL_GET_TS_INFO}
+ ifrd := ifr.withData(unsafe.Pointer(&value))
+
+ err = ioctlIfreqData(fd, SIOCETHTOOL, &ifrd)
+ return &value, err
+}
+
+// IoctlGetHwTstamp retrieves the hardware timestamping configuration
+// for the network device specified by ifname.
+func IoctlGetHwTstamp(fd int, ifname string) (*HwTstampConfig, error) {
+ ifr, err := NewIfreq(ifname)
+ if err != nil {
+ return nil, err
+ }
+
+ value := HwTstampConfig{}
+ ifrd := ifr.withData(unsafe.Pointer(&value))
+
+ err = ioctlIfreqData(fd, SIOCGHWTSTAMP, &ifrd)
+ return &value, err
+}
+
+// IoctlSetHwTstamp updates the hardware timestamping configuration for
+// the network device specified by ifname.
+func IoctlSetHwTstamp(fd int, ifname string, cfg *HwTstampConfig) error {
+ ifr, err := NewIfreq(ifname)
+ if err != nil {
+ return err
+ }
+ ifrd := ifr.withData(unsafe.Pointer(cfg))
+ return ioctlIfreqData(fd, SIOCSHWTSTAMP, &ifrd)
+}
+
+// FdToClockID derives the clock ID from the file descriptor number
+// - see clock_gettime(3), FD_TO_CLOCKID macros. The resulting ID is
+// suitable for system calls like ClockGettime.
+func FdToClockID(fd int) int32 { return int32((int(^fd) << 3) | 3) }
+
+// IoctlPtpClockGetcaps returns the description of a given PTP device.
+func IoctlPtpClockGetcaps(fd int) (*PtpClockCaps, error) {
+ var value PtpClockCaps
+ err := ioctlPtr(fd, PTP_CLOCK_GETCAPS2, unsafe.Pointer(&value))
+ return &value, err
+}
+
+// IoctlPtpSysOffsetPrecise returns a description of the clock
+// offset compared to the system clock.
+func IoctlPtpSysOffsetPrecise(fd int) (*PtpSysOffsetPrecise, error) {
+ var value PtpSysOffsetPrecise
+ err := ioctlPtr(fd, PTP_SYS_OFFSET_PRECISE2, unsafe.Pointer(&value))
+ return &value, err
+}
+
+// IoctlPtpSysOffsetExtended returns an extended description of the
+// clock offset compared to the system clock. The samples parameter
+// specifies the desired number of measurements.
+func IoctlPtpSysOffsetExtended(fd int, samples uint) (*PtpSysOffsetExtended, error) {
+ value := PtpSysOffsetExtended{Samples: uint32(samples)}
+ err := ioctlPtr(fd, PTP_SYS_OFFSET_EXTENDED2, unsafe.Pointer(&value))
+ return &value, err
+}
+
+// IoctlPtpPinGetfunc returns the configuration of the specified
+// I/O pin on given PTP device.
+func IoctlPtpPinGetfunc(fd int, index uint) (*PtpPinDesc, error) {
+ value := PtpPinDesc{Index: uint32(index)}
+ err := ioctlPtr(fd, PTP_PIN_GETFUNC2, unsafe.Pointer(&value))
+ return &value, err
+}
+
+// IoctlPtpPinSetfunc updates configuration of the specified PTP
+// I/O pin.
+func IoctlPtpPinSetfunc(fd int, pd *PtpPinDesc) error {
+ return ioctlPtr(fd, PTP_PIN_SETFUNC2, unsafe.Pointer(pd))
+}
+
+// IoctlPtpPeroutRequest configures the periodic output mode of the
+// PTP I/O pins.
+func IoctlPtpPeroutRequest(fd int, r *PtpPeroutRequest) error {
+ return ioctlPtr(fd, PTP_PEROUT_REQUEST2, unsafe.Pointer(r))
+}
+
+// IoctlPtpExttsRequest configures the external timestamping mode
+// of the PTP I/O pins.
+func IoctlPtpExttsRequest(fd int, r *PtpExttsRequest) error {
+ return ioctlPtr(fd, PTP_EXTTS_REQUEST2, unsafe.Pointer(r))
+}
+
// IoctlGetWatchdogInfo fetches information about a watchdog device from the
// Linux watchdog API. For more information, see:
// https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html.
diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh
index 4ed2e488b6..6ab02b6c31 100644
--- a/vendor/golang.org/x/sys/unix/mkerrors.sh
+++ b/vendor/golang.org/x/sys/unix/mkerrors.sh
@@ -58,6 +58,7 @@ includes_Darwin='
#define _DARWIN_USE_64_BIT_INODE
#define __APPLE_USE_RFC_3542
#include