riscv-software-src
diff --git a/‎arch/ext/Zcd.yaml‎
Lines changed: 43 additions & 0 deletions b/‎arch/ext/Zcd.yaml‎
Lines changed: 43 additions & 0 deletions
diff --git a/‎arch/inst/Zbkb/brev8.yaml‎
Lines changed: 20 additions & 4 deletions b/‎arch/inst/Zbkb/brev8.yaml‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎arch/inst/Zbkb/unzip.yaml‎
Lines changed: 20 additions & 4 deletions b/‎arch/inst/Zbkb/unzip.yaml‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎arch/inst/Zbkb/zip.yaml‎
Lines changed: 20 additions & 4 deletions b/‎arch/inst/Zbkb/zip.yaml‎
Lines changed: 20 additions & 4 deletions
diff --git a/‎arch/inst/Zbkx/xperm4.yaml‎
Lines changed: 31 additions & 4 deletions b/‎arch/inst/Zbkx/xperm4.yaml‎
Lines changed: 31 additions & 4 deletions
diff --git a/‎arch/inst/Zbkx/xperm8.yaml‎
Lines changed: 31 additions & 4 deletions b/‎arch/inst/Zbkx/xperm8.yaml‎
Lines changed: 31 additions & 4 deletions
diff --git a/‎arch/inst/Zcmp/cm.mva01s.yaml‎
Lines changed: 33 additions & 0 deletions b/‎arch/inst/Zcmp/cm.mva01s.yaml‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎arch/inst/Zcmp/cm.mvsa01.yaml‎
Lines changed: 35 additions & 0 deletions b/‎arch/inst/Zcmp/cm.mvsa01.yaml‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎arch/inst/Zcmp/cm.pop.yaml‎
Lines changed: 83 additions & 0 deletions b/‎arch/inst/Zcmp/cm.pop.yaml‎
Lines changed: 83 additions & 0 deletions
@@ -0,0 +1,43 @@
+# yaml-language-server: $schema=../../schemas/ext_schema.json
+
+$schema: "ext_schema.json#"
+kind: extension
+name: Zcd
+long_name: Compressed instructions for double precision floating point
+description: |
+  Zcd is the existing set of compressed double precision floating point loads and stores:
+  `c.fld`, `c.fldsp`, `c.fsd`, `c.fsdsp`.
+
+type: unprivileged
+company:
+  name: RISC-V International
+  url: https://riscv.org
+versions:
+  - version: "1.0.0"
+    state: ratified
+    ratification_date: 2023-04
+    repositories:
+      - url: https://github.com/riscv/riscv-code-size-reduction
+        branch: main
+    contributors:
+      - name: Tariq Kurd
+      - name: Ibrahim Abu Kharmeh
+      - name: Torbjørn Viem Ness
+      - name: Matteo Perotti
+      - name: Nidal Faour
+      - name: Bill Traynor
+      - name: Rafael Sene
+      - name: Xinlong Wu
+      - name: sinan
+      - name: Jeremy Bennett
+      - name: Heda Chen
+      - name: Alasdair Armstrong
+      - name: Graeme Smecher
+      - name: Nicolas Brunie
+      - name: Jiawei
+    requires:  # (Zca or C, each pinned to "= 1.0.0") together with D at "~> 2.2.0"
+      allOf:
+        - anyOf:
+            - { name: Zca, version: "= 1.0.0" }
+            - { name: C, version: "= 1.0.0" }
+        - { name: D, version: "~> 2.2.0" }
@@ -3,11 +3,10 @@
 $schema: inst_schema.json#
 kind: instruction
 name: brev8
-long_name: No synopsis available.
+long_name: Reverse bits in bytes
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkb, Zk, Zkn, Zks]
+  This instruction reverses the order of the bits in every byte of a register.
+definedBy: Zbkb
 assembly: xd, xs1
 encoding:
   match: 011010000111-----101-----0010011
@@ -23,3 +22,20 @@ access:
   vu: always
 data_independent_timing: false
 operation(): |
+  XReg input = X[rs1];
+  XReg output = 0;
+  # i is the bit offset of each byte (step 8, covering every byte up to xlen); j is the bit inside it.
+  for(U32 i=0; i<xlen(); i = i+8) {
+    for(U32 j=0; j<8; j = j+1) {
+      output[i+(7-j)] = input[i+j];
+    }
+  }
+
+  X[rd] = output;
+
+sail(): |
+  result : xlenbits = EXTZ(0b0);
+  foreach (i from 0 to (sizeof(xlen) - 8) by 8) {  // upper bound is inclusive: last byte starts at xlen-8
+    result[i+7..i] = reverse_bits_in_byte(X(rs1)[i+7..i]);
+  };
+  X(rd) = result;
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: unzip
-long_name: No synopsis available.
+long_name: Bit deinterleave
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkb, Zk, Zkn, Zks]
+  This instruction scatters all of the odd and even bits of a source word into the high and low halves
+  of a destination word. It is the inverse of the zip instruction. This instruction is available only on
+  RV32.
+definedBy: Zbkb
 assembly: xd, xs1
 encoding:
   match: 000010001111-----101-----0010011
@@ -24,3 +25,18 @@ access:
 data_independent_timing: false
 base: 32
 operation(): |
+  XReg input = X[rs1];
+  XReg output = 0;
+  # Even source bits are packed into the low half, odd source bits into the high half.
+  for(U32 i=0; i<(xlen()/2); i = i+1) {
+    output[i] = input[2*i];
+    output[i+xlen()/2] = input[2*i+1];
+  }
+
+  X[rd] = output;
+
+sail(): |
+  foreach (i from 0 to xlen/2-1) {  // upper bound is inclusive, so i covers 0 .. xlen/2-1
+    X(rd)[i] = X(rs1)[2*i];  // even source bit -> low half
+    X(rd)[i+xlen/2] = X(rs1)[2*i+1];  // odd source bit -> high half
+  }
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: zip
-long_name: No synopsis available.
+long_name: Bit interleave
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkb, Zk, Zkn, Zks]
+  This instruction gathers bits from the high and low halves of the source word into odd/even bit
+  positions in the destination word. It is the inverse of the unzip instruction. This instruction is
+  available only on RV32.
+definedBy: Zbkb
 assembly: xd, xs1
 encoding:
   match: 000010001111-----001-----0010011
@@ -24,3 +25,18 @@ access:
 data_independent_timing: false
 base: 32
 operation(): |
+  XReg input = X[rs1];
+  XReg output = 0;
+  # Low-half source bits land on even positions, high-half source bits on odd positions.
+  for(U32 i=0; i<(xlen()/2); i = i+1) {
+    output[2*i] = input[i];
+    output[2*i+1] = input[i+xlen()/2];
+  }
+
+  X[rd] = output;
+
+sail(): |
+  foreach (i from 0 to xlen/2-1) {  // upper bound is inclusive, so i covers 0 .. xlen/2-1
+    X(rd)[2*i] = X(rs1)[i];  // low-half bit -> even position
+    X(rd)[2*i+1] = X(rs1)[i+xlen/2];  // high-half bit -> odd position
+  }
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: xperm4
-long_name: No synopsis available.
+long_name: Crossbar permutation (nibbles)
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkx, Zk, Zkn, Zks]
+  The xperm4 instruction operates on nibbles. The rs1 register contains a vector of XLEN/4 4-bit
+  elements. The rs2 register contains a vector of XLEN/4 4-bit indexes. The result is each element in
+  rs2 replaced by the indexed element in rs1, or zero if the index taken from rs2 is out of bounds.
+definedBy: Zbkx
 assembly: xd, xs1, xs2
 encoding:
   match: 0010100----------010-----0110011
@@ -25,3 +26,29 @@ access:
   vu: always
 data_independent_timing: false
 operation(): |
+  XReg input1 = X[rs1];
+  XReg input2 = X[rs2];
+  XReg output = 0;
+  # Each nibble of rs2 selects a nibble of rs1; an out-of-range selector leaves the result nibble zero.
+  for(U32 i=0; i<xlen(); i = i+4) {
+    XReg index = input2[i+3:i];
+    if(4*index < xlen()) {
+      output[i+3:i] = input1[4*index+3:4*index];
+    }
+  }
+
+  X[rd] = output;
+
+sail(): |
+  val xperm4_lookup : (bits(4), xlenbits) -> bits(4)
+  function xperm4_lookup (idx, lut) = {
+    (lut >> (idx @ 0b00))[3..0]  // idx @ 0b00 == idx * 4: bit offset of the selected nibble
+  }
+  function clause execute ( XPERM_4 (rs2,rs1,rd)) = {
+    result : xlenbits = EXTZ(0b0);
+    foreach(i from 0 to (xlen - 4) by 4) {  // upper bound is inclusive: last nibble starts at xlen-4
+      result[i+3..i] = xperm4_lookup(X(rs2)[i+3..i], X(rs1));
+    };
+    X(rd) = result;
+    RETIRE_SUCCESS
+  }
@@ -3,11 +3,12 @@
 $schema: inst_schema.json#
 kind: instruction
 name: xperm8
-long_name: No synopsis available.
+long_name: Crossbar permutation (bytes)
 description: |
-  No description available.
-definedBy:
-  anyOf: [B, Zbkx, Zk, Zkn, Zks]
+  The xperm8 instruction operates on bytes. The rs1 register contains a vector of XLEN/8 8-bit
+  elements. The rs2 register contains a vector of XLEN/8 8-bit indexes. The result is each element in
+  rs2 replaced by the indexed element in rs1, or zero if the index taken from rs2 is out of bounds.
+definedBy: Zbkx
 assembly: xd, xs1, xs2
 encoding:
   match: 0010100----------100-----0110011
@@ -25,3 +26,29 @@ access:
   vu: always
 data_independent_timing: false
 operation(): |
+  XReg input1 = X[rs1];
+  XReg input2 = X[rs2];
+  XReg output = 0;
+  # Each byte of rs2 selects a byte of rs1; an out-of-range selector leaves the result byte zero.
+  for(U32 i=0; i<xlen(); i = i+8) {
+    XReg index = input2[i+7:i];
+    if(8*index < xlen()) {
+      output[i+7:i] = input1[8*index+7:8*index];
+    }
+  }
+
+  X[rd] = output;
+
+sail(): |
+  val xperm8_lookup : (bits(8), xlenbits) -> bits(8)
+  function xperm8_lookup (idx, lut) = {
+    (lut >> (idx @ 0b000))[7..0]  // idx @ 0b000 == idx * 8: bit offset of the selected byte
+  }
+  function clause execute ( XPERM_8 (rs2,rs1,rd)) = {
+    result : xlenbits = EXTZ(0b0);
+    foreach(i from 0 to (xlen - 8) by 8) {  // upper bound is inclusive: last byte starts at xlen-8
+      result[i+7..i] = xperm8_lookup(X(rs2)[i+7..i], X(rs1));
+    };
+    X(rd) = result;
+    RETIRE_SUCCESS
+  }
@@ -0,0 +1,33 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.mva01s
+long_name: Move two s0-s7 registers into a0-a1
+description: |
+  This instruction moves r1s' into a0 and r2s' into a1. The execution is atomic, so it is not possible to observe state where only one of a0 or a1 has been updated.
+  The encoding uses sreg number specifiers instead of xreg number specifiers to save encoding space. The mapping between them is specified in the pseudo-code below.
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: r1s, r2s
+encoding:
+  match: 101011---11---10
+  variables:
+    - name: r1s
+      location: 9-7
+    - name: r2s
+      location: 4-2
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {  # Zcmp present but misa.C reads 0
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+  XReg xreg1 = (r1s[2:1]>0) ? {1,0,r1s[2:0]} : {0,1,r1s[2:0]};  # sreg -> xreg: prefix 1,0 when r1s >= 2, else 0,1
+  XReg xreg2 = (r2s[2:1]>0) ? {1,0,r2s[2:0]} : {0,1,r2s[2:0]};  # same mapping for the second specifier
+  X[10] = X[xreg1];  # x10 receives the register selected by r1s
+  X[11] = X[xreg2];  # x11 receives the register selected by r2s
@@ -0,0 +1,35 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.mvsa01
+long_name: Move a0-a1 into two registers of s0-s7
+description: |
+  This instruction moves a0 into r1s' and a1 into r2s'. r1s' and r2s' must be different.
+  The execution is atomic, so it is not possible to observe state where only one of r1s' or r2s' has been updated.
+  The encoding uses sreg number specifiers instead of xreg number specifiers to save encoding space.
+  The mapping between them is specified in the pseudo-code below.
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: r1s, r2s
+encoding:
+  match: 101011---01---10
+  variables:
+    - name: r1s
+      location: 9-7
+    - name: r2s
+      location: 4-2
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {  # Zcmp present but misa.C reads 0
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+  XReg xreg1 = (r1s[2:1]>0) ? {1,0,r1s[2:0]} : {0,1,r1s[2:0]};  # sreg -> xreg: prefix 1,0 when r1s >= 2, else 0,1
+  XReg xreg2 = (r2s[2:1]>0) ? {1,0,r2s[2:0]} : {0,1,r2s[2:0]};  # same mapping for the second specifier
+  X[xreg1] = X[10];  # register selected by r1s receives x10
+  X[xreg2] = X[11];  # register selected by r2s receives x11; NOTE(review): r1s == r2s is not checked here
@@ -0,0 +1,83 @@
+# yaml-language-server: $schema=../../../schemas/inst_schema.json
+
+$schema: "inst_schema.json#"
+kind: instruction
+name: cm.pop
+long_name: Destroy function call stack frame
+description: |
+  Destroy stack frame: load `ra` and 0 to 12 saved registers from the stack frame, deallocate the stack frame.
+  This instruction pops (loads) the registers in `reg_list` from stack memory, and then adjusts the stack pointer by `stack_adj`.
+
+  Restrictions on stack_adj:
+
+  * it must be enough to store all of the listed registers
+  * it must be a multiple of 16 (bytes):
+  ** for RV32 the allowed values are: 16, 32, 48, 64, 80, 96, 112
+  ** for RV64 the allowed values are: 16, 32, 48, 64, 80, 96, 112, 128, 144, 160
+definedBy:
+  anyOf:
+    - Zcmp
+assembly: reg_list, stack_adj
+encoding:
+  match: 10111010------10
+  variables:
+    - name: rlist
+      location: 7-4
+      not: [0, 1, 2, 3]
+    - name: spimm
+      location: 3-2
+      left_shift: 4
+access:
+  s: always
+  u: always
+  vs: always
+  vu: always
+operation(): |
+  if (implemented?(ExtensionName::Zcmp) && (CSR[misa].C == 1'b0)) {
+    raise(ExceptionCode::IllegalInstruction, mode(), $encoding);
+  }
+
+  XReg size = xlen() / 8;  # bytes occupied by one saved register (4 on RV32, 8 on RV64)
+  XReg nreg = (rlist == 15) ? 13 : (rlist - 3);  # count jumps from 11 to 13: no encoding loads s10 alone
+  XReg stack_aligned_adj = (nreg * size + 15) & ~0xF;  # register save area rounded up to 16 bytes
+  XReg virtual_address_sp = X[2];
+  XReg virtual_address_new_sp = virtual_address_sp + stack_aligned_adj + spimm;
+  XReg virtual_address_base = virtual_address_new_sp - (nreg * size);
+  # Registers are loaded upward from the base; the last one sits just below the new sp.
+  X[ 1] = read_memory_xlen(virtual_address_base +  0*size, $encoding);
+  if (nreg > 1) {
+    X[ 8] = read_memory_xlen(virtual_address_base +  1*size, $encoding);
+  }
+  if (nreg > 2) {
+    X[ 9] = read_memory_xlen(virtual_address_base +  2*size, $encoding);
+  }
+  if (nreg > 3) {
+    X[18] = read_memory_xlen(virtual_address_base +  3*size, $encoding);
+  }
+  if (nreg > 4) {
+    X[19] = read_memory_xlen(virtual_address_base +  4*size, $encoding);
+  }
+  if (nreg > 5) {
+    X[20] = read_memory_xlen(virtual_address_base +  5*size, $encoding);
+  }
+  if (nreg > 6) {
+    X[21] = read_memory_xlen(virtual_address_base +  6*size, $encoding);
+  }
+  if (nreg > 7) {
+    X[22] = read_memory_xlen(virtual_address_base +  7*size, $encoding);
+  }
+  if (nreg > 8) {
+    X[23] = read_memory_xlen(virtual_address_base +  8*size, $encoding);
+  }
+  if (nreg > 9) {
+    X[24] = read_memory_xlen(virtual_address_base +  9*size, $encoding);
+  }
+  if (nreg > 10) {
+    X[25] = read_memory_xlen(virtual_address_base + 10*size, $encoding);
+  }
+  if (nreg > 11) {
+    X[26] = read_memory_xlen(virtual_address_base + 11*size, $encoding);
+    X[27] = read_memory_xlen(virtual_address_base + 12*size, $encoding);
+  }
+
+  X[2] = virtual_address_new_sp;