diff --git a/docs/Languages.md b/docs/Languages.md
index aa9bf81989..3e3e1d8cc8 100644
--- a/docs/Languages.md
+++ b/docs/Languages.md
@@ -171,6 +171,7 @@
 - ReasonML (`reasonml`)
 - Rego (`rego`)
 - ReScript (`rescript`)
+- RISC-V Assembly (`riscvasm`)
 - RML (`rml`)
 - Robot Framework (`robot_framework`)
 - Ruby (`ruby`)
diff --git a/lib/rouge/demos/riscvasm b/lib/rouge/demos/riscvasm
new file mode 100644
index 0000000000..26fd705726
--- /dev/null
+++ b/lib/rouge/demos/riscvasm
@@ -0,0 +1,218 @@
+# RV64IDV system
+#
+# void
+# sgemm_nn(size_t n,
+#          size_t m,
+#          size_t k,
+#          const float*a,   // m * k matrix
+#          size_t lda,
+#          const float*b,   // k * n matrix
+#          size_t ldb,
+#          float*c,         // m * n matrix
+#          size_t ldc)
+#
+#  c += a*b (alpha=1, no transpose on input matrices)
+#  matrices stored in C row-major order
+
+#define n a0
+#define m a1
+#define k a2
+#define ap a3
+#define astride a4
+#define bp a5
+#define bstride a6
+#define cp a7
+#define cstride t0
+#define kt t1
+#define nt t2
+#define bnp t3
+#define cnp t4
+#define akp t5
+#define bkp s0
+#define nvl s1
+#define ccp s2
+#define amp s3
+
+# Reuse FP argument registers fa0-fa3 as additional temporaries, named ft12-ft15 here
+#define ft12 fa0
+#define ft13 fa1
+#define ft14 fa2
+#define ft15 fa3
+
+# This version holds a 16*VLMAX block of the C matrix in vector registers
+# in the inner loop, but otherwise does no cache or TLB tiling.
+
+sgemm_nn:
+    addi sp, sp, -FRAMESIZE  # Allocate frame (FRAMESIZE and OFFSET are not defined in this listing)
+    sd s0, OFFSET(sp)  # Spill s0, used below as bkp
+    sd s1, OFFSET(sp)  # Spill s1, used below as nvl
+    sd s2, OFFSET(sp)  # Spill s2, used below as ccp. NOTE(review): s3 (amp) is also written but never spilled - confirm
+
+    # Check for zero size matrices
+    beqz n, exit
+    beqz m, exit
+    beqz k, exit
+
+    # Convert elements strides to byte strides.
+    ld cstride, OFFSET(sp)   # Get arg from stack frame
+    slli astride, astride, 2
+    slli bstride, bstride, 2
+    slli cstride, cstride, 2
+
+    slti t6, m, 16  # t6 = (m < 16)
+    bnez t6, end_rows  # Fewer than 16 rows: skip the 16-row blocked loop
+
+c_row_loop: # Loop across rows of C blocks
+
+    mv nt, n  # Initialize n counter for next row of C blocks
+
+    mv bnp, bp # Initialize B n-loop pointer to start
+    mv cnp, cp # Initialize C n-loop pointer
+
+c_col_loop: # Loop across one row of C blocks
+    vsetvli nvl, nt, e32, ta, ma  # 32-bit vectors, LMUL=1
+
+    mv akp, ap   # reset pointer into A to beginning
+    mv bkp, bnp # step to next column in B matrix
+
+    # Initialize current C submatrix block from memory.
+    vle32.v  v0, (cnp); add ccp, cnp, cstride;
+    vle32.v  v1, (ccp); add ccp, ccp, cstride;
+    vle32.v  v2, (ccp); add ccp, ccp, cstride;
+    vle32.v  v3, (ccp); add ccp, ccp, cstride;
+    vle32.v  v4, (ccp); add ccp, ccp, cstride;
+    vle32.v  v5, (ccp); add ccp, ccp, cstride;
+    vle32.v  v6, (ccp); add ccp, ccp, cstride;
+    vle32.v  v7, (ccp); add ccp, ccp, cstride;
+    vle32.v  v8, (ccp); add ccp, ccp, cstride;
+    vle32.v  v9, (ccp); add ccp, ccp, cstride;
+    vle32.v v10, (ccp); add ccp, ccp, cstride;
+    vle32.v v11, (ccp); add ccp, ccp, cstride;
+    vle32.v v12, (ccp); add ccp, ccp, cstride;
+    vle32.v v13, (ccp); add ccp, ccp, cstride;
+    vle32.v v14, (ccp); add ccp, ccp, cstride;
+    vle32.v v15, (ccp)
+
+
+    mv kt, k # Initialize inner loop counter
+
+    # Inner loop scheduled assuming 4-clock occupancy of vfmacc instruction and single-issue pipeline
+    # Software pipeline loads
+    flw ft0, (akp); add amp, akp, astride;
+    flw ft1, (amp); add amp, amp, astride;
+    flw ft2, (amp); add amp, amp, astride;
+    flw ft3, (amp); add amp, amp, astride;
+    # Get vector from B matrix
+    vle32.v v16, (bkp)
+
+    # Loop on inner dimension for current C block
+ k_loop:
+    vfmacc.vf v0, ft0, v16
+    add bkp, bkp, bstride
+    flw ft4, (amp)
+    add amp, amp, astride
+    vfmacc.vf v1, ft1, v16
+    addi kt, kt, -1    # Decrement k counter
+    flw ft5, (amp)
+    add amp, amp, astride
+    vfmacc.vf v2, ft2, v16
+    flw ft6, (amp)
+    add amp, amp, astride
+    flw ft7, (amp)
+    vfmacc.vf v3, ft3, v16
+    add amp, amp, astride
+    flw ft8, (amp)
+    add amp, amp, astride
+    vfmacc.vf v4, ft4, v16
+    flw ft9, (amp)
+    add amp, amp, astride
+    vfmacc.vf v5, ft5, v16
+    flw ft10, (amp)
+    add amp, amp, astride
+    vfmacc.vf v6, ft6, v16
+    flw ft11, (amp)
+    add amp, amp, astride
+    vfmacc.vf v7, ft7, v16
+    flw ft12, (amp)
+    add amp, amp, astride
+    vfmacc.vf v8, ft8, v16
+    flw ft13, (amp)
+    add amp, amp, astride
+    vfmacc.vf v9, ft9, v16
+    flw ft14, (amp)
+    add amp, amp, astride
+    vfmacc.vf v10, ft10, v16
+    flw ft15, (amp)
+    add amp, amp, astride
+    addi akp, akp, 4            # Move to next column of a
+    vfmacc.vf v11, ft11, v16
+    beqz kt, 1f                 # Don't load past end of matrix
+    flw ft0, (akp)
+    add amp, akp, astride
+1:  vfmacc.vf v12, ft12, v16
+    beqz kt, 1f
+    flw ft1, (amp)
+    add amp, amp, astride
+1:  vfmacc.vf v13, ft13, v16
+    beqz kt, 1f
+    flw ft2, (amp)
+    add amp, amp, astride
+1:  vfmacc.vf v14, ft14, v16
+    beqz kt, 1f                 # Exit out of loop
+    flw ft3, (amp)
+    add amp, amp, astride
+    vfmacc.vf v15, ft15, v16
+    vle32.v v16, (bkp)            # Get next vector from B matrix, overlap loads with jump stalls
+    j k_loop
+
+1:  vfmacc.vf v15, ft15, v16
+
+    # Save C matrix block back to memory
+    vse32.v  v0, (cnp); add ccp, cnp, cstride;
+    vse32.v  v1, (ccp); add ccp, ccp, cstride;
+    vse32.v  v2, (ccp); add ccp, ccp, cstride;
+    vse32.v  v3, (ccp); add ccp, ccp, cstride;
+    vse32.v  v4, (ccp); add ccp, ccp, cstride;
+    vse32.v  v5, (ccp); add ccp, ccp, cstride;
+    vse32.v  v6, (ccp); add ccp, ccp, cstride;
+    vse32.v  v7, (ccp); add ccp, ccp, cstride;
+    vse32.v  v8, (ccp); add ccp, ccp, cstride;
+    vse32.v  v9, (ccp); add ccp, ccp, cstride;
+    vse32.v v10, (ccp); add ccp, ccp, cstride;
+    vse32.v v11, (ccp); add ccp, ccp, cstride;
+    vse32.v v12, (ccp); add ccp, ccp, cstride;
+    vse32.v v13, (ccp); add ccp, ccp, cstride;
+    vse32.v v14, (ccp); add ccp, ccp, cstride;
+    vse32.v v15, (ccp)
+
+    # Following tail instructions should be scheduled earlier in free slots during C block save.
+    # Leaving here for clarity.
+
+    # Bump pointers for loop across blocks in one row
+    slli t6, nvl, 2  # t6 = nvl * 4, the byte width of the columns just processed
+    add cnp, cnp, t6                         # Move C block pointer over
+    add bnp, bnp, t6                         # Move B block pointer over
+    sub nt, nt, nvl                          # Decrement element count in n dimension
+    bnez nt, c_col_loop                      # Any more to do?
+
+    # Move to next set of rows
+    addi m, m, -16  # Did 16 rows above
+    slli t6, astride, 4  # Multiply astride by 16
+    add ap, ap, t6         # Move A matrix pointer down 16 rows
+    slli t6, cstride, 4  # Multiply cstride by 16
+    add cp, cp, t6         # Move C matrix pointer down 16 rows
+
+    slti t6, m, 16  # t6 = (m < 16)
+    beqz t6, c_row_loop  # At least 16 rows remain: process another block row
+
+    # Handle end of matrix with fewer than 16 rows.
+    # Can use smaller versions of above decreasing in powers-of-2 depending on code-size concerns.
+end_rows:
+    # Not done: the remaining (fewer than 16) rows are left unprocessed in this listing.
+
+exit:
+    ld s0, OFFSET(sp)  # Restore the registers spilled on entry
+    ld s1, OFFSET(sp)
+    ld s2, OFFSET(sp)
+    addi sp, sp, FRAMESIZE  # Release the frame
+    ret
diff --git a/lib/rouge/lexers/riscvasm.rb b/lib/rouge/lexers/riscvasm.rb
new file mode 100644
index 0000000000..abf8f694d4
--- /dev/null
+++ b/lib/rouge/lexers/riscvasm.rb
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+# Note that like most assembly languages there's no proper grammar for RISC-V assembly.
+# It's pretty much "what do GCC and Clang accept". I recommend not trying to read
+# their source code because it's a complete mess.
+
+module Rouge
+  module Lexers
+    class RiscvAsm < RegexLexer
+      title "RiscvAsm"
+      desc "RISC-V assembly syntax"
+      tag 'riscvasm'
+      filenames '*.s', '*.S'
+
+      # C preprocessor directives. These are only processed for .S files - not .s - however
+      # the parsing is mostly the same in both cases. The Set is built once and then cached.
+      def self.preproc_directive
+        @preproc_directive ||= Set.new %w(
+          define elif else endif error if ifdef ifndef include line pragma undef warning
+        )
+      end
+
+      # Standard register names, including ABI names. Built once and cached (looked up per word).
+      def self.register
+        @register ||= Set.new %w(
+          x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20 x21 x22 x23 x24 x25 x26 x27 x28 x29 x30 x31
+          f0 f1 f2 f3 f4 f5 f6 f7 f8 f9 f10 f11 f12 f13 f14 f15 f16 f17 f18 f19 f20 f21 f22 f23 f24 f25 f26 f27 f28 f29 f30 f31
+          v0 v1 v2 v3 v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15 v16 v17 v18 v19 v20 v21 v22 v23 v24 v25 v26 v27 v28 v29 v30 v31
+          zero ra sp gp tp t0 t1 t2 s0 fp s1 a0 a1 a2 a3 a4 a5 a6 a7 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 t3 t4 t5 t6
+          ft0 ft1 ft2 ft3 ft4 ft5 ft6 ft7 fs0 fs1 fa0 fa1 fa2 fa3 fa4 fa5 fa6 fa7 fs2 fs3 fs4 fs5 fs6 fs7 fs8 fs9 fs10 fs11 ft8 ft9 ft10 ft11
+        )
+      end
+
+      # These keywords are used for some vector instructions (vsetvli etc.). Cached after first use.
+      def self.other_keyword
+        @other_keyword ||= Set.new %w(
+          e8 e16 e32 e64 mf8 mf4 mf2 m1 m2 m4 m8 ta tu ma mu v0.t
+        )
+      end
+
+      # Names accepted after `%`, for %pcrel_hi(...) relocations etc. Cached after first use.
+      def self.relocation_function
+        @relocation_function ||= Set.new %w(
+          hi lo
+          pcrel_hi pcrel_lo
+          tprel_hi tprel_lo
+          tprel_add
+          tls_ie_pcrel_hi
+          tls_gd_pcrel_hi
+          got_pcrel_hi
+        )
+      end
+
+      state :comments_and_whitespace do
+        # Don't eat newlines because those are significant.
+        rule %r/[ \t]+/, Text::Whitespace
+        rule %r((//|#).*), Comment::Single
+        rule %r(/\*.*?\*/)m, Comment::Multiline
+      end
+
+      # This is only needed to deal with preprocessor directives.
+      state :in_single_line_comment do
+        rule %r/.*/, Comment::Single, :pop!
+      end
+
+      state :literals do
+        # 1f, 2b forward/backward label references.
+        rule %r/[0-9]+[fb]\b/, Name::Label
+
+        # Float. Tried before the integer rules: otherwise `1.5` would lex as the
+        # integer `1` followed by `.5`. Only C-style decimal floats are handled;
+        # hex floats and literals with a bare leading dot (e.g. `.3`) are not.
+        rule %r/\-?[0-9]+\.[0-9]*([eE]-?[0-9]+)?[fFlL]?\b/, Num::Float
+
+        # Octal
+        rule %r/\-?0[0-7]+\b/, Num::Oct
+        # Binary
+        rule %r/\-?0b[01]+\b/, Num::Integer
+        # Hex
+        rule %r/\-?0x[0-9a-fA-F]+\b/, Num::Hex
+        # Decimal
+        rule %r/\-?[0-9]+\b/, Num::Integer
+
+        # Strings.
+        rule %r/"(\\\\|\\"|[^"])*"/, Str::Double
+        rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
+      end
+
+      state :relocations do
+        rule %r/%(\w+)\b/ do |m|
+          if self.class.relocation_function.include?(m[1])
+            token Name::Builtin
+          else
+            token Text
+          end
+        end
+      end
+
+      # Registers, keywords, variables and operators.
+      state :words_and_operators do
+        # Register names, keywords
+        rule %r/([\w.]+)\b/ do |m|
+          if self.class.register.include?(m[1])
+            token Name::Constant
+          elsif self.class.other_keyword.include?(m[1])
+            token Name::Constant
+          else
+            token Name::Variable
+          end
+        end
+
+        # Variables.
+        rule %r/\\?[\$\w]+/, Name::Variable
+
+        # Operators
+        rule %r/[-~*\/%<>|&\^!+(),]/, Operator
+      end
+
+      state :root do
+        # Preprocessor directive. Awkwardly these are the same as single line comments.
+        # It seems like GCC will silently ignore unknown directives so that comments
+        # work - even for `.s` files. Yes that means if you have a typo like
+        #
+        #     #defien DISABLE_DEV_BACKDOOR 1
+        #
+        # Then it will silently ignore it!
+        #
+        # [ \t] is used (not \s) on both sides of `#` so the match never spans a newline.
+        rule %r/^[ \t]*#[ \t]*(\w+)\b/ do |m|
+          if self.class.preproc_directive.include?(m[1])
+            token Comment::Preproc
+            push :preprocessor_directive
+          else
+            token Comment::Single
+            # Match the rest of the line as a comment too.
+            push :in_single_line_comment
+          end
+        end
+
+        mixin :comments_and_whitespace
+
+        # End of line.
+        rule %r/\n/, Text::Whitespace
+
+        # Label. Must precede the directive rule so `.Lloop:` is not lexed as a directive.
+        rule %r/[\w.$]+:/, Name::Label
+
+        # Assembly directive.
+        rule %r/\.\w+/, Name::Attribute, :directive
+
+        # Instruction or maybe macro call.
+        rule %r/[\w\.]+\b/, Name::Builtin, :args
+      end
+
+      state :preprocessor_directive do
+        mixin :comments_and_whitespace
+
+        # Escaped newline. This is one case where you can't parse
+        # .S and .s the same - if you try to escape a newline in a
+        # preprocessor directive in .S it will work but in .s it
+        # will be ignored. Here we assume .S.
+        rule %r/\\\n/, Text
+
+        rule %r/\n/, Text, :pop!
+
+        mixin :literals
+        mixin :relocations
+        mixin :words_and_operators
+
+        rule %r/./, Text
+      end
+
+      state :directive do
+        mixin :comments_and_whitespace
+
+        rule %r/\n/, Text, :pop!
+
+        mixin :literals
+        mixin :relocations
+        mixin :words_and_operators
+
+        rule %r/./, Text
+      end
+
+      state :args do
+        mixin :comments_and_whitespace
+
+        # End of instruction.
+        rule %r/[;\n]/, Text::Whitespace, :pop!
+
+        mixin :literals
+        mixin :relocations
+        mixin :words_and_operators
+
+        rule %r/./, Text
+      end
+    end
+  end
+end
diff --git a/spec/lexers/riscvasm_spec.rb b/spec/lexers/riscvasm_spec.rb
new file mode 100644
index 0000000000..af89d0a3df
--- /dev/null
+++ b/spec/lexers/riscvasm_spec.rb
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+describe Rouge::Lexers::RiscvAsm do
+  let(:subject) { Rouge::Lexers::RiscvAsm.new }
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess :filename => 'foo.s'
+    end
+  end
+end