diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index 0dc138339952f0..73896cdcebab05 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -814,7 +814,7 @@ class CodeGen final : public CodeGenInterface
         CHECK_NONE,
         CHECK_SMALL_INT_RANGE,
         CHECK_POSITIVE,
-#ifdef TARGET_64BIT
+#if defined(TARGET_64BIT) || defined(TARGET_WASM)
         CHECK_UINT_RANGE,
         CHECK_POSITIVE_INT_RANGE,
         CHECK_INT_RANGE,
@@ -826,13 +826,13 @@ class CodeGen final : public CodeGenInterface
         COPY,
         ZERO_EXTEND_SMALL_INT,
         SIGN_EXTEND_SMALL_INT,
-#ifdef TARGET_64BIT
+#if defined(TARGET_64BIT) || defined(TARGET_WASM)
         ZERO_EXTEND_INT,
         SIGN_EXTEND_INT,
 #endif
         LOAD_ZERO_EXTEND_SMALL_INT,
         LOAD_SIGN_EXTEND_SMALL_INT,
-#ifdef TARGET_64BIT
+#if defined(TARGET_64BIT) || defined(TARGET_WASM)
         LOAD_ZERO_EXTEND_INT,
         LOAD_SIGN_EXTEND_INT,
 #endif
diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp
index 17e47b08fee49d..110d0cf0161ae0 100644
--- a/src/coreclr/jit/codegenlinear.cpp
+++ b/src/coreclr/jit/codegenlinear.cpp
@@ -2309,6 +2309,7 @@ void CodeGen::genTransferRegGCState(regNumber dst, regNumber src)
         gcInfo.gcMarkRegSetNpt(dstMask);
     }
 }
+#endif
 
 //------------------------------------------------------------------------
 // genCodeForCast: Generates the code for GT_CAST.
@@ -2337,12 +2338,12 @@ void CodeGen::genCodeForCast(GenTreeOp* tree)
         // Casts int32/uint32/int64/uint64 --> float/double
         genIntToFloatCast(tree);
     }
-#ifndef TARGET_64BIT
+#if !defined(TARGET_64BIT) && !defined(TARGET_WASM)
     else if (varTypeIsLong(tree->gtOp1))
     {
         genLongToIntCast(tree);
     }
-#endif // !TARGET_64BIT
+#endif // !TARGET_64BIT && !TARGET_WASM
     else
     {
         // Casts int <--> int
@@ -2366,8 +2367,13 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
     const bool castIsLoad = !src->isUsedFromReg();
     assert(castIsLoad == src->isUsedFromMemory());
 
+#ifndef TARGET_WASM
     assert((srcSize == 4) || (srcSize == genTypeSize(TYP_I_IMPL)));
     assert((dstSize == 4) || (dstSize == genTypeSize(TYP_I_IMPL)));
+#else
+    assert((srcSize == 4) || (srcSize == 8));
+    assert((dstSize == 4) || (dstSize == 8));
+#endif
 
     assert(dstSize == genTypeSize(genActualType(castType)));
 
@@ -2395,7 +2401,7 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
             m_extendSrcSize = castSize;
         }
     }
-#ifdef TARGET_64BIT
+#if defined(TARGET_64BIT) || defined(TARGET_WASM)
     // castType cannot be (U)LONG on 32 bit targets, such casts should have been decomposed.
     // srcType cannot be a small int type since it's the "actual type" of the cast operand.
     // This means that widening casts do not occur on 32 bit targets.
@@ -2480,6 +2486,7 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
         m_extendSrcSize = srcSize;
     }
 
+#ifndef TARGET_WASM
     if (castIsLoad)
     {
         const var_types srcLoadType = src->TypeGet();
@@ -2521,8 +2528,10 @@ CodeGen::GenIntCastDesc::GenIntCastDesc(GenTreeCast* cast)
                 unreached();
         }
     }
+#endif // !TARGET_WASM
 }
 
+#ifndef TARGET_WASM
 #if !defined(TARGET_64BIT)
 //------------------------------------------------------------------------
 // genStoreLongLclVar: Generate code to store a non-enregistered long lclVar
diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp
index 176375f2d9c1a4..072aef90c8be56 100644
--- a/src/coreclr/jit/codegenwasm.cpp
+++ b/src/coreclr/jit/codegenwasm.cpp
@@ -348,6 +348,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
             genCodeForConstant(treeNode);
             break;
 
+        case GT_CAST:
+            genCodeForCast(treeNode->AsOp());
+            break;
+
         case GT_NEG:
         case GT_NOT:
             genCodeForNegNot(treeNode->AsOp());
@@ -447,22 +451,234 @@ static constexpr uint32_t PackOperAndType(genTreeOps oper, var_types type)
     {
         type = TYP_I_IMPL;
     }
-    static_assert((ssize_t)GT_COUNT > (ssize_t)TYP_COUNT);
-    return ((uint32_t)oper << (ConstLog2<TYP_COUNT>::value + 1)) | ((uint32_t)type);
+    const int shift1 = ConstLog2<TYP_COUNT>::value + 1;
+    return ((uint32_t)oper << shift1) | ((uint32_t)type);
 }
 
 //------------------------------------------------------------------------
-// PackOperAndType: Pack a GenTreeOp* into a uint32_t
+// PackOperAndType: Pack a genTreeOps and two var_types into a uint32_t
+//
+// Arguments:
+//    oper     - a genTreeOps to pack
+//    toType   - a var_types to pack
+//    fromType - a var_types to pack
+//
+// Return Value:
+//    oper and the types packed into an integer that can be used as a switch value/case
 //
+static constexpr uint32_t PackOperAndType(genTreeOps oper, var_types toType, var_types fromType)
+{
+    if (fromType == TYP_BYREF)
+    {
+        fromType = TYP_I_IMPL;
+    }
+    if (toType == TYP_BYREF)
+    {
+        toType = TYP_I_IMPL;
+    }
+    const int shift1 = ConstLog2<TYP_COUNT>::value + 1;
+    const int shift2 = shift1 + ConstLog2<TYP_COUNT>::value + 1;
+    // Layout: fromType in the low bits, toType above it, oper above both,
+    // so the three fields cannot overlap.
+    return ((uint32_t)oper << shift2) | ((uint32_t)fromType) | ((uint32_t)toType << shift1);
+}
+
+//------------------------------------------------------------------------
+// PackTypes: Pack two var_types together into a uint32_t
+//
 // Arguments:
-//    treeNode - a GenTreeOp to extract oper and type from
+//    toType   - a var_types to pack
+//    fromType - a var_types to pack
 //
 // Return Value:
-//    the node's oper and type packed into an integer that can be used as a switch value
+//    The two types packed together into an integer that can be used as a switch value/case,
+//    the primary use case being the handling of operations with two-type variants such
+//    as casts.
+//
+static constexpr uint32_t PackTypes(var_types toType, var_types fromType)
+{
+    if (toType == TYP_BYREF)
+    {
+        toType = TYP_I_IMPL;
+    }
+    if (fromType == TYP_BYREF)
+    {
+        fromType = TYP_I_IMPL;
+    }
+    const int shift1 = ConstLog2<TYP_COUNT>::value + 1;
+    return ((uint32_t)toType) | ((uint32_t)fromType << shift1);
+}
+
+//------------------------------------------------------------------------
+// genIntToIntCast: Generate code for an integer to integer cast
+//
+// Arguments:
+//    cast - The GT_CAST node for the integer cast operation
+//
+// Notes:
+//    Handles casts to and from small int, int, and long types,
+//    including sign extension and truncation as needed.
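+//    Wasm is a stack machine, so the source value is already on the operand
+//    stack on entry; the cast reduces to zero or more conversion instructions
+//    (wrap/mask/extend) applied to the stack top.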
+//
+void CodeGen::genIntToIntCast(GenTreeCast* cast)
+{
+    GenIntCastDesc desc(cast);
+    var_types      toType     = genActualType(cast->CastToType());
+    var_types      fromType   = genActualType(cast->CastOp());
+    int            extendSize = desc.ExtendSrcSize();
+    instruction    ins        = INS_none;
+    assert((fromType == TYP_INT) || (fromType == TYP_LONG));
+
+    genConsumeOperands(cast);
+
+    switch (desc.ExtendKind())
+    {
+        case GenIntCastDesc::COPY:
+        {
+            if ((toType == TYP_INT) && (fromType == TYP_LONG))
+            {
+                ins = INS_i32_wrap_i64;
+            }
+            else
+            {
+                assert(toType == fromType);
+                ins = INS_none;
+            }
+            break;
+        }
+        case GenIntCastDesc::ZERO_EXTEND_SMALL_INT:
+        {
+            int andAmount = (extendSize == 1) ? 255 : 65535;
+            if (fromType == TYP_LONG)
+            {
+                GetEmitter()->emitIns(INS_i32_wrap_i64);
+            }
+            GetEmitter()->emitIns_I(INS_i32_const, EA_4BYTE, andAmount);
+            GetEmitter()->emitIns(INS_i32_and);
+            ins = (toType == TYP_LONG) ? INS_i64_extend_u_i32 : INS_none;
+            break;
+        }
+        case GenIntCastDesc::SIGN_EXTEND_SMALL_INT:
+        {
+            if (fromType == TYP_LONG)
+            {
+                GetEmitter()->emitIns(INS_i32_wrap_i64);
+            }
+            GetEmitter()->emitIns((extendSize == 1) ? INS_i32_extend8_s : INS_i32_extend16_s);
+            // Mirror the zero-extend case: a long destination needs the i32
+            // result widened back to i64.
+            ins = (toType == TYP_LONG) ? INS_i64_extend_s_i32 : INS_none;
+            break;
+        }
+        case GenIntCastDesc::ZERO_EXTEND_INT:
+        {
+            ins = INS_i64_extend_u_i32;
+            break;
+        }
+        case GenIntCastDesc::SIGN_EXTEND_INT:
+        {
+            ins = INS_i64_extend_s_i32;
+            break;
+        }
+        default:
+            unreached();
+    }
+
+    if (ins != INS_none)
+    {
+        GetEmitter()->emitIns(ins);
+    }
+    genProduceReg(cast);
+}
+
+//------------------------------------------------------------------------
+// genFloatToIntCast: Generate code for a floating point to integer cast
+//
+// Arguments:
+//    tree - The GT_CAST node for the float-to-int cast operation
+//
+// Notes:
+//    Handles casts from TYP_FLOAT/TYP_DOUBLE to TYP_INT/TYP_LONG.
+//    Uses saturating truncation instructions (trunc_sat), which clamp
+//    out-of-range values rather than trapping.
+//
+void CodeGen::genFloatToIntCast(GenTree* tree)
+{
+    var_types   toType     = tree->TypeGet();
+    var_types   fromType   = tree->AsCast()->CastOp()->TypeGet();
+    bool        isUnsigned = varTypeIsUnsigned(tree->AsCast()->CastToType());
+    instruction ins        = INS_none;
+    assert(varTypeIsFloating(fromType) && ((toType == TYP_INT) || (toType == TYP_LONG)));
+
+    genConsumeOperands(tree->AsCast());
+
+    switch (PackTypes(toType, fromType))
+    {
+        case PackTypes(TYP_INT, TYP_FLOAT):
+            ins = isUnsigned ? INS_i32_trunc_sat_f32_u : INS_i32_trunc_sat_f32_s;
+            break;
+        case PackTypes(TYP_INT, TYP_DOUBLE):
+            ins = isUnsigned ? INS_i32_trunc_sat_f64_u : INS_i32_trunc_sat_f64_s;
+            break;
+        case PackTypes(TYP_LONG, TYP_FLOAT):
+            ins = isUnsigned ? INS_i64_trunc_sat_f32_u : INS_i64_trunc_sat_f32_s;
+            break;
+        case PackTypes(TYP_LONG, TYP_DOUBLE):
+            ins = isUnsigned ? INS_i64_trunc_sat_f64_u : INS_i64_trunc_sat_f64_s;
+            break;
+        default:
+            unreached();
+    }
+
+    GetEmitter()->emitIns(ins);
+    genProduceReg(tree);
+}
+
+//------------------------------------------------------------------------
+// genIntToFloatCast: Generate code for an integer to floating point cast
+//
+// Arguments:
+//    tree - The GT_CAST node for the int-to-float cast operation
 //
-static uint32_t PackOperAndType(GenTreeOp* treeNode)
+// Notes:
+//    Handles casts from TYP_INT/TYP_LONG to TYP_FLOAT/TYP_DOUBLE.
+//    Currently not implemented (NYI_WASM).
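+//    When implemented, this is expected to map onto the
+//    f32/f64.convert_{s,u}_{i32,i64} instructions defined in instrswasm.h.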
+//
+void CodeGen::genIntToFloatCast(GenTree* tree)
 {
-    return PackOperAndType(treeNode->OperGet(), treeNode->TypeGet());
+    NYI_WASM("genIntToFloatCast");
+}
+
+//------------------------------------------------------------------------
+// genFloatToFloatCast: Generate code for a float to float cast
+//
+// Arguments:
+//    tree - The GT_CAST node for the float-to-float cast operation
+//
+void CodeGen::genFloatToFloatCast(GenTree* tree)
+{
+    var_types   toType   = tree->TypeGet();
+    var_types   fromType = tree->AsCast()->CastOp()->TypeGet();
+    instruction ins      = INS_none;
+
+    genConsumeOperands(tree->AsCast());
+
+    switch (PackTypes(toType, fromType))
+    {
+        case PackTypes(TYP_FLOAT, TYP_DOUBLE):
+            ins = INS_f32_demote_f64;
+            break;
+        case PackTypes(TYP_DOUBLE, TYP_FLOAT):
+            ins = INS_f64_promote_f32;
+            break;
+        case PackTypes(TYP_FLOAT, TYP_FLOAT):
+        case PackTypes(TYP_DOUBLE, TYP_DOUBLE):
+            ins = INS_none;
+            break;
+        default:
+            unreached();
+    }
+
+    if (ins != INS_none)
+    {
+        GetEmitter()->emitIns(ins);
+    }
+    genProduceReg(tree);
 }
 
 //------------------------------------------------------------------------
@@ -476,7 +692,7 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
     genConsumeOperands(treeNode);
 
     instruction ins;
-    switch (PackOperAndType(treeNode))
+    switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()))
     {
         case PackOperAndType(GT_ADD, TYP_INT):
             if (treeNode->gtOverflow())
@@ -571,7 +787,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
     genConsumeOperands(treeNode);
 
     instruction ins;
-    switch (PackOperAndType(treeNode))
+    switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()))
     {
         case PackOperAndType(GT_DIV, TYP_INT):
             ins = INS_i32_div_s;
@@ -689,7 +905,7 @@ void CodeGen::genCodeForShift(GenTree* tree)
     // for both the shift and shiftee. So the shift may need to be extended (zero-extended) for TYP_LONG.
 
     instruction ins;
-    switch (PackOperAndType(treeNode))
+    switch (PackOperAndType(treeNode->OperGet(), treeNode->TypeGet()))
     {
         case PackOperAndType(GT_LSH, TYP_INT):
             ins = INS_i32_shl;
@@ -748,7 +964,7 @@ void CodeGen::genCodeForNegNot(GenTreeOp* tree)
     genConsumeOperands(tree);
 
     instruction ins;
-    switch (PackOperAndType(tree))
+    switch (PackOperAndType(tree->OperGet(), tree->TypeGet()))
     {
         case PackOperAndType(GT_NOT, TYP_INT):
             GetEmitter()->emitIns_I(INS_i32_const, emitTypeSize(tree), -1);
@@ -843,6 +1059,13 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
         assert(genIsValidReg(varDsc->GetRegNum()));
         unsigned wasmLclIndex = WasmRegToIndex(varDsc->GetRegNum());
         GetEmitter()->emitIns_I(INS_local_get, emitTypeSize(tree), wasmLclIndex);
+        // Here the tree's type may differ from the type of the local that holds the
+        // value, so an explicit conversion is needed: unlike a load from the shadow
+        // stack, a wasm local cannot be "loaded" with a different type.
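+        // The only mismatch handled below is a TYP_INT use of a TYP_LONG local.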
+        if (tree->TypeIs(TYP_INT) && varDsc->TypeIs(TYP_LONG))
+        {
+            GetEmitter()->emitIns(INS_i32_wrap_i64);
+        }
     }
 }
diff --git a/src/coreclr/jit/emitwasm.cpp b/src/coreclr/jit/emitwasm.cpp
index fb8064eadfe04e..9b0bfb8ff56720 100644
--- a/src/coreclr/jit/emitwasm.cpp
+++ b/src/coreclr/jit/emitwasm.cpp
@@ -196,8 +196,8 @@ emitter::insFormat emitter::emitInsFormat(instruction ins)
 static unsigned GetInsOpcode(instruction ins)
 {
-    static const uint8_t insOpcodes[] = {
-#define INST(id, nm, info, fmt, opcode) static_cast<uint8_t>(opcode),
+    static const uint16_t insOpcodes[] = {
+#define INST(id, nm, info, fmt, opcode) static_cast<uint16_t>(opcode),
 #include "instrs.h"
     };
@@ -275,9 +275,11 @@ unsigned emitter::instrDesc::idCodeSize() const
 #error WASM64
 #endif
 
-    // Currently, all our instructions have 1 byte opcode.
-    unsigned size = 1;
-    assert(FitsIn<uint8_t>(GetInsOpcode(idIns())));
+    unsigned int opcode = GetInsOpcode(idIns());
+
+    // Currently, all our instructions have 1 or 2 byte opcodes.
+    assert(FitsIn<uint8_t>(opcode) || FitsIn<uint16_t>(opcode));
+    unsigned size = FitsIn<uint8_t>(opcode) ? 1 : 2;
 
     switch (idInsFmt())
     {
         case IF_OPCODE:
@@ -377,6 +379,26 @@ size_t emitter::emitRawBytes(uint8_t* destination, const void* source, size_t co
     return count;
 }
 
+size_t emitter::emitOutputOpcode(BYTE* dst, instruction ins)
+{
+    size_t   sz     = 0;
+    unsigned opcode = GetInsOpcode(ins);
+
+    assert(FitsIn<uint16_t>(opcode));
+    if (FitsIn<uint8_t>(opcode))
+    {
+        emitOutputByte(dst, opcode);
+        sz += 1;
+    }
+    else if (FitsIn<uint16_t>(opcode))
+    {
+        // Two-byte opcodes pack the prefix (e.g. 0xFC) into the low byte and
+        // the sub-opcode into the high byte; the prefix is emitted first.
+        dst += emitOutputByte(dst, opcode & 0xFF);
+        emitOutputByte(dst, opcode >> 8);
+        sz += 2;
+    }
+    return sz;
+}
+
 size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 {
     BYTE* dst = *dp;
@@ -388,29 +410,31 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
     switch (insFmt)
     {
         case IF_OPCODE:
-            dst += emitOutputByte(dst, opcode);
+        {
+            dst += emitOutputOpcode(dst, ins);
             break;
+        }
         case IF_BLOCK:
-            dst += emitOutputByte(dst, opcode);
+            dst += emitOutputOpcode(dst, ins);
             dst += emitOutputByte(dst, 0x40 /* block type of void */);
             break;
         case IF_ULEB128:
         {
-            dst += emitOutputByte(dst, opcode);
+            dst += emitOutputOpcode(dst, ins);
             cnsval_ssize_t constant = emitGetInsSC(id);
             dst += emitOutputULEB128(dst, (uint64_t)constant);
             break;
         }
        case IF_SLEB128:
         {
-            dst += emitOutputByte(dst, opcode);
+            dst += emitOutputOpcode(dst, ins);
             cnsval_ssize_t constant = emitGetInsSC(id);
             dst += emitOutputSLEB128(dst, (int64_t)constant);
             break;
         }
         case IF_F32:
         {
-            dst += emitOutputByte(dst, opcode);
+            dst += emitOutputOpcode(dst, ins);
             // Reinterpret the bits as a double constant and then truncate it to f32,
             // then finally copy the raw truncated f32 bits to the output.
             cnsval_ssize_t bits = emitGetInsSC(id);
@@ -423,7 +447,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         }
         case IF_F64:
         {
-            dst += emitOutputByte(dst, opcode);
+            dst += emitOutputOpcode(dst, ins);
             // The int64 bits are actually a double constant we can copy directly
             // to the output stream.
             cnsval_ssize_t bits = emitGetInsSC(id);
@@ -438,7 +462,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         }
         case IF_MEMARG:
         {
-            dst += emitOutputByte(dst, opcode);
+            dst += emitOutputOpcode(dst, ins);
             uint64_t align  = emitGetAlignHintLog2(id);
             uint64_t offset = emitGetInsSC(id);
             assert(align <= UINT32_MAX); // spec says memarg alignment is u32
diff --git a/src/coreclr/jit/emitwasm.h b/src/coreclr/jit/emitwasm.h
index 52300e925d183d..f6cb7f831496cc 100644
--- a/src/coreclr/jit/emitwasm.h
+++ b/src/coreclr/jit/emitwasm.h
@@ -66,3 +66,4 @@ insFormat emitInsFormat(instruction ins);
 size_t emitOutputULEB128(uint8_t* destination, uint64_t value);
 size_t emitOutputSLEB128(uint8_t* destination, int64_t value);
 size_t emitRawBytes(uint8_t* destination, const void* source, size_t count);
+size_t emitOutputOpcode(BYTE* dst, instruction ins);
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index ab235263b96d80..2b3ad4e2ec2eb1 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -5512,10 +5512,11 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
             costEx = 1;
             costSz = 4;
 #elif defined(TARGET_WASM)
-            // TODO-WASM: 1 byte opcodes except for the int->fp saturating casts which are 2 bytes.
-            NYI_WASM("Cast costing");
-            costEx = 0;
-            costSz = 0;
+            // TODO-WASM: Determine if we need a better costing model for casts.
+            // Some operations may use 2-byte opcodes, and some operations may need
+            // multiple wasm instructions.
+            costEx = 2;
+            costSz = varTypeIsFloating(op1) && !varTypeIsFloating(tree->TypeGet()) ? 2 : 1;
 #else
 #error "Unknown TARGET"
 #endif
diff --git a/src/coreclr/jit/instrswasm.h b/src/coreclr/jit/instrswasm.h
index f8562c071c30cb..a039d190ffef92 100644
--- a/src/coreclr/jit/instrswasm.h
+++ b/src/coreclr/jit/instrswasm.h
@@ -41,12 +41,24 @@
 INST(br_table, "br_table", 0, IF_ULEB128, 0x0E)
 INST(return, "return", 0, IF_OPCODE, 0x0F)
 INST(drop, "drop", 0, IF_OPCODE, 0x1A)
-INST(local_get, "local.get", 0, IF_ULEB128, 0x20)
-INST(local_set, "local.set", 0, IF_ULEB128, 0x21)
-INST(i32_load, "i32.load", 0, IF_MEMARG, 0x28)
-INST(i64_load, "i64.load", 0, IF_MEMARG, 0x29)
-INST(f32_load, "f32.load", 0, IF_MEMARG, 0x2A)
-INST(f64_load, "f64.load", 0, IF_MEMARG, 0x2B)
+INST(local_get,    "local.get",    0, IF_ULEB128, 0x20)
+INST(local_set,    "local.set",    0, IF_ULEB128, 0x21)
+INST(i32_load,     "i32.load",     0, IF_MEMARG,  0x28)
+INST(i64_load,     "i64.load",     0, IF_MEMARG,  0x29)
+INST(f32_load,     "f32.load",     0, IF_MEMARG,  0x2A)
+INST(f64_load,     "f64.load",     0, IF_MEMARG,  0x2B)
+INST(i32_load8_s,  "i32.load8_s",  0, IF_MEMARG,  0x2C)
+INST(i32_load8_u,  "i32.load8_u",  0, IF_MEMARG,  0x2D)
+INST(i32_load16_s, "i32.load16_s", 0, IF_MEMARG,  0x2E)
+INST(i32_load16_u, "i32.load16_u", 0, IF_MEMARG,  0x2F)
+INST(i64_load8_s,  "i64.load8_s",  0, IF_MEMARG,  0x30)
+INST(i64_load8_u,  "i64.load8_u",  0, IF_MEMARG,  0x31)
+INST(i64_load16_s, "i64.load16_s", 0, IF_MEMARG,  0x32)
+INST(i64_load16_u, "i64.load16_u", 0, IF_MEMARG,  0x33)
+INST(i64_load32_s, "i64.load32_s", 0, IF_MEMARG,  0x34)
+INST(i64_load32_u, "i64.load32_u", 0, IF_MEMARG,  0x35)
+
 INST(i32_store, "i32.store", 0, IF_MEMARG, 0x36)
 INST(i64_store, "i64.store", 0, IF_MEMARG, 0x37)
 INST(f32_store, "f32.store", 0, IF_MEMARG, 0x38)
@@ -132,34 +144,79 @@ INST(i64_shr_u, "i64.shr_u", 0, IF_OPCODE, 0x88)
 INST(i64_rotl, "i64.rotl", 0, IF_OPCODE, 0x89)
 INST(i64_rotr, "i64.rotr", 0, IF_OPCODE, 0x8A)
 // Floating point arithmetic operations
-INST(f32_abs, "f32.abs", 0, IF_OPCODE, 0x8B)
-INST(f32_neg, "f32.neg", 0, IF_OPCODE, 0x8C)
-INST(f32_ceil, "f32.ceil", 0, IF_OPCODE, 0x8D)
-INST(f32_floor, "f32.floor", 0, IF_OPCODE, 0x8E)
-INST(f32_trunc, "f32.trunc", 0, IF_OPCODE, 0x8F)
-INST(f32_nearest, "f32.nearest", 0, IF_OPCODE, 0x90)
-INST(f32_sqrt, "f32.sqrt", 0, IF_OPCODE, 0x91)
-INST(f32_add, "f32.add", 0, IF_OPCODE, 0x92)
-INST(f32_sub, "f32.sub", 0, IF_OPCODE, 0x93)
-INST(f32_mul, "f32.mul", 0, IF_OPCODE, 0x94)
-INST(f32_div, "f32.div", 0, IF_OPCODE, 0x95)
-INST(f32_min, "f32.min", 0, IF_OPCODE, 0x96)
-INST(f32_max, "f32.max", 0, IF_OPCODE, 0x97)
-INST(f32_copysign,"f32.copysign",0, IF_OPCODE, 0x98)
-INST(f64_abs, "f64.abs", 0, IF_OPCODE, 0x99)
-INST(f64_neg, "f64.neg", 0, IF_OPCODE, 0x9A)
-INST(f64_ceil, "f64.ceil", 0, IF_OPCODE, 0x9B)
-INST(f64_floor, "f64.floor", 0, IF_OPCODE, 0x9C)
-INST(f64_trunc, "f64.trunc", 0, IF_OPCODE, 0x9D)
-INST(f64_nearest, "f64.nearest", 0, IF_OPCODE, 0x9E)
-INST(f64_sqrt, "f64.sqrt", 0, IF_OPCODE, 0x9F)
-INST(f64_add, "f64.add", 0, IF_OPCODE, 0xA0)
-INST(f64_sub, "f64.sub", 0, IF_OPCODE, 0xA1)
-INST(f64_mul, "f64.mul", 0, IF_OPCODE, 0xA2)
-INST(f64_div, "f64.div", 0, IF_OPCODE, 0xA3)
-INST(f64_min, "f64.min", 0, IF_OPCODE, 0xA4)
-INST(f64_max, "f64.max", 0, IF_OPCODE, 0xA5)
-INST(f64_copysign,"f64.copysign",0, IF_OPCODE, 0xA6)
+INST(f32_abs,      "f32.abs",      0, IF_OPCODE, 0x8B)
+INST(f32_neg,      "f32.neg",      0, IF_OPCODE, 0x8C)
+INST(f32_ceil,     "f32.ceil",     0, IF_OPCODE, 0x8D)
+INST(f32_floor,    "f32.floor",    0, IF_OPCODE, 0x8E)
+INST(f32_trunc,    "f32.trunc",    0, IF_OPCODE, 0x8F)
+INST(f32_nearest,  "f32.nearest",  0, IF_OPCODE, 0x90)
+INST(f32_sqrt,     "f32.sqrt",     0, IF_OPCODE, 0x91)
+INST(f32_add,      "f32.add",      0, IF_OPCODE, 0x92)
+INST(f32_sub,      "f32.sub",      0, IF_OPCODE, 0x93)
+INST(f32_mul,      "f32.mul",      0, IF_OPCODE, 0x94)
+INST(f32_div,      "f32.div",      0, IF_OPCODE, 0x95)
+INST(f32_min,      "f32.min",      0, IF_OPCODE, 0x96)
+INST(f32_max,      "f32.max",      0, IF_OPCODE, 0x97)
+INST(f32_copysign, "f32.copysign", 0, IF_OPCODE, 0x98)
+INST(f64_abs,      "f64.abs",      0, IF_OPCODE, 0x99)
+INST(f64_neg,      "f64.neg",      0, IF_OPCODE, 0x9A)
+INST(f64_ceil,     "f64.ceil",     0, IF_OPCODE, 0x9B)
+INST(f64_floor,    "f64.floor",    0, IF_OPCODE, 0x9C)
+INST(f64_trunc,    "f64.trunc",    0, IF_OPCODE, 0x9D)
+INST(f64_nearest,  "f64.nearest",  0, IF_OPCODE, 0x9E)
+INST(f64_sqrt,     "f64.sqrt",     0, IF_OPCODE, 0x9F)
+INST(f64_add,      "f64.add",      0, IF_OPCODE, 0xA0)
+INST(f64_sub,      "f64.sub",      0, IF_OPCODE, 0xA1)
+INST(f64_mul,      "f64.mul",      0, IF_OPCODE, 0xA2)
+INST(f64_div,      "f64.div",      0, IF_OPCODE, 0xA3)
+INST(f64_min,      "f64.min",      0, IF_OPCODE, 0xA4)
+INST(f64_max,      "f64.max",      0, IF_OPCODE, 0xA5)
+INST(f64_copysign, "f64.copysign", 0, IF_OPCODE, 0xA6)
+// Unary operations
+INST(i32_wrap_i64,        "i32.wrap_i64",        0, IF_OPCODE, 0xA7)
+INST(i32_trunc_s_f32,     "i32.trunc_s_f32",     0, IF_OPCODE, 0xA8)
+INST(i32_trunc_u_f32,     "i32.trunc_u_f32",     0, IF_OPCODE, 0xA9)
+INST(i32_trunc_s_f64,     "i32.trunc_s_f64",     0, IF_OPCODE, 0xAA)
+INST(i32_trunc_u_f64,     "i32.trunc_u_f64",     0, IF_OPCODE, 0xAB)
+INST(i64_extend_s_i32,    "i64.extend_s_i32",    0, IF_OPCODE, 0xAC)
+INST(i64_extend_u_i32,    "i64.extend_u_i32",    0, IF_OPCODE, 0xAD)
+INST(i64_trunc_s_f32,     "i64.trunc_s_f32",     0, IF_OPCODE, 0xAE)
+INST(i64_trunc_u_f32,     "i64.trunc_u_f32",     0, IF_OPCODE, 0xAF)
+INST(i64_trunc_s_f64,     "i64.trunc_s_f64",     0, IF_OPCODE, 0xB0)
+INST(i64_trunc_u_f64,     "i64.trunc_u_f64",     0, IF_OPCODE, 0xB1)
+INST(f32_convert_s_i32,   "f32.convert_s_i32",   0, IF_OPCODE, 0xB2)
+INST(f32_convert_u_i32,   "f32.convert_u_i32",   0, IF_OPCODE, 0xB3)
+INST(f32_convert_s_i64,   "f32.convert_s_i64",   0, IF_OPCODE, 0xB4)
+INST(f32_convert_u_i64,   "f32.convert_u_i64",   0, IF_OPCODE, 0xB5)
+INST(f32_demote_f64,      "f32.demote_f64",      0, IF_OPCODE, 0xB6)
+INST(f64_convert_s_i32,   "f64.convert_s_i32",   0, IF_OPCODE, 0xB7)
+INST(f64_convert_u_i32,   "f64.convert_u_i32",   0, IF_OPCODE, 0xB8)
+INST(f64_convert_s_i64,   "f64.convert_s_i64",   0, IF_OPCODE, 0xB9)
+INST(f64_convert_u_i64,   "f64.convert_u_i64",   0, IF_OPCODE, 0xBA)
+// NOTE: The spec name is f64.promote_f32 (the result type comes first, as with f32.demote_f64).
+INST(f64_promote_f32,     "f64.promote_f32",     0, IF_OPCODE, 0xBB)
+INST(i32_reinterpret_f32, "i32.reinterpret_f32", 0, IF_OPCODE, 0xBC)
+INST(i64_reinterpret_f64, "i64.reinterpret_f64", 0, IF_OPCODE, 0xBD)
+INST(f32_reinterpret_i32, "f32.reinterpret_i32", 0, IF_OPCODE, 0xBE)
+INST(f64_reinterpret_i64, "f64.reinterpret_i64", 0, IF_OPCODE, 0xBF)
+INST(i32_extend8_s,       "i32.extend8_s",       0, IF_OPCODE, 0xC0)
+INST(i32_extend16_s,      "i32.extend16_s",      0, IF_OPCODE, 0xC1)
+INST(i64_extend8_s,       "i64.extend8_s",       0, IF_OPCODE, 0xC2)
+INST(i64_extend16_s,      "i64.extend16_s",      0, IF_OPCODE, 0xC3)
+INST(i64_extend32_s,      "i64.extend32_s",      0, IF_OPCODE, 0xC4)
+
+// NOTE: per https://github.com/dotnet/runtime/issues/122309,
+// we have decided to include saturating float->int conversions
+// in our base Wasm ISA. These are two-byte opcodes: the 0xFC prefix
+// is stored in the low byte and the sub-opcode in the high byte.
+INST(i32_trunc_sat_f32_s, "i32.trunc_sat_f32_s", 0, IF_OPCODE, 0x00FC)
+INST(i32_trunc_sat_f32_u, "i32.trunc_sat_f32_u", 0, IF_OPCODE, 0x01FC)
+INST(i32_trunc_sat_f64_s, "i32.trunc_sat_f64_s", 0, IF_OPCODE, 0x02FC)
+INST(i32_trunc_sat_f64_u, "i32.trunc_sat_f64_u", 0, IF_OPCODE, 0x03FC)
+INST(i64_trunc_sat_f32_s, "i64.trunc_sat_f32_s", 0, IF_OPCODE, 0x04FC)
+INST(i64_trunc_sat_f32_u, "i64.trunc_sat_f32_u", 0, IF_OPCODE, 0x05FC)
+INST(i64_trunc_sat_f64_s, "i64.trunc_sat_f64_s", 0, IF_OPCODE, 0x06FC)
+INST(i64_trunc_sat_f64_u, "i64.trunc_sat_f64_u", 0, IF_OPCODE, 0x07FC)
+
 // clang-format on
 #undef INST
diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp
index c44270b9d3afca..cc50a24a44c8dc 100644
--- a/src/coreclr/jit/morph.cpp
+++ b/src/coreclr/jit/morph.cpp
@@ -330,6 +330,8 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                 return nullptr;
             }
 
+// On Wasm, we have direct support for double->(u)long casts using the saturating instructions.
+#ifndef TARGET_WASM
             switch (dstType)
             {
                 case TYP_LONG:
@@ -339,6 +341,7 @@ GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
                 default:
                     unreached();
             }
+#endif
 #endif // TARGET_64BIT
         }
         else
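
For reference, a minimal standalone sketch of the two-byte opcode packing that
GetInsOpcode/emitOutputOpcode rely on (hypothetical helper name, independent of
the JIT's emitter types): the 0xFC prefix sits in the low byte of the packed
value, and the sub-opcode sits in the high byte. Since the trunc_sat sub-opcodes
are 0-7, emitting the high byte raw is also a valid one-byte ULEB128 encoding.

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Emits a packed opcode: one byte if it fits, otherwise the low (prefix)
    // byte followed by the high (sub-opcode) byte.
    static size_t EmitPackedOpcode(uint8_t* dst, uint16_t opcode)
    {
        if (opcode <= 0xFF)
        {
            dst[0] = static_cast<uint8_t>(opcode); // e.g. 0xA7 for i32.wrap_i64
            return 1;
        }
        dst[0] = static_cast<uint8_t>(opcode & 0xFF); // prefix byte, e.g. 0xFC
        dst[1] = static_cast<uint8_t>(opcode >> 8);   // sub-opcode, e.g. 0x04
        return 2;
    }

    int main()
    {
        uint8_t buf[2];
        // i64.trunc_sat_f32_s is packed as 0x04FC and emitted as FC 04.
        size_t n = EmitPackedOpcode(buf, 0x04FC);
        assert((n == 2) && (buf[0] == 0xFC) && (buf[1] == 0x04));
        return 0;
    }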