diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index e2b728fc10b..6f3514275b4 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -79,6 +79,8 @@ struct GlobalStructInference : public Pass { // optimizable it will have an entry here, and not if not. std::unordered_map<HeapType, std::vector<Name>> typeGlobals; + std::unique_ptr<SubTypes> subTypes; + void run(Module* module) override { if (!module->features.hasGC()) { return; } @@ -208,6 +210,12 @@ struct GlobalStructInference : public Pass { return; } + // When CD is enabled, we can optimize to ref.cast_desc, depending on the + // presence of subtypes. + if (module->features.hasCustomDescriptors()) { + subTypes = std::make_unique<SubTypes>(*module); + } + // The above loop on typeGlobalsCopy is on an unsorted data structure, and // that can lead to nondeterminism in typeGlobals. Sort the vectors there to // ensure determinism. @@ -528,6 +536,52 @@ struct GlobalStructInference : public Pass { right)); } + void visitRefCast(RefCast* curr) { + // When we see (ref.cast $T), and the type has a descriptor, and that + // descriptor only has a single global, then we can do (ref.cast_desc) + // using the descriptor. Descriptor casts are usually more efficient + // than normal ones (and even more so if we get lucky and are in a loop, + // where the global.get of the descriptor can be hoisted). + // TODO: only do this when shrinkLevel == 0? + + // Check if we have a descriptor. + auto type = curr->type; + if (type == Type::unreachable) { + return; + } + auto heapType = type.getHeapType(); + auto desc = heapType.getDescriptorType(); + if (!desc) { + return; + } + + // Check if the type has no (relevant) subtypes, as a ref.cast_desc will + // find precisely that type and nothing else. + if (!type.isExact() && + !parent.subTypes->getStrictSubTypes(heapType).empty()) { + return; + } + + // Check if we have a single global for the descriptor. 
+ auto iter = parent.typeGlobals.find(*desc); + if (iter == parent.typeGlobals.end()) { + return; + } + const auto& globals = iter->second; + if (globals.size() != 1) { + return; + } + + // We can optimize! + auto global = globals[0]; + auto& wasm = *getModule(); + Builder builder(wasm); + auto* getGlobal = + builder.makeGlobalGet(global, wasm.getGlobal(global)->type); + auto* castDesc = builder.makeRefCast(curr->ref, getGlobal, curr->type); + replaceCurrent(castDesc); + } + void visitFunction(Function* func) { if (refinalize) { ReFinalize().walkFunctionInModule(func, getModule()); diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 0ca1806b568..c109bd5ca5e 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -186,3 +186,322 @@ ) ) +;; Two types with descriptors and subtyping between them. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub $A (descriptor $B.desc (struct)))) + (type $B (sub $A (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The second cast here is optimizable: it can only be to a single type with + ;; no subtypes, so we can use ref.cast_desc. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) + + ;; CHECK: (func $test-exact (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref (exact $A)) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-exact (param $any anyref) + ;; When using exact casts, we can optimize both. TODO: atm we do not + ;; optimize $A, as we propagate on |typeGlobals|. + (drop + (ref.cast (ref (exact $A)) + (local.get $any) + ) + ) + (drop + (ref.cast (ref (exact $B)) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A and $B. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We still cannot optimize $A: while $A has no subtypes, the descriptor + ;; $A.desc has a subtype. We could optimize this TODO + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A.desc and $B.desc. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub (describes $B (struct)))) + (type $B.desc (sub (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We can fully optimize these two independent cases. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; Zero descriptor instances in globals. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could make this trap + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + +;; Two descriptor instances in globals. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc2 (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block ;; (replaces unreachable RefCast we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could with a select + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ;; We do not error on unreachable casts. 
+ (drop + (ref.cast (ref $A) + (unreachable) + ) + ) + ) +) + +(module + ;; CHECK: (type $0 (func (param anyref))) + + ;; CHECK: (type $A (sub (struct))) + (type $A (sub (struct))) + + ;; CHECK: (func $test (type $0) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not handle casts to types without descriptors. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + +;; Nullable cast. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref null $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The cast is nullable, which we can still optimize: null will succeed as + ;; expected. + (drop + (ref.cast (ref null $A) + (local.get $any) + ) + ) + ) +) +