diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h
index ac8798ff6129a0..e43081c22ce75b 100644
--- a/Include/cpython/pystate.h
+++ b/Include/cpython/pystate.h
@@ -113,6 +113,9 @@ struct _ts {
     /* Currently holds the GIL. Must be its own field to avoid data races */
     int holds_gil;
 
+    /* Currently requesting the GIL */
+    int gil_requested;
+
     int _whence;
 
     /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED).
diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h
index 8e7cd16acffa48..f6d50bf5df7a9e 100644
--- a/Include/internal/pycore_debug_offsets.h
+++ b/Include/internal/pycore_debug_offsets.h
@@ -106,6 +106,8 @@ typedef struct _Py_DebugOffsets {
         uint64_t native_thread_id;
         uint64_t datastack_chunk;
         uint64_t status;
+        uint64_t holds_gil;
+        uint64_t gil_requested;
     } thread_state;
 
     // InterpreterFrame offset;
@@ -273,6 +275,8 @@ typedef struct _Py_DebugOffsets {
         .native_thread_id = offsetof(PyThreadState, native_thread_id), \
         .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \
         .status = offsetof(PyThreadState, _status), \
+        .holds_gil = offsetof(PyThreadState, holds_gil), \
+        .gil_requested = offsetof(PyThreadState, gil_requested), \
     }, \
     .interpreter_frame = { \
         .size = sizeof(_PyInterpreterFrame), \
diff --git a/Lib/profiling/sampling/collector.py b/Lib/profiling/sampling/collector.py
index b7a033ac0a6637..311933480cbf17 100644
--- a/Lib/profiling/sampling/collector.py
+++ b/Lib/profiling/sampling/collector.py
@@ -1,17 +1,13 @@
 from abc import ABC, abstractmethod
 
-# Enums are slow
-THREAD_STATE_RUNNING = 0
-THREAD_STATE_IDLE = 1
-THREAD_STATE_GIL_WAIT = 2
-THREAD_STATE_UNKNOWN = 3
-
-STATUS = {
-    THREAD_STATE_RUNNING: "running",
-    THREAD_STATE_IDLE: "idle",
-    THREAD_STATE_GIL_WAIT: "gil_wait",
-    THREAD_STATE_UNKNOWN: "unknown",
-}
+# Thread status flags
+try:
+    from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN
+except ImportError:
+    # Fallback for tests or when module is not available
+    THREAD_STATUS_HAS_GIL = (1 << 0)
+    THREAD_STATUS_ON_CPU = (1 << 1)
+    THREAD_STATUS_UNKNOWN = (1 << 2)
 
 class Collector(ABC):
     @abstractmethod
@@ -26,8 +22,14 @@ def _iter_all_frames(self, stack_frames, skip_idle=False):
         """Iterate over all frame stacks from all interpreters and threads."""
         for interpreter_info in stack_frames:
             for thread_info in interpreter_info.threads:
-                if skip_idle and thread_info.status != THREAD_STATE_RUNNING:
-                    continue
+                # skip_idle now means: skip if thread is not actively running
+                # A thread is "active" if it has the GIL OR is on CPU
+                if skip_idle:
+                    status_flags = thread_info.status
+                    has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
+                    on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
+                    if not (has_gil or on_cpu):
+                        continue
                 frames = thread_info.frame_info
                 if frames:
                     yield frames, thread_info.thread_id
diff --git a/Lib/profiling/sampling/gecko_collector.py b/Lib/profiling/sampling/gecko_collector.py
index 548acbf24b7fd2..d21aa0ed60b855 100644
--- a/Lib/profiling/sampling/gecko_collector.py
+++ b/Lib/profiling/sampling/gecko_collector.py
@@ -1,9 +1,20 @@
+import itertools
 import json
 import os
 import platform
+import sys
+import threading
 import time
 
-from .collector import Collector, THREAD_STATE_RUNNING
+from .collector import Collector
+try:
+    from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_UNKNOWN, THREAD_STATUS_GIL_REQUESTED
+except ImportError:
+    # Fallback if module not available (shouldn't happen in normal use)
+    THREAD_STATUS_HAS_GIL = (1 << 0)
+    THREAD_STATUS_ON_CPU = (1 << 1)
+    THREAD_STATUS_UNKNOWN = (1 << 2)
+    THREAD_STATUS_GIL_REQUESTED = (1 << 3)
 
 
 # Categories matching Firefox Profiler expectations
@@ -11,14 +22,20 @@
     {"name": "Other", "color": "grey", "subcategories": ["Other"]},
     {"name": "Python", "color": "yellow", "subcategories": ["Other"]},
     {"name": "Native", "color": "blue", "subcategories": ["Other"]},
-    {"name": "Idle", "color": "transparent", "subcategories": ["Other"]},
+    {"name": "GC", "color": "orange", "subcategories": ["Other"]},
+    {"name": "GIL", "color": "green", "subcategories": ["Other"]},
+    {"name": "CPU", "color": "purple", "subcategories": ["Other"]},
+    {"name": "Code Type", "color": "red", "subcategories": ["Other"]},
 ]
 
 # Category indices
 CATEGORY_OTHER = 0
 CATEGORY_PYTHON = 1
 CATEGORY_NATIVE = 2
-CATEGORY_IDLE = 3
+CATEGORY_GC = 3
+CATEGORY_GIL = 4
+CATEGORY_CPU = 5
+CATEGORY_CODE_TYPE = 6
 
 # Subcategory indices
 DEFAULT_SUBCATEGORY = 0
@@ -58,6 +75,43 @@ def __init__(self, *, skip_idle=False):
         self.last_sample_time = 0
         self.interval = 1.0  # Will be calculated from actual sampling
 
+        # State tracking for interval markers (tid -> start_time)
+        self.has_gil_start = {}           # Thread has the GIL
+        self.no_gil_start = {}            # Thread doesn't have the GIL
+        self.on_cpu_start = {}            # Thread is running on CPU
+        self.off_cpu_start = {}           # Thread is off CPU
+        self.python_code_start = {}       # Thread running Python code (has GIL)
+        self.native_code_start = {}       # Thread running native code (on CPU without GIL)
+        self.gil_wait_start = {}          # Thread waiting for GIL
+
+        # GC event tracking: track if we're currently in a GC
+        self.potential_gc_start = None
+
+    def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
+                                  active_name, inactive_name, category, current_time):
+        """Track binary state transitions and emit markers.
+
+        Args:
+            tid: Thread ID
+            condition: Whether the active state is true
+            active_dict: Dict tracking start time of active state
+            inactive_dict: Dict tracking start time of inactive state
+            active_name: Name for active state marker
+            inactive_name: Name for inactive state marker
+            category: Gecko category for the markers
+            current_time: Current timestamp
+        """
+        if condition:
+            active_dict.setdefault(tid, current_time)
+            if tid in inactive_dict:
+                self._add_marker(tid, inactive_name, inactive_dict.pop(tid),
+                               current_time, category)
+        else:
+            inactive_dict.setdefault(tid, current_time)
+            if tid in active_dict:
+                self._add_marker(tid, active_name, active_dict.pop(tid),
+                               current_time, category)
+
     def collect(self, stack_frames):
         """Collect a sample from stack frames."""
         current_time = (time.time() * 1000) - self.start_time
@@ -69,18 +123,16 @@ def collect(self, stack_frames):
             ) / self.sample_count
         self.last_sample_time = current_time
 
+        # GC Event Detection and process threads
+        gc_collecting = False
+
         for interpreter_info in stack_frames:
             for thread_info in interpreter_info.threads:
-                if (
-                    self.skip_idle
-                    and thread_info.status != THREAD_STATE_RUNNING
-                ):
-                    continue
+                # Track GC status
+                if thread_info.gc_collecting:
+                    gc_collecting = True
 
                 frames = thread_info.frame_info
-                if not frames:
-                    continue
-
                 tid = thread_info.thread_id
 
                 # Initialize thread if needed
@@ -89,6 +141,61 @@ def collect(self, stack_frames):
 
                 thread_data = self.threads[tid]
 
+                # Decode status flags
+                status_flags = thread_info.status
+                has_gil = bool(status_flags & THREAD_STATUS_HAS_GIL)
+                on_cpu = bool(status_flags & THREAD_STATUS_ON_CPU)
+                gil_requested = bool(status_flags & THREAD_STATUS_GIL_REQUESTED)
+
+                # Track GIL possession (Has GIL / No GIL)
+                self._track_state_transition(
+                    tid, has_gil, self.has_gil_start, self.no_gil_start,
+                    "Has GIL", "No GIL", CATEGORY_GIL, current_time
+                )
+
+                # Track CPU state (On CPU / Off CPU)
+                self._track_state_transition(
+                    tid, on_cpu, self.on_cpu_start, self.off_cpu_start,
+                    "On CPU", "Off CPU", CATEGORY_CPU, current_time
+                )
+
+                # Track code type (Python Code / Native Code)
+                if has_gil:
+                    self._track_state_transition(
+                        tid, True, self.python_code_start, self.native_code_start,
+                        "Python Code", "Native Code", CATEGORY_CODE_TYPE, current_time
+                    )
+                elif on_cpu:
+                    self._track_state_transition(
+                        tid, True, self.native_code_start, self.python_code_start,
+                        "Native Code", "Python Code", CATEGORY_CODE_TYPE, current_time
+                    )
+                else:
+                    # Neither has GIL nor on CPU - end both if running
+                    if tid in self.python_code_start:
+                        self._add_marker(tid, "Python Code", self.python_code_start.pop(tid),
+                                       current_time, CATEGORY_CODE_TYPE)
+                    if tid in self.native_code_start:
+                        self._add_marker(tid, "Native Code", self.native_code_start.pop(tid),
+                                       current_time, CATEGORY_CODE_TYPE)
+
+                # Track "Waiting for GIL" intervals (one-sided tracking)
+                if gil_requested:
+                    self.gil_wait_start.setdefault(tid, current_time)
+                elif tid in self.gil_wait_start:
+                    self._add_marker(tid, "Waiting for GIL", self.gil_wait_start.pop(tid),
+                                   current_time, CATEGORY_GIL)
+
+                # Categorize: idle if neither has GIL nor on CPU
+                is_idle = not has_gil and not on_cpu
+
+                # Skip idle threads if skip_idle is enabled
+                if self.skip_idle and is_idle:
+                    continue
+
+                if not frames:
+                    continue
+
                 # Process the stack
                 stack_index = self._process_stack(thread_data, frames)
 
@@ -98,11 +205,21 @@ def collect(self, stack_frames):
                 samples["time"].append(current_time)
                 samples["eventDelay"].append(None)
 
+        # Handle GC event markers after processing all threads
+        if gc_collecting:
+            if self.potential_gc_start is None:
+                # Start of GC
+                self.potential_gc_start = current_time
+        else:
+            # End of GC
+            if self.potential_gc_start is not None:
+                self._add_gc_marker(self.potential_gc_start, current_time)
+            self.potential_gc_start = None
+
         self.sample_count += 1
 
     def _create_thread(self, tid):
         """Create a new thread structure with processed profile format."""
-        import threading
 
         # Determine if this is the main thread
         try:
@@ -181,7 +298,7 @@ def _create_thread(self, tid):
                 "functionSize": [],
                 "length": 0,
             },
-            # Markers - processed format
+            # Markers - processed format (arrays)
             "markers": {
                 "data": [],
                 "name": [],
@@ -215,6 +332,36 @@ def _intern_string(self, s):
         self.global_string_map[s] = idx
         return idx
 
+    def _add_marker(self, tid, name, start_time, end_time, category):
+        """Add an interval marker for a specific thread."""
+        if tid not in self.threads:
+            return
+
+        thread_data = self.threads[tid]
+        duration = end_time - start_time
+
+        name_idx = self._intern_string(name)
+        markers = thread_data["markers"]
+        markers["name"].append(name_idx)
+        markers["startTime"].append(start_time)
+        markers["endTime"].append(end_time)
+        markers["phase"].append(1)  # 1 = interval marker
+        markers["category"].append(category)
+        markers["data"].append({
+            "type": name.replace(" ", ""),
+            "duration": duration,
+            "tid": tid
+        })
+
+    def _add_gc_marker(self, start_time, end_time):
+        """Add a GC Collecting event marker to the main thread (or first thread we see)."""
+        if not self.threads:
+            return
+
+        # Add GC marker to the first thread (typically the main thread)
+        first_tid = next(iter(self.threads))
+        self._add_marker(first_tid, "GC Collecting", start_time, end_time, CATEGORY_GC)
+
     def _process_stack(self, thread_data, frames):
         """Process a stack and return the stack index."""
         if not frames:
@@ -383,15 +530,67 @@ def _get_or_create_frame(self, thread_data, func_idx, lineno):
         frame_cache[frame_key] = frame_idx
         return frame_idx
 
+    def _finalize_markers(self):
+        """Close any open markers at the end of profiling."""
+        end_time = self.last_sample_time
+
+        # Close all open markers for each thread using a generic approach
+        marker_states = [
+            (self.has_gil_start, "Has GIL", CATEGORY_GIL),
+            (self.no_gil_start, "No GIL", CATEGORY_GIL),
+            (self.on_cpu_start, "On CPU", CATEGORY_CPU),
+            (self.off_cpu_start, "Off CPU", CATEGORY_CPU),
+            (self.python_code_start, "Python Code", CATEGORY_CODE_TYPE),
+            (self.native_code_start, "Native Code", CATEGORY_CODE_TYPE),
+            (self.gil_wait_start, "Waiting for GIL", CATEGORY_GIL),
+        ]
+
+        for state_dict, marker_name, category in marker_states:
+            for tid in list(state_dict.keys()):
+                self._add_marker(tid, marker_name, state_dict[tid], end_time, category)
+                del state_dict[tid]
+
+        # Close any open GC marker
+        if self.potential_gc_start is not None:
+            self._add_gc_marker(self.potential_gc_start, end_time)
+            self.potential_gc_start = None
+
     def export(self, filename):
         """Export the profile to a Gecko JSON file."""
+
         if self.sample_count > 0 and self.last_sample_time > 0:
             self.interval = self.last_sample_time / self.sample_count
 
-        profile = self._build_profile()
+        # Spinner for progress indication
+        spinner = itertools.cycle(['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'])
+        stop_spinner = threading.Event()
+
+        def spin():
+            message = 'Building Gecko profile...'
+            while not stop_spinner.is_set():
+                sys.stderr.write(f'\r{next(spinner)} {message}')
+                sys.stderr.flush()
+                time.sleep(0.1)
+            # Clear the spinner line
+            sys.stderr.write('\r' + ' ' * (len(message) + 3) + '\r')
+            sys.stderr.flush()
+
+        spinner_thread = threading.Thread(target=spin, daemon=True)
+        spinner_thread.start()
+
+        try:
+            # Finalize any open markers before building profile
+            self._finalize_markers()
+
+            profile = self._build_profile()
 
-        with open(filename, "w") as f:
-            json.dump(profile, f, separators=(",", ":"))
+            with open(filename, "w") as f:
+                json.dump(profile, f, separators=(",", ":"))
+        finally:
+            stop_spinner.set()
+            spinner_thread.join(timeout=1.0)
+            # Small delay to ensure the clear happens
+            time.sleep(0.01)
 
         print(f"Gecko profile written to {filename}")
         print(
@@ -416,6 +615,7 @@ def _build_profile(self):
             frame_table["length"] = len(frame_table["func"])
             func_table["length"] = len(func_table["name"])
             resource_table["length"] = len(resource_table["name"])
+            thread_data["markers"]["length"] = len(thread_data["markers"]["name"])
 
             # Clean up internal caches
             del thread_data["_stackCache"]
diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py
index e0d4583f0a1aec..5cea5ef184eaff 100644
--- a/Lib/profiling/sampling/sample.py
+++ b/Lib/profiling/sampling/sample.py
@@ -21,6 +21,7 @@
 PROFILING_MODE_WALL = 0
 PROFILING_MODE_CPU = 1
 PROFILING_MODE_GIL = 2
+PROFILING_MODE_ALL = 3  # Combines GIL + CPU checks
 
 
 def _parse_mode(mode_string):
@@ -136,18 +137,20 @@ def _run_with_sync(original_cmd):
 
 
 class SampleProfiler:
-    def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL):
+    def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, skip_non_matching_threads=True):
         self.pid = pid
         self.sample_interval_usec = sample_interval_usec
         self.all_threads = all_threads
         if _FREE_THREADED_BUILD:
             self.unwinder = _remote_debugging.RemoteUnwinder(
-                self.pid, all_threads=self.all_threads, mode=mode
+                self.pid, all_threads=self.all_threads, mode=mode,
+                skip_non_matching_threads=skip_non_matching_threads
             )
         else:
             only_active_threads = bool(self.all_threads)
             self.unwinder = _remote_debugging.RemoteUnwinder(
-                self.pid, only_active_thread=only_active_threads, mode=mode
+                self.pid, only_active_thread=only_active_threads, mode=mode,
+                skip_non_matching_threads=skip_non_matching_threads
             )
         # Track sample intervals and total sample count
         self.sample_intervals = deque(maxlen=100)
@@ -614,14 +617,21 @@ def sample(
     realtime_stats=False,
     mode=PROFILING_MODE_WALL,
 ):
+    # PROFILING_MODE_ALL implies no skipping at all
+    if mode == PROFILING_MODE_ALL:
+        skip_non_matching_threads = False
+        skip_idle = False
+    else:
+        # Determine skip settings based on output format and mode
+        skip_non_matching_threads = output_format != "gecko"
+        skip_idle = mode != PROFILING_MODE_WALL
+
     profiler = SampleProfiler(
-        pid, sample_interval_usec, all_threads=all_threads, mode=mode
+        pid, sample_interval_usec, all_threads=all_threads, mode=mode,
+        skip_non_matching_threads=skip_non_matching_threads
     )
     profiler.realtime_stats = realtime_stats
 
-    # Determine skip_idle for collector compatibility
-    skip_idle = mode != PROFILING_MODE_WALL
-
     collector = None
     match output_format:
         case "pstats":
@@ -633,7 +643,8 @@ def sample(
             collector = FlamegraphCollector(skip_idle=skip_idle)
             filename = filename or f"flamegraph.{pid}.html"
         case "gecko":
-            collector = GeckoCollector(skip_idle=skip_idle)
+            # Gecko format never skips idle threads to show full thread states
+            collector = GeckoCollector(skip_idle=False)
             filename = filename or f"gecko.{pid}.json"
         case _:
             raise ValueError(f"Invalid output format: {output_format}")
@@ -877,6 +888,10 @@ def main():
     if args.format in ("collapsed", "gecko"):
         _validate_collapsed_format_args(args, parser)
 
+    # Validate that --mode is not used with --gecko
+    if args.format == "gecko" and args.mode != "wall":
+        parser.error("--mode option is incompatible with --gecko format. Gecko format automatically uses ALL mode (GIL + CPU analysis).")
+
     sort_value = args.sort if args.sort is not None else 2
 
     if args.module is not None and not args.module:
@@ -895,7 +910,11 @@ def main():
     elif target_count > 1:
         parser.error("only one target type can be specified: -p/--pid, -m/--module, or script")
 
-    mode = _parse_mode(args.mode)
+    # Use PROFILING_MODE_ALL for gecko format, otherwise parse user's choice
+    if args.format == "gecko":
+        mode = PROFILING_MODE_ALL
+    else:
+        mode = _parse_mode(args.mode)
 
     if args.pid:
         sample(
diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py
index 01720457e61f5c..60e5000cd72a32 100644
--- a/Lib/test/test_external_inspection.py
+++ b/Lib/test/test_external_inspection.py
@@ -23,6 +23,12 @@
 PROFILING_MODE_WALL = 0
 PROFILING_MODE_CPU = 1
 PROFILING_MODE_GIL = 2
+PROFILING_MODE_ALL = 3
+
+# Thread status flags
+THREAD_STATUS_HAS_GIL = (1 << 0)
+THREAD_STATUS_ON_CPU = (1 << 1)
+THREAD_STATUS_UNKNOWN = (1 << 2)
 
 try:
     from concurrent import interpreters
@@ -1763,11 +1769,14 @@ def busy():
                             for thread_info in interpreter_info.threads:
                                 statuses[thread_info.thread_id] = thread_info.status
 
-                        # Check if sleeper thread is idle and busy thread is running
+                        # Check if sleeper thread is off CPU and busy thread is on CPU
+                        # In the new flags system:
+                        # - sleeper should NOT have ON_CPU flag (off CPU)
+                        # - busy should have ON_CPU flag
                         if (sleeper_tid in statuses and
                             busy_tid in statuses and
-                            statuses[sleeper_tid] == 1 and
-                            statuses[busy_tid] == 0):
+                            not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and
+                            (statuses[busy_tid] & THREAD_STATUS_ON_CPU)):
                             break
                         time.sleep(0.5)  # Give a bit of time to let threads settle
                 except PermissionError:
@@ -1779,8 +1788,8 @@ def busy():
                 self.assertIsNotNone(busy_tid, "Busy thread id not received")
                 self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
                 self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
-                self.assertEqual(statuses[sleeper_tid], 1, "Sleeper thread should be idle (1)")
-                self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
+                self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper thread should be off CPU")
+                self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy thread should be on CPU")
 
             finally:
                 if client_socket is not None:
@@ -1875,11 +1884,14 @@ def busy():
                             for thread_info in interpreter_info.threads:
                                 statuses[thread_info.thread_id] = thread_info.status
 
-                        # Check if sleeper thread is idle (status 2 for GIL mode) and busy thread is running
+                        # Check if sleeper thread doesn't have GIL and busy thread has GIL
+                        # In the new flags system:
+                        # - sleeper should NOT have HAS_GIL flag (waiting for GIL)
+                        # - busy should have HAS_GIL flag
                         if (sleeper_tid in statuses and
                             busy_tid in statuses and
-                            statuses[sleeper_tid] == 2 and
-                            statuses[busy_tid] == 0):
+                            not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and
+                            (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
                             break
                         time.sleep(0.5)  # Give a bit of time to let threads settle
                 except PermissionError:
@@ -1891,8 +1903,8 @@ def busy():
                 self.assertIsNotNone(busy_tid, "Busy thread id not received")
                 self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
                 self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
-                self.assertEqual(statuses[sleeper_tid], 2, "Sleeper thread should be idle (1)")
-                self.assertEqual(statuses[busy_tid], 0, "Busy thread should be running (0)")
+                self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper thread should not have GIL")
+                self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy thread should have GIL")
 
             finally:
                 if client_socket is not None:
@@ -1900,6 +1912,128 @@ def busy():
                 p.terminate()
                 p.wait(timeout=SHORT_TIMEOUT)
 
+    @unittest.skipIf(
+        sys.platform not in ("linux", "darwin", "win32"),
+        "Test only runs on supported platforms (Linux, macOS, or Windows)",
+    )
+    @unittest.skipIf(sys.platform == "android", "Android raises Linux-specific exception")
+    def test_thread_status_all_mode_detection(self):
+        port = find_unused_port()
+        script = textwrap.dedent(
+            f"""\
+            import socket
+            import threading
+            import time
+            import sys
+
+            def sleeper_thread():
+                conn = socket.create_connection(("localhost", {port}))
+                conn.sendall(b"sleeper:" + str(threading.get_native_id()).encode())
+                while True:
+                    time.sleep(1)
+
+            def busy_thread():
+                conn = socket.create_connection(("localhost", {port}))
+                conn.sendall(b"busy:" + str(threading.get_native_id()).encode())
+                while True:
+                    sum(range(100000))
+
+            t1 = threading.Thread(target=sleeper_thread)
+            t2 = threading.Thread(target=busy_thread)
+            t1.start()
+            t2.start()
+            t1.join()
+            t2.join()
+            """
+        )
+
+        with os_helper.temp_dir() as tmp_dir:
+            script_file = make_script(tmp_dir, "script", script)
+            server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+            server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+            server_socket.bind(("localhost", port))
+            server_socket.listen(2)
+            server_socket.settimeout(SHORT_TIMEOUT)
+
+            p = subprocess.Popen(
+                [sys.executable, script_file],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+
+            client_sockets = []
+            try:
+                sleeper_tid = None
+                busy_tid = None
+
+                # Receive thread IDs from the child process
+                for _ in range(2):
+                    client_socket, _ = server_socket.accept()
+                    client_sockets.append(client_socket)
+                    line = client_socket.recv(1024)
+                    if line:
+                        if line.startswith(b"sleeper:"):
+                            try:
+                                sleeper_tid = int(line.split(b":")[-1])
+                            except Exception:
+                                pass
+                        elif line.startswith(b"busy:"):
+                            try:
+                                busy_tid = int(line.split(b":")[-1])
+                            except Exception:
+                                pass
+
+                server_socket.close()
+
+                attempts = 10
+                statuses = {}
+                try:
+                    unwinder = RemoteUnwinder(p.pid, all_threads=True, mode=PROFILING_MODE_ALL,
+                                                skip_non_matching_threads=False)
+                    for _ in range(attempts):
+                        traces = unwinder.get_stack_trace()
+                        # Find threads and their statuses
+                        statuses = {}
+                        for interpreter_info in traces:
+                            for thread_info in interpreter_info.threads:
+                                statuses[thread_info.thread_id] = thread_info.status
+
+                        # Check ALL mode provides both GIL and CPU info
+                        # - sleeper should NOT have ON_CPU and NOT have HAS_GIL
+                        # - busy should have ON_CPU and have HAS_GIL
+                        if (sleeper_tid in statuses and
+                            busy_tid in statuses and
+                            not (statuses[sleeper_tid] & THREAD_STATUS_ON_CPU) and
+                            not (statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL) and
+                            (statuses[busy_tid] & THREAD_STATUS_ON_CPU) and
+                            (statuses[busy_tid] & THREAD_STATUS_HAS_GIL)):
+                            break
+                        time.sleep(0.5)
+                except PermissionError:
+                    self.skipTest(
+                        "Insufficient permissions to read the stack trace"
+                    )
+
+                self.assertIsNotNone(sleeper_tid, "Sleeper thread id not received")
+                self.assertIsNotNone(busy_tid, "Busy thread id not received")
+                self.assertIn(sleeper_tid, statuses, "Sleeper tid not found in sampled threads")
+                self.assertIn(busy_tid, statuses, "Busy tid not found in sampled threads")
+
+                # Sleeper thread: off CPU, no GIL
+                self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_ON_CPU, "Sleeper should be off CPU")
+                self.assertFalse(statuses[sleeper_tid] & THREAD_STATUS_HAS_GIL, "Sleeper should not have GIL")
+
+                # Busy thread: on CPU, has GIL
+                self.assertTrue(statuses[busy_tid] & THREAD_STATUS_ON_CPU, "Busy should be on CPU")
+                self.assertTrue(statuses[busy_tid] & THREAD_STATUS_HAS_GIL, "Busy should have GIL")
+
+            finally:
+                for client_socket in client_sockets:
+                    client_socket.close()
+                p.terminate()
+                p.wait(timeout=SHORT_TIMEOUT)
+                p.stdout.close()
+                p.stderr.close()
 
 
 if __name__ == "__main__":
diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py
index a1342cafff1f83..00694c551ed06d 100644
--- a/Lib/test/test_profiling/test_sampling_profiler.py
+++ b/Lib/test/test_profiling/test_sampling_profiler.py
@@ -56,12 +56,14 @@ def __repr__(self):
 class MockThreadInfo:
     """Mock ThreadInfo for testing since the real one isn't accessible."""
 
-    def __init__(self, thread_id, frame_info):
+    def __init__(self, thread_id, frame_info, status=0, gc_collecting=False):  # Default to THREAD_STATE_RUNNING (0)
         self.thread_id = thread_id
         self.frame_info = frame_info
+        self.status = status
+        self.gc_collecting = gc_collecting
 
     def __repr__(self):
-        return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info})"
+        return f"MockThreadInfo(thread_id={self.thread_id}, frame_info={self.frame_info}, status={self.status}, gc_collecting={self.gc_collecting})"
 
 
 class MockInterpreterInfo:
@@ -665,6 +667,97 @@ def test_gecko_collector_export(self):
         self.assertIn("func2", string_array)
         self.assertIn("other_func", string_array)
 
+    def test_gecko_collector_markers(self):
+        """Test Gecko profile markers for GIL and CPU state tracking."""
+        try:
+            from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU, THREAD_STATUS_GIL_REQUESTED
+        except ImportError:
+            THREAD_STATUS_HAS_GIL = (1 << 0)
+            THREAD_STATUS_ON_CPU = (1 << 1)
+            THREAD_STATUS_GIL_REQUESTED = (1 << 3)
+
+        collector = GeckoCollector()
+
+        # Status combinations for different thread states
+        HAS_GIL_ON_CPU = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU  # Running Python code
+        NO_GIL_ON_CPU = THREAD_STATUS_ON_CPU  # Running native code
+        WAITING_FOR_GIL = THREAD_STATUS_GIL_REQUESTED  # Waiting for GIL
+
+        # Simulate thread state transitions
+        collector.collect([
+            MockInterpreterInfo(0, [
+                MockThreadInfo(1, [("test.py", 10, "python_func")], status=HAS_GIL_ON_CPU)
+            ])
+        ])
+
+        collector.collect([
+            MockInterpreterInfo(0, [
+                MockThreadInfo(1, [("test.py", 15, "wait_func")], status=WAITING_FOR_GIL)
+            ])
+        ])
+
+        collector.collect([
+            MockInterpreterInfo(0, [
+                MockThreadInfo(1, [("test.py", 20, "python_func2")], status=HAS_GIL_ON_CPU)
+            ])
+        ])
+
+        collector.collect([
+            MockInterpreterInfo(0, [
+                MockThreadInfo(1, [("native.c", 100, "native_func")], status=NO_GIL_ON_CPU)
+            ])
+        ])
+
+        profile_data = collector._build_profile()
+
+        # Verify we have threads with markers
+        self.assertIn("threads", profile_data)
+        self.assertEqual(len(profile_data["threads"]), 1)
+        thread_data = profile_data["threads"][0]
+
+        # Check markers exist
+        self.assertIn("markers", thread_data)
+        markers = thread_data["markers"]
+
+        # Should have marker arrays
+        self.assertIn("name", markers)
+        self.assertIn("startTime", markers)
+        self.assertIn("endTime", markers)
+        self.assertIn("category", markers)
+        self.assertGreater(markers["length"], 0, "Should have generated markers")
+
+        # Get marker names from string table
+        string_array = profile_data["shared"]["stringArray"]
+        marker_names = [string_array[idx] for idx in markers["name"]]
+
+        # Verify we have different marker types
+        marker_name_set = set(marker_names)
+
+        # Should have "Has GIL" markers (when thread had GIL)
+        self.assertIn("Has GIL", marker_name_set, "Should have 'Has GIL' markers")
+
+        # Should have "No GIL" markers (when thread didn't have GIL)
+        self.assertIn("No GIL", marker_name_set, "Should have 'No GIL' markers")
+
+        # Should have "On CPU" markers (when thread was on CPU)
+        self.assertIn("On CPU", marker_name_set, "Should have 'On CPU' markers")
+
+        # Should have "Waiting for GIL" markers (when thread was waiting)
+        self.assertIn("Waiting for GIL", marker_name_set, "Should have 'Waiting for GIL' markers")
+
+        # Verify marker structure
+        for i in range(markers["length"]):
+            # All markers should be interval markers (phase = 1)
+            self.assertEqual(markers["phase"][i], 1, f"Marker {i} should be interval marker")
+
+            # All markers should have valid time range
+            start_time = markers["startTime"][i]
+            end_time = markers["endTime"][i]
+            self.assertLessEqual(start_time, end_time, f"Marker {i} should have valid time range")
+
+            # All markers should have valid category
+            self.assertGreaterEqual(markers["category"][i], 0, f"Marker {i} should have valid category")
+
     def test_pstats_collector_export(self):
         collector = PstatsCollector(
             sample_interval_usec=1000000
@@ -2598,19 +2691,30 @@ def test_mode_validation(self):
 
     def test_frames_filtered_with_skip_idle(self):
         """Test that frames are actually filtered when skip_idle=True."""
+        # Import thread status flags
+        try:
+            from _remote_debugging import THREAD_STATUS_HAS_GIL, THREAD_STATUS_ON_CPU
+        except ImportError:
+            THREAD_STATUS_HAS_GIL = (1 << 0)
+            THREAD_STATUS_ON_CPU = (1 << 1)
+
         # Create mock frames with different thread statuses
         class MockThreadInfoWithStatus:
             def __init__(self, thread_id, frame_info, status):
                 self.thread_id = thread_id
                 self.frame_info = frame_info
                 self.status = status
+                self.gc_collecting = False
+
+        # Create test data: active thread (HAS_GIL | ON_CPU), idle thread (neither), and another active thread
+        ACTIVE_STATUS = THREAD_STATUS_HAS_GIL | THREAD_STATUS_ON_CPU  # Has GIL and on CPU
+        IDLE_STATUS = 0  # Neither has GIL nor on CPU
 
-        # Create test data: running thread, idle thread, and another running thread
         test_frames = [
             MockInterpreterInfo(0, [
-                MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], 0),  # RUNNING
-                MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], 1),        # IDLE
-                MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], 0),  # RUNNING
+                MockThreadInfoWithStatus(1, [MockFrameInfo("active1.py", 10, "active_func1")], ACTIVE_STATUS),
+                MockThreadInfoWithStatus(2, [MockFrameInfo("idle.py", 20, "idle_func")], IDLE_STATUS),
+                MockThreadInfoWithStatus(3, [MockFrameInfo("active2.py", 30, "active_func2")], ACTIVE_STATUS),
             ])
         ]
 
diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c
index 5937d4892f550f..8b2478ca9d048c 100644
--- a/Modules/_remote_debugging_module.c
+++ b/Modules/_remote_debugging_module.c
@@ -11,6 +11,7 @@
  * HEADERS AND INCLUDES
  * ============================================================================ */
 
+#include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stddef.h>
@@ -81,6 +82,8 @@ typedef enum _WIN32_THREADSTATE {
 #define SIZEOF_TYPE_OBJ sizeof(PyTypeObject)
 #define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject)
 #define SIZEOF_LONG_OBJ sizeof(PyLongObject)
+#define SIZEOF_GC_RUNTIME_STATE sizeof(struct _gc_runtime_state)
+#define SIZEOF_INTERPRETER_STATE sizeof(PyInterpreterState)
 
 // Calculate the minimum buffer size needed to read interpreter state fields
 // We need to read code_object_generation and potentially tlbc_generation
@@ -178,8 +181,9 @@ static PyStructSequence_Desc CoroInfo_desc = {
 // ThreadInfo structseq type - replaces 2-tuple (thread_id, frame_info)
 static PyStructSequence_Field ThreadInfo_fields[] = {
     {"thread_id", "Thread ID"},
-    {"status", "Thread status"},
+    {"status", "Thread status (flags: HAS_GIL, ON_CPU, UNKNOWN or legacy enum)"},
     {"frame_info", "Frame information"},
+    {"gc_collecting", "Whether GC is collecting (interpreter-level)"},
     {NULL}
 };
 
@@ -187,7 +191,7 @@ static PyStructSequence_Desc ThreadInfo_desc = {
     "_remote_debugging.ThreadInfo",
     "Information about a thread",
     ThreadInfo_fields,
-    2
+    3
 };
 
 // InterpreterInfo structseq type - replaces 2-tuple (interpreter_id, thread_list)
@@ -247,9 +251,16 @@ enum _ThreadState {
 enum _ProfilingMode {
     PROFILING_MODE_WALL = 0,
     PROFILING_MODE_CPU = 1,
-    PROFILING_MODE_GIL = 2
+    PROFILING_MODE_GIL = 2,
+    PROFILING_MODE_ALL = 3  // Combines GIL + CPU checks
 };
 
+// Thread status flags (can be combined)
+#define THREAD_STATUS_HAS_GIL        (1 << 0)  // Thread has the GIL
+#define THREAD_STATUS_ON_CPU         (1 << 1)  // Thread is running on CPU
+#define THREAD_STATUS_UNKNOWN        (1 << 2)  // Status could not be determined
+#define THREAD_STATUS_GIL_REQUESTED  (1 << 3)  // Thread is waiting for the GIL
+
 typedef struct {
     PyObject_HEAD
     proc_handle_t handle;
@@ -2650,28 +2661,74 @@ unwind_stack_for_thread(
 
     long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
 
-    // Calculate thread status based on mode
-    int status = THREAD_STATE_UNKNOWN;
-    if (unwinder->mode == PROFILING_MODE_CPU) {
-        long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
-        status = get_thread_status(unwinder, tid, pthread_id);
-        if (status == -1) {
-            PyErr_Print();
-            PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status");
-            goto error;
-        }
-    } else if (unwinder->mode == PROFILING_MODE_GIL) {
+    // Read GC collecting state from the interpreter (before any skip checks)
+    uintptr_t interp_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.interp);
+
+    // Read the GC runtime state from the interpreter state
+    uintptr_t gc_addr = interp_addr + unwinder->debug_offsets.interpreter_state.gc;
+    char gc_state[SIZEOF_GC_RUNTIME_STATE];
+    if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, gc_addr, unwinder->debug_offsets.gc.size, gc_state) < 0) {
+        set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to read GC state");
+        goto error;
+    }
+
+    int gc_collecting = GET_MEMBER(int, gc_state, unwinder->debug_offsets.gc.collecting);
+
+    // Calculate thread status using flags (always)
+    int status_flags = 0;
+
+    // Check GIL status
+    int has_gil = 0;
+    int gil_requested = 0;
 #ifdef Py_GIL_DISABLED
-        // All threads are considered running in free threading builds if they have a thread state attached
-        int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
-        status = active ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
+    int active = GET_MEMBER(_thread_status, ts, unwinder->debug_offsets.thread_state.status).active;
+    has_gil = active;
 #else
-        status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
+    // Read holds_gil directly from thread state
+    has_gil = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.holds_gil);
+
+    // Check if thread is actively requesting the GIL
+    if (unwinder->debug_offsets.thread_state.gil_requested != 0) {
+        gil_requested = GET_MEMBER(int, ts, unwinder->debug_offsets.thread_state.gil_requested);
+    }
+
+    // Set GIL_REQUESTED flag if thread is waiting
+    if (!has_gil && gil_requested) {
+        status_flags |= THREAD_STATUS_GIL_REQUESTED;
+    }
 #endif
-    } else {
-        // PROFILING_MODE_WALL - all threads are considered running
-        status = THREAD_STATE_RUNNING;
+    if (has_gil) {
+        status_flags |= THREAD_STATUS_HAS_GIL;
+    }
+
+    // Assert that we never have both HAS_GIL and GIL_REQUESTED set at the same time
+    // This would indicate a race condition in the GIL state tracking
+    assert(!(has_gil && gil_requested));
+
+    // Check CPU status
+    long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
+    int cpu_status = get_thread_status(unwinder, tid, pthread_id);
+    if (cpu_status == -1) {
+        status_flags |= THREAD_STATUS_UNKNOWN;
+    } else if (cpu_status == THREAD_STATE_RUNNING) {
+        status_flags |= THREAD_STATUS_ON_CPU;
+    }
+
+    // Determine if we should skip based on mode
+    int status = THREAD_STATE_RUNNING;  // Default for skip logic
+    if (unwinder->mode == PROFILING_MODE_CPU) {
+        status = cpu_status;
+    } else if (unwinder->mode == PROFILING_MODE_GIL) {
+        status = has_gil ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
+    } else if (unwinder->mode == PROFILING_MODE_ALL) {
+        // In ALL mode, considered running if has GIL or on CPU
+        if (has_gil || (status_flags & THREAD_STATUS_ON_CPU)) {
+            status = THREAD_STATE_RUNNING;
+        } else {
+            status = THREAD_STATE_IDLE;
+        }
     }
+    // PROFILING_MODE_WALL defaults to THREAD_STATE_RUNNING
 
     // Check if we should skip this thread based on mode
     int should_skip = 0;
@@ -2719,16 +2776,25 @@ unwind_stack_for_thread(
         goto error;
     }
 
-    PyObject *py_status = PyLong_FromLong(status);
+    // Always use status_flags
+    PyObject *py_status = PyLong_FromLong(status_flags);
     if (py_status == NULL) {
         set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status");
         goto error;
     }
-    PyErr_Print();
 
+    PyObject *py_gc_collecting = PyBool_FromLong(gc_collecting);
+    if (py_gc_collecting == NULL) {
+        set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create gc_collecting");
+        Py_DECREF(py_status);
+        goto error;
+    }
+
+    // py_status contains status flags (bitfield)
     PyStructSequence_SetItem(result, 0, thread_id);
     PyStructSequence_SetItem(result, 1, py_status);  // Steals reference
     PyStructSequence_SetItem(result, 2, frame_info); // Steals reference
+    PyStructSequence_SetItem(result, 3, py_gc_collecting); // Steals reference
 
     cleanup_stack_chunks(&chunks);
     return result;
@@ -3401,6 +3467,21 @@ _remote_debugging_exec(PyObject *m)
     if (rc < 0) {
         return -1;
     }
+
+    // Add thread status flag constants
+    if (PyModule_AddIntConstant(m, "THREAD_STATUS_HAS_GIL", THREAD_STATUS_HAS_GIL) < 0) {
+        return -1;
+    }
+    if (PyModule_AddIntConstant(m, "THREAD_STATUS_ON_CPU", THREAD_STATUS_ON_CPU) < 0) {
+        return -1;
+    }
+    if (PyModule_AddIntConstant(m, "THREAD_STATUS_UNKNOWN", THREAD_STATUS_UNKNOWN) < 0) {
+        return -1;
+    }
+    if (PyModule_AddIntConstant(m, "THREAD_STATUS_GIL_REQUESTED", THREAD_STATUS_GIL_REQUESTED) < 0) {
+        return -1;
+    }
+
     if (RemoteDebugging_InitState(st) < 0) {
         return -1;
     }
diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c
index 6bf64868cbb2d3..ff429c797a4774 100644
--- a/Python/ceval_gil.c
+++ b/Python/ceval_gil.c
@@ -207,6 +207,7 @@ drop_gil_impl(PyThreadState *tstate, struct _gil_runtime_state *gil)
     _Py_atomic_store_int_relaxed(&gil->locked, 0);
     if (tstate != NULL) {
         tstate->holds_gil = 0;
+        tstate->gil_requested = 0;
     }
     COND_SIGNAL(gil->cond);
     MUTEX_UNLOCK(gil->mutex);
@@ -320,6 +321,8 @@ take_gil(PyThreadState *tstate)
 
     MUTEX_LOCK(gil->mutex);
 
+    tstate->gil_requested = 1;
+
     int drop_requested = 0;
     while (_Py_atomic_load_int_relaxed(&gil->locked)) {
         unsigned long saved_switchnum = gil->switch_number;
@@ -407,6 +410,7 @@ take_gil(PyThreadState *tstate)
     }
     assert(_PyThreadState_CheckConsistency(tstate));
 
+    tstate->gil_requested = 0;
     tstate->holds_gil = 1;
     _Py_unset_eval_breaker_bit(tstate, _PY_GIL_DROP_REQUEST_BIT);
     update_eval_breaker_for_thread(interp, tstate);