diff --git a/api/docs/release.dox b/api/docs/release.dox
index 56760ee2b5..237203d88c 100644
--- a/api/docs/release.dox
+++ b/api/docs/release.dox
@@ -141,6 +141,11 @@ Further non-compatibility-affecting changes include:
    instructions in the trace. This works for traces that have embedded instruction
    encodings in them, and also for legacy traces without embedded encodings where the
    encodings are obtained from the application binaries instead.
+ - Added a new drmemtrace analyzer flag -sched_syscall_file to allow specifying the
+   system call trace template file to be used for dynamic injection of system call
+   trace templates. Added similar options for the drmemtrace scheduler:
+   #dynamorio::drmemtrace::scheduler_tmpl_t::scheduler_options_t::kernel_syscall_trace_path
+   and #dynamorio::drmemtrace::scheduler_tmpl_t::scheduler_options_t::kernel_syscall_reader.
 
 **************************************************
 <hr>
diff --git a/clients/drcachesim/analyzer.cpp b/clients/drcachesim/analyzer.cpp
index 2583422ac1..b6cf14a8c8 100644
--- a/clients/drcachesim/analyzer.cpp
+++ b/clients/drcachesim/analyzer.cpp
@@ -320,19 +320,22 @@ analyzer_tmpl_t<RecordType, ReaderType>::init_scheduler_common(
             sched_ops.single_lockstep_output = true;
             worker_count_ = 1;
         }
-    } else if (parallel_) {
-        sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_);
-        sched_ops.replay_as_traced_istream = options.replay_as_traced_istream;
-        sched_ops.read_inputs_in_init = options.read_inputs_in_init;
-        if (worker_count_ <= 0)
-            worker_count_ = std::thread::hardware_concurrency();
-        output_count = worker_count_;
     } else {
-        sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_);
+        if (parallel_) {
+            sched_ops = sched_type_t::make_scheduler_parallel_options(verbosity_);
+            if (worker_count_ <= 0)
+                worker_count_ = std::thread::hardware_concurrency();
+            output_count = worker_count_;
+        } else {
+            sched_ops = sched_type_t::make_scheduler_serial_options(verbosity_);
+            worker_count_ = 1;
+            output_count = 1;
+        }
+        // As noted in the init_scheduler_common() header comment, we preserve only
+        // some select fields.
         sched_ops.replay_as_traced_istream = options.replay_as_traced_istream;
         sched_ops.read_inputs_in_init = options.read_inputs_in_init;
-        worker_count_ = 1;
-        output_count = 1;
+        sched_ops.kernel_syscall_trace_path = options.kernel_syscall_trace_path;
     }
     sched_mapping_ = options.mapping;
     if (scheduler_.init(workloads, output_count, std::move(sched_ops)) !=
diff --git a/clients/drcachesim/analyzer.h b/clients/drcachesim/analyzer.h
index 2e6c92c048..c3440d39eb 100644
--- a/clients/drcachesim/analyzer.h
+++ b/clients/drcachesim/analyzer.h
@@ -217,6 +217,7 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
         operator=(const analyzer_worker_data_t &) = delete;
     };
 
+    // See comment on init_scheduler_common() for some noteworthy details.
     bool
     init_scheduler(const std::vector<std::string> &trace_paths,
                    // To include all threads/shards, use empty sets.
@@ -224,15 +225,17 @@ template <typename RecordType, typename ReaderType> class analyzer_tmpl_t {
                    const std::set<int> &only_shards, int output_limit, int verbosity,
                    typename sched_type_t::scheduler_options_t options);
 
-    // For core-sharded, worker_count_ must be set prior to calling this; for parallel
-    // mode if it is not set it will be set to the underlying core count.
-    // For core-sharded, all of "options" is used; otherwise, only the
-    // read_inputs_in_init field is preserved.
+    // See comment on init_scheduler_common() for some noteworthy details.
     bool
     init_scheduler(std::unique_ptr<ReaderType> reader,
                    std::unique_ptr<ReaderType> reader_end, int verbosity,
                    typename sched_type_t::scheduler_options_t options);
 
+    // For core-sharded, worker_count_ must be set prior to calling this; for parallel
+    // mode if it is not set it will be set to the underlying core count.
+    // For core-sharded, all of "options" is used; otherwise, the
+    // read_inputs_in_init, replay_as_traced_istream, and kernel_syscall_trace_path
+    // fields are preserved.
     bool
     init_scheduler_common(std::vector<typename sched_type_t::input_workload_t> &workloads,
                           typename sched_type_t::scheduler_options_t options);
diff --git a/clients/drcachesim/analyzer_multi.cpp b/clients/drcachesim/analyzer_multi.cpp
index 3d864a29c8..ad77dad5f1 100644
--- a/clients/drcachesim/analyzer_multi.cpp
+++ b/clients/drcachesim/analyzer_multi.cpp
@@ -571,6 +571,8 @@ analyzer_multi_tmpl_t<RecordType, ReaderType>::analyzer_multi_tmpl_t()
 #endif
     }
 
+    sched_ops.kernel_syscall_trace_path = op_sched_syscall_file.get_value();
+
     if (!indirs.empty()) {
         std::vector<std::string> tracedirs;
         for (const std::string &indir : indirs)
diff --git a/clients/drcachesim/common/memtrace_stream.h b/clients/drcachesim/common/memtrace_stream.h
index b02b4dfddf..4a9f255803 100644
--- a/clients/drcachesim/common/memtrace_stream.h
+++ b/clients/drcachesim/common/memtrace_stream.h
@@ -113,7 +113,7 @@ class memtrace_stream_t {
          */
         SCHED_STAT_HIT_OUTPUT_LIMIT,
         /**
-         * Counts the instances when the kernel context switch sequence was injected.
+         * Counts the instances when the kernel context switch sequences were injected.
          */
         SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS,
         /** Count of statistic types. */
diff --git a/clients/drcachesim/common/options.cpp b/clients/drcachesim/common/options.cpp
index 69976820cd..945bb161da 100644
--- a/clients/drcachesim/common/options.cpp
+++ b/clients/drcachesim/common/options.cpp
@@ -1039,12 +1039,20 @@ droption_t<std::string>
 #endif
 droption_t<std::string> op_sched_switch_file(
     DROPTION_SCOPE_FRONTEND, "sched_switch_file", "",
-    "Path to file holding context switch sequences",
+    "Path to file holding kernel context switch sequences",
     "Applies to -core_sharded and -core_serial.  Path to file holding context switch "
     "sequences.  The file can contain multiple sequences each with regular trace headers "
     "and the sequence proper bracketed by TRACE_MARKER_TYPE_CONTEXT_SWITCH_START and "
     "TRACE_MARKER_TYPE_CONTEXT_SWITCH_END markers.");
 
+droption_t<std::string> op_sched_syscall_file(
+    DROPTION_SCOPE_FRONTEND, "sched_syscall_file", "",
+    "Path to file holding kernel system call sequences",
+    "Path to file holding system call sequences.  The file can contain multiple "
+    "sequences each with regular trace headers and the sequence proper bracketed by "
+    "TRACE_MARKER_TYPE_SYSCALL_TRACE_START and TRACE_MARKER_TYPE_SYSCALL_TRACE_END "
+    "markers.");
+
 droption_t<bool> op_sched_randomize(
     DROPTION_SCOPE_FRONTEND, "sched_randomize", false,
     "Pick next inputs randomly on context switches",
diff --git a/clients/drcachesim/common/options.h b/clients/drcachesim/common/options.h
index 562019f64a..50bbbadf43 100644
--- a/clients/drcachesim/common/options.h
+++ b/clients/drcachesim/common/options.h
@@ -216,6 +216,7 @@ extern dynamorio::droption::droption_t<std::string> op_replay_file;
 extern dynamorio::droption::droption_t<std::string> op_cpu_schedule_file;
 #endif
 extern dynamorio::droption::droption_t<std::string> op_sched_switch_file;
+extern dynamorio::droption::droption_t<std::string> op_sched_syscall_file;
 extern dynamorio::droption::droption_t<bool> op_sched_randomize;
 extern dynamorio::droption::droption_t<bool> op_sched_disable_direct_switches;
 extern dynamorio::droption::droption_t<bool> op_sched_infinite_timeouts;
diff --git a/clients/drcachesim/scheduler/scheduler.h b/clients/drcachesim/scheduler/scheduler.h
index f5d6ae4ca4..2caa9356c0 100644
--- a/clients/drcachesim/scheduler/scheduler.h
+++ b/clients/drcachesim/scheduler/scheduler.h
@@ -829,6 +829,31 @@ template <typename RecordType, typename ReaderType> class scheduler_tmpl_t {
          * when raising this value on uneven inputs.
          */
         double exit_if_fraction_inputs_left = 0.1;
+        /**
+         * Input file containing template sequences of kernel system call code.
+         * Each sequence must start with a #TRACE_MARKER_TYPE_SYSCALL_TRACE_START
+         * marker and end with #TRACE_MARKER_TYPE_SYSCALL_TRACE_END.
+         * The value of each marker must hold the system call number for the system call
+         * it corresponds to. Sequences for multiple system calls are concatenated into a
+         * single file. Each sequence should be in the regular offline drmemtrace format.
+         * Whenever a #TRACE_MARKER_TYPE_SYSCALL marker is encountered in a trace, if a
+         * corresponding sequence with the same marker value exists it is inserted into
+         * the output stream after the #TRACE_MARKER_TYPE_SYSCALL marker.
+         * The same file (or reader) must be passed when replaying as this kernel
+         * code is not stored when recording.
+         * An alternative to passing the file path is to pass #kernel_syscall_reader
+         * and #kernel_syscall_reader_end.
+         */
+        std::string kernel_syscall_trace_path;
+        /**
+         * An alternative to #kernel_syscall_trace_path is to pass a reader and
+         * #kernel_syscall_reader_end.  See the description of #kernel_syscall_trace_path.
+         * This field is only examined if #kernel_syscall_trace_path is empty.
+         * The scheduler will call the init() function for the reader.
+         */
+        std::unique_ptr<ReaderType> kernel_syscall_reader;
+        /** The end reader for #kernel_syscall_reader. */
+        std::unique_ptr<ReaderType> kernel_syscall_reader_end;
         // When adding new options, also add to print_configuration().
     };
 
diff --git a/clients/drcachesim/scheduler/scheduler_dynamic.cpp b/clients/drcachesim/scheduler/scheduler_dynamic.cpp
index 8f8dad07eb..ff1b999b6d 100644
--- a/clients/drcachesim/scheduler/scheduler_dynamic.cpp
+++ b/clients/drcachesim/scheduler/scheduler_dynamic.cpp
@@ -1,5 +1,5 @@
 /* **********************************************************
- * Copyright (c) 2023-2024 Google, Inc.  All rights reserved.
+ * Copyright (c) 2023-2025 Google, Inc.  All rights reserved.
  * **********************************************************/
 
 /*
@@ -596,16 +596,10 @@ scheduler_dynamic_tmpl_t<RecordType, ReaderType>::process_marker(
     case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START:
         outputs_[output].in_context_switch_code = true;
         break;
-    case TRACE_MARKER_TYPE_SYSCALL_TRACE_START:
-        outputs_[output].in_syscall_code = true;
-        break;
     case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END:
         // We have to delay until the next record.
         outputs_[output].hit_switch_code_end = true;
         break;
-    case TRACE_MARKER_TYPE_SYSCALL_TRACE_END:
-        outputs_[output].in_syscall_code = false;
-        break;
     case TRACE_MARKER_TYPE_TIMESTAMP:
         // Syscall sequences are not expected to have a timestamp.
         assert(!outputs_[output].in_syscall_code);
diff --git a/clients/drcachesim/scheduler/scheduler_impl.cpp b/clients/drcachesim/scheduler/scheduler_impl.cpp
index eeda7fbe1b..18687ce6a4 100644
--- a/clients/drcachesim/scheduler/scheduler_impl.cpp
+++ b/clients/drcachesim/scheduler/scheduler_impl.cpp
@@ -348,6 +348,23 @@ scheduler_impl_tmpl_t<memref_t, reader_t>::insert_switch_tid_pid(input_info_t &i
     // We do nothing, as every record has a tid from the separate inputs.
 }
 
+template <>
+template <>
+typename scheduler_tmpl_t<memref_t, reader_t>::switch_type_t
+scheduler_impl_tmpl_t<memref_t, reader_t>::invalid_kernel_sequence_key()
+{
+    return switch_type_t::SWITCH_INVALID;
+}
+
+template <>
+template <>
+int
+scheduler_impl_tmpl_t<memref_t, reader_t>::invalid_kernel_sequence_key()
+{
+    // System call numbers are small non-negative integers, so -1 is never a valid key.
+    return -1;
+}
+
 /******************************************************************************
  * Specializations for scheduler_impl_tmpl_t<record_reader_t>, aka
  * record_scheduler_impl_t.
@@ -568,6 +585,23 @@ scheduler_impl_tmpl_t<trace_entry_t, record_reader_t>::insert_switch_tid_pid(
     input.queue.push_front(tid);
 }
 
+template <>
+template <>
+typename scheduler_tmpl_t<trace_entry_t, record_reader_t>::switch_type_t
+scheduler_impl_tmpl_t<trace_entry_t, record_reader_t>::invalid_kernel_sequence_key()
+{
+    return switch_type_t::SWITCH_INVALID;
+}
+
+template <>
+template <>
+int
+scheduler_impl_tmpl_t<trace_entry_t, record_reader_t>::invalid_kernel_sequence_key()
+{
+    // System call numbers are small non-negative integers, so -1 is never a valid key.
+    return -1;
+}
+
 /***************************************************************************
  * Scheduler.
  */
@@ -629,6 +663,12 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::print_configuration()
            options_.honor_infinite_timeouts);
     VPRINT(this, 1, "  %-25s : %f\n", "exit_if_fraction_inputs_left",
            options_.exit_if_fraction_inputs_left);
+    VPRINT(this, 1, "  %-25s : %s\n", "kernel_syscall_trace_path",
+           options_.kernel_syscall_trace_path.c_str());
+    VPRINT(this, 1, "  %-25s : %p\n", "kernel_syscall_reader",
+           options_.kernel_syscall_reader.get());
+    VPRINT(this, 1, "  %-25s : %p\n", "kernel_syscall_reader_end",
+           options_.kernel_syscall_reader_end.get());
 }
 
 template <typename RecordType, typename ReaderType>
@@ -871,6 +911,10 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::init(
     if (res != sched_type_t::STATUS_SUCCESS)
         return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
 
+    res = read_syscall_sequences();
+    if (res != sched_type_t::STATUS_SUCCESS)
+        return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
+
     // Determine whether we need to read ahead in the inputs.  There are cases where we
     // do not want to do that as it would block forever if the inputs are not available
     // (e.g., online analysis IPC readers); it also complicates ordinals so we avoid it
@@ -1385,28 +1429,54 @@ template <typename RecordType, typename ReaderType>
 typename scheduler_tmpl_t<RecordType, ReaderType>::scheduler_status_t
 scheduler_impl_tmpl_t<RecordType, ReaderType>::read_switch_sequences()
 {
-    std::unique_ptr<ReaderType> reader, reader_end;
-    if (!options_.kernel_switch_trace_path.empty()) {
-        reader = get_reader(options_.kernel_switch_trace_path, verbosity_);
+    return read_kernel_sequences(switch_sequence_, options_.kernel_switch_trace_path,
+                                 std::move(options_.kernel_switch_reader),
+                                 std::move(options_.kernel_switch_reader_end),
+                                 TRACE_MARKER_TYPE_CONTEXT_SWITCH_START,
+                                 TRACE_MARKER_TYPE_CONTEXT_SWITCH_END, "context switch");
+}
+
+template <typename RecordType, typename ReaderType>
+typename scheduler_tmpl_t<RecordType, ReaderType>::scheduler_status_t
+scheduler_impl_tmpl_t<RecordType, ReaderType>::read_syscall_sequences()
+{
+
+    return read_kernel_sequences(syscall_sequence_, options_.kernel_syscall_trace_path,
+                                 std::move(options_.kernel_syscall_reader),
+                                 std::move(options_.kernel_syscall_reader_end),
+                                 TRACE_MARKER_TYPE_SYSCALL_TRACE_START,
+                                 TRACE_MARKER_TYPE_SYSCALL_TRACE_END, "system call");
+}
+
+template <typename RecordType, typename ReaderType>
+template <typename SequenceKey>
+typename scheduler_tmpl_t<RecordType, ReaderType>::scheduler_status_t
+scheduler_impl_tmpl_t<RecordType, ReaderType>::read_kernel_sequences(
+    std::unordered_map<SequenceKey, std::vector<RecordType>, custom_hash_t<SequenceKey>>
+        &sequence,
+    std::string trace_path, std::unique_ptr<ReaderType> reader,
+    std::unique_ptr<ReaderType> reader_end, trace_marker_type_t start_marker,
+    trace_marker_type_t end_marker, std::string sequence_type)
+{
+    if (!trace_path.empty()) {
+        reader = get_reader(trace_path, verbosity_);
         if (!reader || !reader->init()) {
-            error_string_ +=
-                "Failed to open kernel switch file " + options_.kernel_switch_trace_path;
+            error_string_ += "Failed to open file for kernel " + sequence_type +
+                " sequences: " + trace_path;
             return sched_type_t::STATUS_ERROR_FILE_OPEN_FAILED;
         }
         reader_end = get_default_reader();
-    } else if (!options_.kernel_switch_reader) {
-        // No switch data provided.
+    } else if (!reader) {
+        // No kernel data provided.
         return sched_type_t::STATUS_SUCCESS;
     } else {
-        if (!options_.kernel_switch_reader_end) {
-            error_string_ += "Provided kernel switch reader but no end";
+        if (!reader_end) {
+            error_string_ += "Provided kernel " + sequence_type + " reader but no end";
             return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
         }
-        reader = std::move(options_.kernel_switch_reader);
-        reader_end = std::move(options_.kernel_switch_reader_end);
         // We own calling init() as it can block.
         if (!reader->init()) {
-            error_string_ += "Failed to init kernel switch reader";
+            error_string_ += "Failed to init kernel " + sequence_type + " reader";
             return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
         }
     }
@@ -1414,34 +1484,59 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::read_switch_sequences()
     // memory and don't need to stream them on every use.
     // We read a single stream, even if underneath these are split into subfiles
     // in an archive.
-    switch_type_t switch_type = sched_type_t::SWITCH_INVALID;
+    SequenceKey sequence_key = invalid_kernel_sequence_key<SequenceKey>();
+    const SequenceKey INVALID_SEQ_KEY = invalid_kernel_sequence_key<SequenceKey>();
+    bool in_sequence = false;
     while (*reader != *reader_end) {
         RecordType record = **reader;
         // Only remember the records between the markers.
         trace_marker_type_t marker_type = TRACE_MARKER_TYPE_RESERVED_END;
         uintptr_t marker_value = 0;
-        if (record_type_is_marker(record, marker_type, marker_value) &&
-            marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START) {
-            switch_type = static_cast<switch_type_t>(marker_value);
-            if (!switch_sequence_[switch_type].empty()) {
-                error_string_ += "Duplicate context switch sequence type found";
+        bool is_marker = record_type_is_marker(record, marker_type, marker_value);
+        if (is_marker && marker_type == start_marker) {
+            if (in_sequence) {
+                error_string_ += "Found another " + sequence_type +
+                    " sequence start without prior ending";
+                return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
+            }
+            sequence_key = static_cast<SequenceKey>(marker_value);
+            in_sequence = true;
+            if (sequence_key == INVALID_SEQ_KEY) {
+                error_string_ +=
+                    "Invalid " + sequence_type + " sequence found with default key";
+                return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
+            }
+            if (!sequence[sequence_key].empty()) {
+                error_string_ += "Duplicate " + sequence_type + " sequence found";
                 return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
             }
         }
-        if (switch_type != sched_type_t::SWITCH_INVALID)
-            switch_sequence_[switch_type].push_back(record);
-        if (record_type_is_marker(record, marker_type, marker_value) &&
-            marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_END) {
-            if (static_cast<switch_type_t>(marker_value) != switch_type) {
-                error_string_ += "Context switch marker values mismatched";
+        if (in_sequence)
+            sequence[sequence_key].push_back(record);
+        if (is_marker && marker_type == end_marker) {
+            if (!in_sequence) {
+                error_string_ += "Found " + sequence_type +
+                    " sequence end marker without start marker";
+                return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
+            }
+            if (static_cast<SequenceKey>(marker_value) != sequence_key) {
+                error_string_ += sequence_type + " marker values mismatched";
                 return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
             }
-            VPRINT(this, 1, "Read %zu kernel context switch records for type %d\n",
-                   switch_sequence_[switch_type].size(), switch_type);
-            switch_type = sched_type_t::SWITCH_INVALID;
+            VPRINT(this, 1, "Read %zu kernel %s records for key %d\n",
+                   sequence[sequence_key].size(), sequence_type.c_str(), sequence_key);
+            sequence_key = INVALID_SEQ_KEY;
+            in_sequence = false;
         }
         ++(*reader);
     }
+    if (in_sequence) {
+        // A truncated template file must be rejected even when asserts are
+        // compiled out; otherwise a partial sequence would be injected.
+        error_string_ += "Reached end of kernel " + sequence_type +
+            " sequences without a sequence end marker";
+        return sched_type_t::STATUS_ERROR_INVALID_PARAMETER;
+    }
     return sched_type_t::STATUS_SUCCESS;
 }
 
@@ -1602,6 +1691,28 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::get_initial_input_content(
     return sched_type_t::STATUS_SUCCESS;
 }
 
+template <typename RecordType, typename ReaderType>
+typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
+scheduler_impl_tmpl_t<RecordType, ReaderType>::inject_kernel_sequence(
+    std::vector<RecordType> &sequence, input_info_t *input)
+{
+    // Inject kernel template code: queues a copy of each record in "sequence" on
+    // "input" ahead of any prior queued items, which is why we walk in reverse for
+    // the push_front calls to the deque.  The records belong to this input (the
+    // kernel is acting on its behalf) so we update their tid to match.  They are
+    // considered as is_record_synthetic() and do not affect input stream ordinals.
+    // Returns STATUS_EOF if "sequence" is empty (nothing is queued), else STATUS_OK.
+    // XXX: These will appear before the top headers of a new thread: it is slightly
+    // odd to have regular records with the new tid before the top headers.
+    if (sequence.empty())
+        return stream_status_t::STATUS_EOF;
+    for (int i = static_cast<int>(sequence.size()) - 1; i >= 0; --i) {
+        RecordType record = sequence[i];
+        record_type_set_tid(record, input->tid);
+        input->queue.push_front(record);
+    }
+    return stream_status_t::STATUS_OK;
+}
 template <typename RecordType, typename ReaderType>
 typename scheduler_tmpl_t<RecordType, ReaderType>::scheduler_status_t
 scheduler_impl_tmpl_t<RecordType, ReaderType>::open_reader(
@@ -1778,7 +1889,7 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::is_record_synthetic(
     int index = outputs_[output].cur_input;
     if (index < 0)
         return false;
-    if (outputs_[output].in_context_switch_code)
+    if (outputs_[output].in_context_switch_code || outputs_[output].in_syscall_code)
         return true;
     return inputs_[index].reader->is_record_synthetic();
 }
@@ -2380,13 +2491,16 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::pick_next_input(output_ordinal_t
     }
     // We can't easily place these stats inside set_cur_input() as we call that to
     // temporarily give up our input.
-    on_context_switch(output, prev_index, index);
+    stream_status_t on_switch_res = on_context_switch(output, prev_index, index);
+    if (on_switch_res != stream_status_t::STATUS_OK) {
+        return on_switch_res;
+    }
     set_cur_input(output, index);
     return res;
 }
 
 template <typename RecordType, typename ReaderType>
-void
+typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
 scheduler_impl_tmpl_t<RecordType, ReaderType>::on_context_switch(
     output_ordinal_t output, input_ordinal_t prev_input, input_ordinal_t new_input)
 {
@@ -2415,12 +2529,12 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::on_context_switch(
     // set_cur_input. Here we get the stolen input events too, and we don't have
     // to filter out the init-time set_cur_input cases.
     if (!do_inject_switch_seq)
-        return;
+        return stream_status_t::STATUS_OK;
     if (inputs_[new_input].pid != INVALID_PID) {
         insert_switch_tid_pid(inputs_[new_input]);
     }
     if (switch_sequence_.empty())
-        return;
+        return stream_status_t::STATUS_OK;
     switch_type_t switch_type = sched_type_t::SWITCH_INVALID;
     if ( // XXX: idle-to-input transitions are assumed to be process switches
          // for now. But we may want to improve this heuristic.
@@ -2429,31 +2543,50 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::on_context_switch(
         switch_type = sched_type_t::SWITCH_PROCESS;
     else
         switch_type = sched_type_t::SWITCH_THREAD;
-    if (switch_sequence_[switch_type].empty())
+    if (switch_sequence_.find(switch_type) == switch_sequence_.end())
+        return stream_status_t::STATUS_OK;
+    stream_status_t res =
+        inject_kernel_sequence(switch_sequence_[switch_type], &inputs_[new_input]);
+    if (res == stream_status_t::STATUS_OK) {
+        ++outputs_[output]
+              .stats[memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS];
+        VPRINT(this, 3, "Inserted %zu switch records for type %d from %d.%d to %d.%d\n",
+               switch_sequence_[switch_type].size(), switch_type,
+               prev_input != sched_type_t::INVALID_INPUT_ORDINAL
+                   ? inputs_[prev_input].workload
+                   : -1,
+               prev_input, inputs_[new_input].workload, new_input);
+    } else if (res != stream_status_t::STATUS_EOF) {
+        return res;
+    }
+    return stream_status_t::STATUS_OK;
+}
+
+template <typename RecordType, typename ReaderType>
+void
+scheduler_impl_tmpl_t<RecordType, ReaderType>::update_syscall_state(
+    RecordType record, output_ordinal_t output)
+{
+    if (outputs_[output].hit_syscall_code_end) {
+        // We have to delay so the end marker is still in_syscall_code.
+        outputs_[output].in_syscall_code = false;
+        outputs_[output].hit_syscall_code_end = false;
+    }
+
+    trace_marker_type_t marker_type;
+    uintptr_t marker_value = 0;
+    if (!record_type_is_marker(record, marker_type, marker_value))
         return;
-    // Inject kernel context switch code.  Since the injected records belong to
-    // this input (the kernel is acting on behalf of this input) we insert them
-    // into the input's queue, but ahead of any prior queued items.  This is why
-    // we walk in reverse, for the push_front calls to the deque.  We update the
-    // tid of the records here to match.  They are considered as
-    // is_record_synthetic() and do not affect input stream ordinals.
-    // XXX: These will appear before the top headers of a new thread which is
-    // slightly odd to have regular records with the new tid before the top
-    // headers.
-    ++outputs_[output]
-          .stats[memtrace_stream_t::SCHED_STAT_KERNEL_SWITCH_SEQUENCE_INJECTIONS];
-    for (int i = static_cast<int>(switch_sequence_[switch_type].size()) - 1; i >= 0;
-         --i) {
-        RecordType record = switch_sequence_[switch_type][i];
-        record_type_set_tid(record, inputs_[new_input].tid);
-        inputs_[new_input].queue.emplace_front(record);
-    }
-    VPRINT(this, 3, "Inserted %zu switch for type %d from %d.%d to %d.%d\n",
-           switch_sequence_[switch_type].size(), switch_type,
-           prev_input != sched_type_t::INVALID_INPUT_ORDINAL
-               ? inputs_[prev_input].workload
-               : -1,
-           prev_input, inputs_[new_input].workload, new_input);
+    switch (marker_type) {
+    case TRACE_MARKER_TYPE_SYSCALL_TRACE_START:
+        outputs_[output].in_syscall_code = true;
+        break;
+    case TRACE_MARKER_TYPE_SYSCALL_TRACE_END:
+        // We have to delay until the next record.
+        outputs_[output].hit_syscall_code_end = true;
+        break;
+    default: break;
+    }
 }
 
 template <typename RecordType, typename ReaderType>
@@ -2590,6 +2723,11 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
         if (input->instrs_pre_read > 0 && record_type_is_instr(record))
             --input->instrs_pre_read;
         VDO(this, 5, print_record(record););
+
+        // We want check_for_input_switch() to have the updated state, so we process
+        // syscall trace related markers now.
+        update_syscall_state(record, output);
+
         bool need_new_input = false;
         bool preempt = false;
         uint64_t blocked_time = 0;
@@ -2672,7 +2810,11 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
             } else if (res == sched_type_t::STATUS_STOLE) {
                 // We need to loop to get the new record.
                 input = &inputs_[outputs_[output].cur_input];
-                on_context_switch(output, prev_input, input->index);
+                stream_status_t on_switch_res =
+                    on_context_switch(output, prev_input, input->index);
+                if (on_switch_res != stream_status_t::STATUS_OK) {
+                    return on_switch_res;
+                }
                 lock.unlock();
                 lock = std::unique_lock<mutex_dbg_owned>(*input->lock);
                 lock.lock();
@@ -2692,7 +2834,34 @@ scheduler_impl_tmpl_t<RecordType, ReaderType>::next_record(output_ordinal_t outp
     outputs_[output].last_record = record;
     record_type_has_tid(record, input->last_record_tid);
     record_type_has_pid(record, input->pid);
-    return sched_type_t::STATUS_OK;
+    return finalize_next_record(record, input);
+}
+
+template <typename RecordType, typename ReaderType>
+typename scheduler_tmpl_t<RecordType, ReaderType>::stream_status_t
+scheduler_impl_tmpl_t<RecordType, ReaderType>::finalize_next_record(
+    const RecordType &record, input_info_t *input)
+{
+    trace_marker_type_t marker_type;
+    uintptr_t marker_value;
+    // If "record" is a TRACE_MARKER_TYPE_SYSCALL marker with a matching template,
+    // queue the template now that the marker itself is done, so it follows the marker.
+    if (record_type_is_marker(record, marker_type, marker_value) &&
+        marker_type == TRACE_MARKER_TYPE_SYSCALL &&
+        syscall_sequence_.find(static_cast<int>(marker_value)) !=
+            syscall_sequence_.end()) {
+        int syscall_num = static_cast<int>(marker_value);
+        stream_status_t res =
+            inject_kernel_sequence(syscall_sequence_[syscall_num], input);
+        if (res == stream_status_t::STATUS_OK) {
+            VPRINT(this, 3, "Inserted %zu syscall records for syscall %d to %d.%d\n",
+                   syscall_sequence_[syscall_num].size(), syscall_num, input->workload,
+                   input->index);
+        } else if (res != stream_status_t::STATUS_EOF) {
+            return res;
+        }
+    }
+    return stream_status_t::STATUS_OK;
 }
 
 template <typename RecordType, typename ReaderType>
diff --git a/clients/drcachesim/scheduler/scheduler_impl.h b/clients/drcachesim/scheduler/scheduler_impl.h
index 9fa0d0fffb..5049614684 100644
--- a/clients/drcachesim/scheduler/scheduler_impl.h
+++ b/clients/drcachesim/scheduler/scheduler_impl.h
@@ -197,6 +197,7 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
         // We use a deque so we can iterate over it.
         std::deque<RecordType> queue;
         bool cur_from_queue;
+
         std::set<output_ordinal_t> binding;
         int priority = 0;
         std::vector<range_t> regions_of_interest;
@@ -405,10 +406,13 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
     uint64_t
     scale_blocked_time(uint64_t initial_time) const;
 
-    void
+    stream_status_t
     on_context_switch(output_ordinal_t output, input_ordinal_t prev_input,
                       input_ordinal_t new_input);
 
+    void
+    update_syscall_state(RecordType record, output_ordinal_t output);
+
     ///
     ///////////////////////////////////////////////////////////////////////////
 
@@ -480,7 +484,12 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
         // This is accessed by other outputs for stealing and rebalancing.
         // Indirected so we can store it in our vector.
         std::unique_ptr<std::atomic<bool>> active;
+        // XXX: in_syscall_code and hit_syscall_code_end are arguably tied to an input
+        // stream and should be part of input_info_t instead. Today we do not context
+        // switch in the middle of injected kernel syscall code, but if we did, this
+        // state would be incorrect or lost.
         bool in_syscall_code = false;
+        bool hit_syscall_code_end = false;
         bool in_context_switch_code = false;
         bool hit_switch_code_end = false;
         // Used for time-based quanta.
@@ -542,6 +551,15 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
         uint64_t timestamp;
     };
 
+    // Hash functor for int-castable keys: used for switch_type_t and syscall num (int).
+    template <typename IntCastable> struct custom_hash_t {
+        std::size_t
+        operator()(const IntCastable &st) const
+        {
+            return std::hash<int>()(static_cast<int>(st)); // Hash the key as an int.
+        }
+    };
+
     // Tracks data used while opening inputs.
     struct input_reader_info_t {
         std::set<memref_tid_t> only_threads;
@@ -736,9 +754,25 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
         std::vector<std::set<uint64_t>> &start2stop,
         std::vector<std::vector<schedule_output_tracker_t>> &all_sched);
 
+    template <typename SequenceKey>
+    SequenceKey
+    invalid_kernel_sequence_key();
+
+    template <typename SequenceKey>
+    scheduler_status_t
+    read_kernel_sequences(std::unordered_map<SequenceKey, std::vector<RecordType>,
+                                             custom_hash_t<SequenceKey>> &sequence,
+                          std::string trace_path, std::unique_ptr<ReaderType> reader,
+                          std::unique_ptr<ReaderType> reader_end,
+                          trace_marker_type_t start_marker,
+                          trace_marker_type_t end_marker, std::string sequence_type);
+
     scheduler_status_t
     read_switch_sequences();
 
+    scheduler_status_t
+    read_syscall_sequences();
+
     uint64_t
     get_time_micros();
 
@@ -842,6 +876,14 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
     void
     update_next_record(output_ordinal_t output, RecordType &record);
 
+    stream_status_t
+    inject_kernel_sequence(std::vector<RecordType> &sequence, input_info_t *input);
+
+    // Actions that must be taken only when we know for sure that the given record
+    // is going to be the next record for some output stream.
+    stream_status_t
+    finalize_next_record(const RecordType &record, input_info_t *input);
+
     // Used for diagnostics: prints record fields to stderr.
     void
     print_record(const RecordType &record);
@@ -976,15 +1018,14 @@ template <typename RecordType, typename ReaderType> class scheduler_impl_tmpl_t
         }
     };
     std::unordered_map<workload_tid_t, input_ordinal_t, workload_tid_hash_t> tid2input_;
-    struct switch_type_hash_t {
-        std::size_t
-        operator()(const switch_type_t &st) const
-        {
-            return std::hash<int>()(static_cast<int>(st));
-        }
-    };
-    std::unordered_map<switch_type_t, std::vector<RecordType>, switch_type_hash_t>
+
+    std::unordered_map<switch_type_t, std::vector<RecordType>,
+                       custom_hash_t<switch_type_t>>
         switch_sequence_;
+    // Maps a syscall number to its sequence of records. The custom hash function is
+    // specified only to make it easier to generalize with switch_sequence_ above.
+    std::unordered_map<int, std::vector<RecordType>, custom_hash_t<int>>
+        syscall_sequence_;
     // For single_lockstep_output.
     std::unique_ptr<stream_t> global_stream_;
     // For online where we currently have to map dynamically observed thread ids
diff --git a/clients/drcachesim/tests/burst_syscall_inject.cpp b/clients/drcachesim/tests/burst_syscall_inject.cpp
index aef4590ec9..b5898a5000 100644
--- a/clients/drcachesim/tests/burst_syscall_inject.cpp
+++ b/clients/drcachesim/tests/burst_syscall_inject.cpp
@@ -143,6 +143,10 @@ write_header_entries(std::unique_ptr<std::ostream> &writer)
     }
     write_trace_entry(writer, make_marker(TRACE_MARKER_TYPE_CACHE_LINE_SIZE, 64));
     write_trace_entry(writer, make_marker(TRACE_MARKER_TYPE_PAGE_SIZE, 4096));
+    // Some header read-ahead logic uses the timestamp marker to know when
+    // to stop. It is important not to read ahead into any kernel syscall trace
+    // content, as is_record_kernel() then starts returning true on the stream.
+    write_trace_entry(writer, make_marker(TRACE_MARKER_TYPE_TIMESTAMP, 0));
 }
 
 static void
diff --git a/clients/drcachesim/tests/mock_syscall_sequences.x64 b/clients/drcachesim/tests/mock_syscall_sequences.x64
new file mode 100644
index 0000000000..6cc43cde70
Binary files /dev/null and b/clients/drcachesim/tests/mock_syscall_sequences.x64 differ
diff --git a/clients/drcachesim/tests/scheduler_unit_tests.cpp b/clients/drcachesim/tests/scheduler_unit_tests.cpp
index 0b134488f8..2b3223ba31 100644
--- a/clients/drcachesim/tests/scheduler_unit_tests.cpp
+++ b/clients/drcachesim/tests/scheduler_unit_tests.cpp
@@ -5769,6 +5769,115 @@ test_unscheduled()
     test_unscheduled_no_alternative();
 }
 
+static std::vector<std::string>
+run_lockstep_simulation_for_kernel_seq(scheduler_t &scheduler, int num_outputs,
+                                       memref_tid_t tid_base, int syscall_base,
+                                       std::vector<std::vector<memref_t>> &refs)
+{
+    // Custom version of run_lockstep_simulation for more precise testing of markers,
+    // instructions and interfaces around injected kernel sequences. Fills refs with
+    // every record seen on each output for detailed checks, and returns one string
+    // per output giving a character representation of that output's whole schedule.
+    std::vector<scheduler_t::stream_t *> outputs(num_outputs, nullptr);
+    std::vector<bool> eof(num_outputs, false);
+    for (int i = 0; i < num_outputs; i++)
+        outputs[i] = scheduler.get_stream(i);
+    int num_eof = 0;
+    refs.resize(num_outputs);
+    std::vector<std::string> sched_as_string(num_outputs);
+    std::vector<memref_tid_t> prev_tid(num_outputs, INVALID_THREAD_ID);
+    std::vector<bool> in_switch(num_outputs, false);
+    std::vector<bool> in_syscall(num_outputs, false);
+    std::vector<uint64> prev_in_ord(num_outputs, 0);
+    std::vector<uint64> prev_out_ord(num_outputs, 0);
+    while (num_eof < num_outputs) { // Poll outputs in turn until every one hits EOF.
+        for (int i = 0; i < num_outputs; i++) {
+            if (eof[i])
+                continue;
+            memref_t memref;
+            scheduler_t::stream_status_t status = outputs[i]->next_record(memref);
+            if (status == scheduler_t::STATUS_EOF) {
+                ++num_eof;
+                eof[i] = true;
+                continue;
+            }
+            if (status == scheduler_t::STATUS_IDLE) {
+                sched_as_string[i] += '_'; // Underscore marks an idle slot.
+                continue;
+            }
+            assert(status == scheduler_t::STATUS_OK);
+            refs[i].push_back(memref);
+            if (memref.instr.tid != prev_tid[i]) { // Thread changed on this output.
+                if (!sched_as_string[i].empty())
+                    sched_as_string[i] += ',';
+                sched_as_string[i] +=
+                    'A' + static_cast<char>(memref.instr.tid - tid_base); // Letter is the tid offset from tid_base.
+            }
+            if (memref.marker.type == TRACE_TYPE_MARKER) { // Set state first so the START marker itself is checked below.
+                if (memref.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START)
+                    in_switch[i] = true;
+                else if (memref.marker.marker_type ==
+                         TRACE_MARKER_TYPE_SYSCALL_TRACE_START)
+                    in_syscall[i] = true;
+            }
+            if (in_switch[i]) {
+                // Test that switch code is marked synthetic.
+                assert(outputs[i]->is_record_synthetic());
+                // Test that switch code doesn't count toward input ordinals, but
+                // does toward output ordinals.
+                assert(outputs[i]->get_input_interface()->get_record_ordinal() ==
+                           prev_in_ord[i] ||
+                       // Won't match if we just switched inputs.
+                       (memref.marker.type == TRACE_TYPE_MARKER &&
+                        memref.marker.marker_type ==
+                            TRACE_MARKER_TYPE_CONTEXT_SWITCH_START));
+                assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]);
+            } else if (in_syscall[i]) {
+                // Test that syscall code is marked synthetic.
+                assert(outputs[i]->is_record_synthetic());
+                // Test that dynamically injected syscall code doesn't count toward
+                // input ordinals, but does toward output ordinals.
+                assert(outputs[i]->get_input_interface()->get_record_ordinal() ==
+                       prev_in_ord[i]);
+                assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]);
+            } else
+                assert(!outputs[i]->is_record_synthetic());
+            if (type_is_instr(memref.instr.type))
+                sched_as_string[i] += 'i';
+            else if (memref.marker.type == TRACE_TYPE_MARKER) {
+                switch (memref.marker.marker_type) {
+                case TRACE_MARKER_TYPE_VERSION: sched_as_string[i] += 'v'; break;
+                case TRACE_MARKER_TYPE_TIMESTAMP: sched_as_string[i] += '0'; break;
+                case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END:
+                    in_switch[i] = false; // The END marker was already checked as synthetic above.
+                    ANNOTATE_FALLTHROUGH;
+                case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START:
+                    if (memref.marker.marker_value == scheduler_t::SWITCH_PROCESS)
+                        sched_as_string[i] += 'p';
+                    else if (memref.marker.marker_value == scheduler_t::SWITCH_THREAD)
+                        sched_as_string[i] += 't';
+                    else
+                        assert(false && "unknown context switch type");
+                    break;
+                case TRACE_MARKER_TYPE_SYSCALL: sched_as_string[i] += 's'; break;
+                case TRACE_MARKER_TYPE_SYSCALL_TRACE_END:
+                    in_syscall[i] = false; // The END marker was already checked as synthetic above.
+                    ANNOTATE_FALLTHROUGH;
+                case TRACE_MARKER_TYPE_SYSCALL_TRACE_START:
+                    sched_as_string[i] += '1' +
+                        static_cast<char>(memref.marker.marker_value - syscall_base); // 1 for syscall_base, 2 for the next.
+                    break;
+                default: sched_as_string[i] += '?'; break;
+                }
+            }
+            prev_tid[i] = memref.instr.tid;
+            prev_in_ord[i] = outputs[i]->get_input_interface()->get_record_ordinal();
+            prev_out_ord[i] = outputs[i]->get_record_ordinal();
+        }
+    }
+    return sched_as_string;
+}
+
 static void
 test_kernel_switch_sequences()
 {
@@ -5850,86 +5959,9 @@ test_kernel_switch_sequences()
         scheduler_t::STATUS_SUCCESS)
         assert(false);
 
-    // We have a custom version of run_lockstep_simulation here for more precise
-    // testing of the markers and instructions and interfaces.
-    // We record the entire sequence for a detailed check of some records, along with
-    // a character representation for a higher-level view of the whole sequence.
-    std::vector<scheduler_t::stream_t *> outputs(NUM_OUTPUTS, nullptr);
-    std::vector<bool> eof(NUM_OUTPUTS, false);
-    for (int i = 0; i < NUM_OUTPUTS; i++)
-        outputs[i] = scheduler.get_stream(i);
-    int num_eof = 0;
-    std::vector<std::vector<memref_t>> refs(NUM_OUTPUTS);
-    std::vector<std::string> sched_as_string(NUM_OUTPUTS);
-    std::vector<memref_tid_t> prev_tid(NUM_OUTPUTS, INVALID_THREAD_ID);
-    std::vector<bool> in_switch(NUM_OUTPUTS, false);
-    std::vector<uint64> prev_in_ord(NUM_OUTPUTS, 0);
-    std::vector<uint64> prev_out_ord(NUM_OUTPUTS, 0);
-    while (num_eof < NUM_OUTPUTS) {
-        for (int i = 0; i < NUM_OUTPUTS; i++) {
-            if (eof[i])
-                continue;
-            memref_t memref;
-            scheduler_t::stream_status_t status = outputs[i]->next_record(memref);
-            if (status == scheduler_t::STATUS_EOF) {
-                ++num_eof;
-                eof[i] = true;
-                continue;
-            }
-            if (status == scheduler_t::STATUS_IDLE) {
-                sched_as_string[i] += '_';
-                continue;
-            }
-            assert(status == scheduler_t::STATUS_OK);
-            refs[i].push_back(memref);
-            if (memref.instr.tid != prev_tid[i]) {
-                if (!sched_as_string[i].empty())
-                    sched_as_string[i] += ',';
-                sched_as_string[i] +=
-                    'A' + static_cast<char>(memref.instr.tid - TID_BASE);
-            }
-            if (memref.marker.type == TRACE_TYPE_MARKER &&
-                memref.marker.marker_type == TRACE_MARKER_TYPE_CONTEXT_SWITCH_START)
-                in_switch[i] = true;
-            if (in_switch[i]) {
-                // Test that switch code is marked synthetic.
-                assert(outputs[i]->is_record_synthetic());
-                // Test that switch code doesn't count toward input ordinals, but
-                // does toward output ordinals.
-                assert(outputs[i]->get_input_interface()->get_record_ordinal() ==
-                           prev_in_ord[i] ||
-                       // Won't match if we just switched inputs.
-                       (memref.marker.type == TRACE_TYPE_MARKER &&
-                        memref.marker.marker_type ==
-                            TRACE_MARKER_TYPE_CONTEXT_SWITCH_START));
-                assert(outputs[i]->get_record_ordinal() > prev_out_ord[i]);
-            } else
-                assert(!outputs[i]->is_record_synthetic());
-            if (type_is_instr(memref.instr.type))
-                sched_as_string[i] += 'i';
-            else if (memref.marker.type == TRACE_TYPE_MARKER) {
-                switch (memref.marker.marker_type) {
-                case TRACE_MARKER_TYPE_VERSION: sched_as_string[i] += 'v'; break;
-                case TRACE_MARKER_TYPE_TIMESTAMP: sched_as_string[i] += '0'; break;
-                case TRACE_MARKER_TYPE_CONTEXT_SWITCH_END:
-                    in_switch[i] = false;
-                    ANNOTATE_FALLTHROUGH;
-                case TRACE_MARKER_TYPE_CONTEXT_SWITCH_START:
-                    if (memref.marker.marker_value == scheduler_t::SWITCH_PROCESS)
-                        sched_as_string[i] += 'p';
-                    else if (memref.marker.marker_value == scheduler_t::SWITCH_THREAD)
-                        sched_as_string[i] += 't';
-                    else
-                        assert(false && "unknown context switch type");
-                    break;
-                default: sched_as_string[i] += '?'; break;
-                }
-            }
-            prev_tid[i] = memref.instr.tid;
-            prev_in_ord[i] = outputs[i]->get_input_interface()->get_record_ordinal();
-            prev_out_ord[i] = outputs[i]->get_record_ordinal();
-        }
-    }
+    std::vector<std::vector<memref_t>> refs;
+    std::vector<std::string> sched_as_string =
+        run_lockstep_simulation_for_kernel_seq(scheduler, NUM_OUTPUTS, TID_BASE, 0, refs);
     // Check the high-level strings.
     for (int i = 0; i < NUM_OUTPUTS; i++) {
         std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
@@ -6038,6 +6070,191 @@ test_kernel_switch_sequences()
     }
 }
 
+static void
+test_kernel_syscall_sequences()
+{ // Tests scheduler injection of kernel syscall sequences read from a syscall reader.
+    std::cerr << "\n----------------\nTesting kernel syscall sequences\n";
+    static constexpr memref_tid_t TID_IN_SYSCALLS = 1;
+    static constexpr int SYSCALL_BASE = 42;
+    static constexpr addr_t SYSCALL_PC_START = 0xfeed101;
+    static constexpr int NUM_OUTPUTS = 2;
+    static constexpr memref_tid_t TID_BASE = 100;
+    { // Valid case: two syscall sequences, three inputs, two outputs.
+        std::vector<trace_entry_t> syscall_sequence = {
+            /* clang-format off */
+            make_header(TRACE_ENTRY_VERSION),
+            make_thread(TID_IN_SYSCALLS),
+            make_pid(TID_IN_SYSCALLS),
+            make_version(TRACE_ENTRY_VERSION),
+            make_timestamp(0),
+            make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE),
+            make_instr(SYSCALL_PC_START),
+            make_instr(SYSCALL_PC_START + 1),
+            make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE),
+            // XXX: Currently all syscall traces are concatenated. We may change
+            // this to use an archive file instead.
+            make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE + 1),
+            make_instr(SYSCALL_PC_START + 10),
+            make_instr(SYSCALL_PC_START + 11),
+            make_instr(SYSCALL_PC_START + 12),
+            make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE + 1),
+            make_exit(TID_IN_SYSCALLS),
+            make_footer(),
+            /* clang-format on */
+        };
+        auto syscall_reader =
+            std::unique_ptr<mock_reader_t>(new mock_reader_t(syscall_sequence));
+        auto syscall_reader_end = std::unique_ptr<mock_reader_t>(new mock_reader_t());
+        static constexpr int NUM_INPUTS = 3;
+        static constexpr int NUM_INSTRS = 9;
+        static constexpr int INSTR_QUANTUM = 3;
+        static constexpr uint64_t TIMESTAMP = 44226688;
+        std::vector<scheduler_t::input_workload_t> sched_inputs;
+        std::vector<scheduler_t::input_reader_t> readers;
+        for (int input_idx = 0; input_idx < NUM_INPUTS; input_idx++) {
+            std::vector<trace_entry_t> inputs;
+            inputs.push_back(make_header(TRACE_ENTRY_VERSION));
+            memref_tid_t tid = TID_BASE + input_idx;
+            inputs.push_back(make_thread(tid));
+            inputs.push_back(make_pid(1));
+            inputs.push_back(make_version(TRACE_ENTRY_VERSION));
+            inputs.push_back(make_timestamp(TIMESTAMP));
+            for (int instr_idx = 0; instr_idx < NUM_INSTRS; instr_idx++) {
+                inputs.push_back(make_instr(42 + instr_idx * 4));
+                if (instr_idx % 2 == 0) { // Syscall marker after every other instr, alternating the two numbers.
+                    inputs.push_back(make_marker(TRACE_MARKER_TYPE_SYSCALL,
+                                                 SYSCALL_BASE + (instr_idx / 2) % 2));
+                }
+            }
+            inputs.push_back(make_exit(tid));
+            readers.emplace_back(
+                std::unique_ptr<mock_reader_t>(new mock_reader_t(inputs)),
+                std::unique_ptr<mock_reader_t>(new mock_reader_t()), tid);
+        }
+        sched_inputs.emplace_back(std::move(readers));
+        scheduler_t::scheduler_options_t sched_ops(scheduler_t::MAP_TO_ANY_OUTPUT,
+                                                   scheduler_t::DEPENDENCY_TIMESTAMPS,
+                                                   scheduler_t::SCHEDULER_DEFAULTS,
+                                                   /*verbosity=*/3);
+        sched_ops.quantum_duration_instrs = INSTR_QUANTUM;
+        sched_ops.kernel_syscall_reader = std::move(syscall_reader); // Mock reader holding the syscall sequences.
+        sched_ops.kernel_syscall_reader_end = std::move(syscall_reader_end);
+        scheduler_t scheduler;
+        if (scheduler.init(sched_inputs, NUM_OUTPUTS, std::move(sched_ops)) !=
+            scheduler_t::STATUS_SUCCESS)
+            assert(false);
+        std::vector<std::vector<memref_t>> refs;
+        std::vector<std::string> sched_as_string = run_lockstep_simulation_for_kernel_seq(
+            scheduler, NUM_OUTPUTS, TID_BASE, SYSCALL_BASE, refs);
+        // Check the high-level strings.
+        for (int i = 0; i < NUM_OUTPUTS; i++) {
+            std::cerr << "cpu #" << i << " schedule: " << sched_as_string[i] << "\n";
+        }
+        // The instrs in the injected syscall sequence count towards the #instr
+        // quantum, but no context switch happens in the middle of the syscall seq.
+        assert(sched_as_string[0] ==
+               "Av0is1ii1,Cv0is1ii1,Aiis2iii2,Ciis2iii2,Aiis1ii1,Ciis1ii1,Aiis2iii2,"
+               "Ciis2iii2,Aiis1ii1,Ciis1ii1");
+        assert(sched_as_string[1] ==
+               "Bv0is1ii1iis2iii2iis1ii1iis2iii2iis1ii1____________________________"
+               "___________");
+
+        // Zoom in on the first output and check its first few syscall sequences
+        // record by record, including the marker values.
+        int idx = 0;
+        bool res = true;
+        res = res &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_VERSION) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_TIMESTAMP, TIMESTAMP) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL, SYSCALL_BASE) &&
+
+            // Syscall_1 trace on first thread.
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE) &&
+
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_VERSION) &&
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_TIMESTAMP, TIMESTAMP) &&
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL, SYSCALL_BASE) &&
+
+            // Syscall_1 trace on second thread.
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE) &&
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE + 2, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE) &&
+
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL, SYSCALL_BASE + 1) &&
+            // Syscall_2 trace on first thread.
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE + 1) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_INSTR) &&
+            check_ref(refs[0], idx, TID_BASE, TRACE_TYPE_MARKER,
+                      TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE + 1);
+        assert(res);
+    }
+    {
+        // Test a bad input sequence.
+        std::vector<trace_entry_t> bad_syscall_sequence = {
+            /* clang-format off */
+        make_header(TRACE_ENTRY_VERSION),
+        make_thread(TID_IN_SYSCALLS),
+        make_pid(TID_IN_SYSCALLS),
+        make_version(TRACE_ENTRY_VERSION),
+        make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE),
+        make_instr(SYSCALL_PC_START),
+        make_instr(SYSCALL_PC_START + 1),
+        make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE),
+        // Error: duplicate trace for the same syscall.
+        make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_START, SYSCALL_BASE),
+        make_instr(SYSCALL_PC_START),
+        make_instr(SYSCALL_PC_START + 1),
+        make_marker(TRACE_MARKER_TYPE_SYSCALL_TRACE_END, SYSCALL_BASE),
+        make_exit(TID_IN_SYSCALLS),
+        make_footer(),
+            /* clang-format on */
+        };
+        auto bad_syscall_reader =
+            std::unique_ptr<mock_reader_t>(new mock_reader_t(bad_syscall_sequence));
+        auto bad_syscall_reader_end = std::unique_ptr<mock_reader_t>(new mock_reader_t());
+        std::vector<scheduler_t::input_workload_t> test_sched_inputs;
+        std::vector<scheduler_t::input_reader_t> readers;
+        std::vector<trace_entry_t> inputs;
+        inputs.push_back(make_header(TRACE_ENTRY_VERSION)); // Header-only input: init is expected to fail.
+        readers.emplace_back(std::unique_ptr<mock_reader_t>(new mock_reader_t(inputs)),
+                             std::unique_ptr<mock_reader_t>(new mock_reader_t()),
+                             TID_BASE);
+        test_sched_inputs.emplace_back(std::move(readers));
+        scheduler_t::scheduler_options_t test_sched_ops(
+            scheduler_t::MAP_TO_ANY_OUTPUT, scheduler_t::DEPENDENCY_TIMESTAMPS,
+            scheduler_t::SCHEDULER_DEFAULTS);
+        test_sched_ops.kernel_syscall_reader = std::move(bad_syscall_reader);
+        test_sched_ops.kernel_syscall_reader_end = std::move(bad_syscall_reader_end);
+        scheduler_t test_scheduler;
+        if (test_scheduler.init(test_sched_inputs, NUM_OUTPUTS,
+                                std::move(test_sched_ops)) !=
+            scheduler_t::STATUS_ERROR_INVALID_PARAMETER) // The duplicate sequence must be rejected.
+            assert(false);
+    }
+}
+
 void
 test_random_schedule()
 {
@@ -6731,6 +6948,7 @@ test_main(int argc, const char *argv[])
     test_direct_switch();
     test_unscheduled();
     test_kernel_switch_sequences();
+    test_kernel_syscall_sequences();
     test_random_schedule();
     test_record_scheduler();
     test_rebalancing();
diff --git a/clients/drcachesim/tests/syscall_file_invariants.templatex b/clients/drcachesim/tests/syscall_file_invariants.templatex
new file mode 100644
index 0000000000..30eaadb34c
--- /dev/null
+++ b/clients/drcachesim/tests/syscall_file_invariants.templatex
@@ -0,0 +1 @@
+Trace invariant checks passed
diff --git a/clients/drcachesim/tests/syscall_insertion.templatex b/clients/drcachesim/tests/syscall_insertion.templatex
new file mode 100644
index 0000000000..33f8729f99
--- /dev/null
+++ b/clients/drcachesim/tests/syscall_insertion.templatex
@@ -0,0 +1,8 @@
+Basic counts tool results:
+Total counts:
+      [1-9][0-9][0-9][0-9][0-9][0-9] total \(fetched\) instructions
+        5971 total unique \(fetched\) instructions
+      638938 total userspace instructions
+         109 total kernel instructions
+      [1-9][0-9][0-9][0-9][0-9][0-9] total non-fetched instructions
+.*
diff --git a/clients/drcachesim/tests/syscall_insertion_core_sharded.templatex b/clients/drcachesim/tests/syscall_insertion_core_sharded.templatex
new file mode 100644
index 0000000000..33f8729f99
--- /dev/null
+++ b/clients/drcachesim/tests/syscall_insertion_core_sharded.templatex
@@ -0,0 +1,8 @@
+Basic counts tool results:
+Total counts:
+      [1-9][0-9][0-9][0-9][0-9][0-9] total \(fetched\) instructions
+        5971 total unique \(fetched\) instructions
+      638938 total userspace instructions
+         109 total kernel instructions
+      [1-9][0-9][0-9][0-9][0-9][0-9] total non-fetched instructions
+.*
diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt
index 9ec5d3d6f4..d32baf60e1 100644
--- a/suite/tests/CMakeLists.txt
+++ b/suite/tests/CMakeLists.txt
@@ -4076,6 +4076,24 @@ if (BUILD_CLIENTS)
           "")
         set(tool.switch_file_invariants_rawtemp ON) # no preprocessor
 
+        set(syscall_file
+          "${PROJECT_SOURCE_DIR}/clients/drcachesim/tests/mock_syscall_sequences.x64")
+        # Run basic_counts on the thread trace dir with -sched_syscall_file set.
+        torunonly_simtool(syscall_insertion ${ci_shared_app}
+          "-indir ${thread_trace_dir} -tool basic_counts -sched_syscall_file ${syscall_file}"
+          "")
+        set(tool.syscall_insertion_rawtemp ON) # no preprocessor
+        # Same as above, but with -core_sharded and -sched_quantum 1000.
+        torunonly_simtool(syscall_insertion_core_sharded ${ci_shared_app}
+          "-indir ${thread_trace_dir} -tool basic_counts -core_sharded -sched_quantum 1000 -sched_syscall_file ${syscall_file}"
+          "")
+        set(tool.syscall_insertion_core_sharded_rawtemp ON) # no preprocessor
+        # Run invariant_checker directly on the mock syscall sequence file.
+        torunonly_simtool(syscall_file_invariants ${ci_shared_app}
+          "-infile ${syscall_file} -tool invariant_checker"
+          "")
+        set(tool.syscall_file_invariants_rawtemp ON) # no preprocessor
+
         # Test -multi_indir with 3 copies of our sample dir.
         torunonly_simtool(multi_indir ${ci_shared_app}
           "-multi_indir ${thread_trace_dir}:${thread_trace_dir}:${thread_trace_dir} -tool schedule_stats -cores 3"