diff --git a/components/core/config/schemas.txt b/components/core/config/schemas.txt
index e0b777859d..9c09fb5d45 100644
--- a/components/core/config/schemas.txt
+++ b/components/core/config/schemas.txt
@@ -16,4 +16,4 @@ float:\-{0,1}[0-9]+\.[0-9]+
 // Dictionary variables
 hex:[a-fA-F]+
 hasNumber:.*\d.*
-equals:.*=.*[a-zA-Z0-9].*
+equals:.*=(?<value>.*[a-zA-Z0-9].*)
diff --git a/components/core/src/clp/GrepCore.cpp b/components/core/src/clp/GrepCore.cpp
index 1a4bf499e2..b8abf5a980 100644
--- a/components/core/src/clp/GrepCore.cpp
+++ b/components/core/src/clp/GrepCore.cpp
@@ -257,9 +257,9 @@ bool GrepCore::get_bounds_of_next_potential_var(
             return false;
         }
         search_token = SearchToken{token.value()};
-        search_token.m_type_ids_set.insert(search_token.m_type_ids_ptr->at(0));
+        search_token.m_type_ids_set.insert(search_token.get_type_ids()->at(0));
     }
-    auto const& type = search_token.m_type_ids_ptr->at(0);
+    auto const& type = search_token.get_type_ids()->at(0);
     if (type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString)
         && type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenEnd))
     {
diff --git a/components/core/src/clp/Utils.cpp b/components/core/src/clp/Utils.cpp
index 5a4dc87b6f..dfb753fbd1 100644
--- a/components/core/src/clp/Utils.cpp
+++ b/components/core/src/clp/Utils.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include <stdexcept>
 #include
 #include
 
@@ -187,6 +188,18 @@ load_lexer_from_file(std::string const& schema_file_path, log_surgeon::lexers::B
     for (std::unique_ptr<ParserAST> const& parser_ast : schema_ast->m_schema_vars) {
         auto* rule = dynamic_cast<SchemaVarAST*>(parser_ast.get());
 
+        // Currently, we only support at most a single capture group in each variable. If a
+        // capture group is present, its match will be treated as the variable rather than the
+        // full match.
+        auto const num_captures = rule->m_regex_ptr->get_subtree_positive_captures().size();
+        if (1 < num_captures) {
+            throw std::runtime_error(
+                    schema_file_path + ":" + std::to_string(rule->m_line_num + 1)
+                    + ": error: the schema rule '" + rule->m_name
+                    + "' has a regex pattern containing > 1 capture groups (found "
+                    + std::to_string(num_captures) + ").\n"
+            );
+        }
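+        // For example, the single-capture test rule `capture:[A-Za-z]+(?<group>\d+)` passes
+        // this check, while the two-capture test rule 'multicapture' (see
+        // tests/test_schema_files/multiple_capture_groups.txt) triggers the error above.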
+
         if ("timestamp" == rule->m_name) {
             continue;
         }
diff --git a/components/core/src/clp/streaming_archive/writer/Archive.cpp b/components/core/src/clp/streaming_archive/writer/Archive.cpp
index 3b8df0d73a..dcc191f5bd 100644
--- a/components/core/src/clp/streaming_archive/writer/Archive.cpp
+++ b/components/core/src/clp/streaming_archive/writer/Archive.cpp
@@ -2,16 +2,20 @@
 
 #include
 
+#include
 #include
-#include
-#include
+#include
+#include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
+#include
 #include
 
 #include "../../EncodedVariableInterpreter.hpp"
@@ -19,15 +23,12 @@
 #include "../../spdlog_with_specializations.hpp"
 #include "../../Utils.hpp"
 #include "../Constants.hpp"
+#include "TimestampPattern.hpp"
 #include "utils.hpp"
 
 using clp::ir::eight_byte_encoded_variable_t;
 using clp::ir::four_byte_encoded_variable_t;
-using log_surgeon::LogEventView;
-using std::list;
-using std::make_unique;
 using std::string;
-using std::unordered_set;
 using std::vector;
 
 namespace clp::streaming_archive::writer {
@@ -315,15 +316,124 @@ Archive::write_msg(epochtime_t timestamp, string const& message, size_t num_unco
     update_segment_indices(logtype_id, var_ids);
 }
 
-void Archive::write_msg_using_schema(LogEventView const& log_view) {
-    epochtime_t timestamp = 0;
-    TimestampPattern* timestamp_pattern = nullptr;
-    auto const& log_output_buffer = log_view.get_log_output_buffer();
-    if (log_output_buffer->has_timestamp()) {
-        size_t start;
-        size_t end;
-        timestamp_pattern = (TimestampPattern*)TimestampPattern::search_known_ts_patterns(
-                log_output_buffer->get_mutable_token(0).to_string(),
+auto Archive::add_token_to_dicts(
+        log_surgeon::LogEventView const& log_view,
+        log_surgeon::Token token_view
+) -> void {
+    auto const* type_ids{token_view.get_type_ids()};
+    if (nullptr == type_ids || type_ids->empty()) {
+        throw std::runtime_error("Token has no type IDs: " + token_view.to_string());
+    }
+    auto const token_type{type_ids->at(0)};
+    switch (token_type) {
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenNewline):
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString): {
+            m_logtype_dict_entry.add_constant(token_view.to_string(), 0, token_view.get_length());
+            break;
+        }
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenInt): {
+            encoded_variable_t encoded_var{};
+            if (false
+                == EncodedVariableInterpreter::convert_string_to_representable_integer_var(
+                        token_view.to_string(),
+                        encoded_var
+                ))
+            {
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string(), id);
+                encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
+                m_logtype_dict_entry.add_dictionary_var();
+            } else {
+                m_logtype_dict_entry.add_int_var();
+            }
+            m_encoded_vars.push_back(encoded_var);
+            break;
+        }
+        case static_cast<uint32_t>(log_surgeon::SymbolId::TokenFloat): {
+            encoded_variable_t encoded_var{};
+            if (false
+                == EncodedVariableInterpreter::convert_string_to_representable_float_var(
+                        token_view.to_string(),
+                        encoded_var
+                ))
+            {
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string(), id);
+                encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
+                m_logtype_dict_entry.add_dictionary_var();
+            } else {
+                m_logtype_dict_entry.add_float_var();
+            }
+            m_encoded_vars.push_back(encoded_var);
+            break;
+        }
+        default: {
+            // If there are no capture groups, the entire variable token is stored as a
+            // variable. If the variable token contains capture groups, we break the token up:
+            // each capture is stored as a variable, and any substrings surrounding the capture
+            // are stored as part of the logtype. Capture repetition is currently unsupported,
+            // so we explicitly store only the first capture.
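+            // For example, under the test rule `capture:[A-Za-z]+(?<group>\d+)`, the token
+            // "MyDog123" contributes the constant "MyDog" to the logtype and stores "123" as
+            // a dictionary variable.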
+
+            auto const& lexer{log_view.get_log_parser().m_lexer};
+            auto capture_ids{lexer.get_capture_ids_from_rule_id(token_type)};
+            if (false == capture_ids.has_value()) {
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string(), id);
+                m_var_ids.push_back(id);
+                m_encoded_vars.push_back(EncodedVariableInterpreter::encode_var_dict_id(id));
+                m_logtype_dict_entry.add_dictionary_var();
+                break;
+            }
+
+            auto const register_ids{lexer.get_reg_ids_from_capture_id(capture_ids.value().at(0))};
+            if (false == register_ids.has_value()) {
+                throw(std::runtime_error(
+                        "No register IDs found for variable's capture group. Full token: "
+                        + token_view.to_string()
+                ));
+            }
+
+            auto const [start_reg_id, end_reg_id]{register_ids.value()};
+            auto const start_positions{token_view.get_reversed_reg_positions(start_reg_id)};
+            auto const end_positions{token_view.get_reversed_reg_positions(end_reg_id)};
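+
+            // Only split the token when both capture-boundary registers hold a valid
+            // (non-negative) position; otherwise the capture did not participate in the match
+            // and the token contributes nothing below.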
+            if (false == start_positions.empty() && -1 < start_positions[0]
+                && false == end_positions.empty() && -1 < end_positions[0])
+            {
+                auto token_end{token_view.get_end_pos()};
+
+                token_view.set_end_pos(start_positions[0]);
+                auto const before_capture{token_view.to_string_view()};
+                m_logtype_dict_entry.add_constant(before_capture, 0, before_capture.size());
+
+                token_view.set_start_pos(start_positions[0]);
+                token_view.set_end_pos(end_positions[0]);
+
+                variable_dictionary_id_t id{};
+                m_var_dict.add_entry(token_view.to_string_view(), id);
+                m_var_ids.push_back(id);
+                m_encoded_vars.push_back(EncodedVariableInterpreter::encode_var_dict_id(id));
+                m_logtype_dict_entry.add_dictionary_var();
+
+                token_view.set_start_pos(end_positions[0]);
+                token_view.set_end_pos(token_end);
+                auto const after_capture{token_view.to_string_view()};
+                m_logtype_dict_entry.add_constant(after_capture, 0, after_capture.size());
+            }
+            break;
+        }
+    }
+}
+
+void Archive::write_msg_using_schema(log_surgeon::LogEventView const& log_view) {
+    epochtime_t timestamp{0};
+    TimestampPattern const* timestamp_pattern{nullptr};
+    auto const& log_buf = log_view.get_log_output_buffer();
+    if (log_buf->has_timestamp()) {
+        size_t start{};
+        size_t end{};
+        timestamp_pattern = TimestampPattern::search_known_ts_patterns(
+                log_buf->get_mutable_token(0).to_string(),
                 timestamp,
                 start,
                 end
@@ -331,14 +441,16 @@ void Archive::write_msg_using_schema(LogEventView const& log_view) {
         if (nullptr == timestamp_pattern) {
             throw(std::runtime_error(
                     "Schema contains a timestamp regex that matches "
-                    + log_output_buffer->get_mutable_token(0).to_string()
+                    + log_buf->get_mutable_token(0).to_string()
                    + " which does not match any known timestamp pattern."
             ));
         }
         if (m_old_ts_pattern != timestamp_pattern) {
             change_ts_pattern(timestamp_pattern);
-            m_old_ts_pattern = timestamp_pattern;
+            m_old_ts_pattern = const_cast<TimestampPattern*>(timestamp_pattern);
         }
+    } else {
+        timestamp_pattern = nullptr;
     }
     if (get_data_size_of_dictionaries() >= m_target_data_size_of_dicts) {
         split_file_and_archive(
@@ -354,89 +466,37 @@
     m_encoded_vars.clear();
     m_var_ids.clear();
     m_logtype_dict_entry.clear();
-    size_t num_uncompressed_bytes = 0;
+
+    size_t num_uncompressed_bytes{0};
     // Timestamp is included in the uncompressed message size
-    uint32_t start_pos = log_output_buffer->get_token(0).m_start_pos;
+    auto start_pos{log_buf->get_token(0).get_start_pos()};
     if (timestamp_pattern == nullptr) {
-        start_pos = log_output_buffer->get_token(1).m_start_pos;
+        start_pos = log_buf->get_token(1).get_start_pos();
     }
-    uint32_t end_pos = log_output_buffer->get_token(log_output_buffer->pos() - 1).m_end_pos;
+    auto const end_pos{log_buf->get_token(log_buf->pos() - 1).get_end_pos()};
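+    // The parser reads the log through a circular buffer, so a message can wrap around the
+    // buffer's end; in that case (start_pos > end_pos) the size is the tail of the buffer
+    // plus the wrapped-around head.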
     if (start_pos <= end_pos) {
         num_uncompressed_bytes = end_pos - start_pos;
     } else {
-        num_uncompressed_bytes
-                = log_output_buffer->get_token(0).m_buffer_size - start_pos + end_pos;
+        num_uncompressed_bytes = log_buf->get_token(0).get_buffer_size() - start_pos + end_pos;
     }
 
-    for (uint32_t i = 1; i < log_output_buffer->pos(); i++) {
-        log_surgeon::Token& token = log_output_buffer->get_mutable_token(i);
-        int token_type = token.m_type_ids_ptr->at(0);
-        if (log_output_buffer->has_delimiters() && (timestamp_pattern != nullptr || i > 1)
+    for (auto token_idx{1}; token_idx < log_buf->pos(); token_idx++) {
+        auto token_view{log_buf->get_token(token_idx)};
+        auto const* type_ids{token_view.get_type_ids()};
+        if (nullptr == type_ids || type_ids->empty()) {
+            throw std::runtime_error("Token has no type IDs: " + token_view.to_string());
+        }
+        auto const token_type{type_ids->at(0)};
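+        // When a token is preceded by a delimiter, add that delimiter to the logtype as a
+        // constant, then trim it off the token before encoding the token itself.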
+        if (log_buf->has_delimiters() && (timestamp_pattern != nullptr || token_idx > 1)
             && token_type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString)
             && token_type != static_cast<uint32_t>(log_surgeon::SymbolId::TokenNewline))
         {
-            m_logtype_dict_entry.add_constant(token.get_delimiter(), 0, 1);
-            if (token.m_start_pos == token.m_buffer_size - 1) {
-                token.m_start_pos = 0;
-            } else {
-                token.m_start_pos++;
-            }
-        }
-        switch (token_type) {
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenNewline):
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenUncaughtString): {
-                m_logtype_dict_entry.add_constant(token.to_string(), 0, token.get_length());
-                break;
-            }
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenInt): {
-                encoded_variable_t encoded_var;
-                if (!EncodedVariableInterpreter::convert_string_to_representable_integer_var(
-                            token.to_string(),
-                            encoded_var
-                    ))
-                {
-                    variable_dictionary_id_t id;
-                    m_var_dict.add_entry(token.to_string(), id);
-                    encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
-                    m_logtype_dict_entry.add_dictionary_var();
-                } else {
-                    m_logtype_dict_entry.add_int_var();
-                }
-                m_encoded_vars.push_back(encoded_var);
-                break;
-            }
-            case static_cast<uint32_t>(log_surgeon::SymbolId::TokenFloat): {
-                encoded_variable_t encoded_var;
-                if (!EncodedVariableInterpreter::convert_string_to_representable_float_var(
-                            token.to_string(),
-                            encoded_var
-                    ))
-                {
-                    variable_dictionary_id_t id;
-                    m_var_dict.add_entry(token.to_string(), id);
-                    encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
-                    m_logtype_dict_entry.add_dictionary_var();
-                } else {
-                    m_logtype_dict_entry.add_float_var();
-                }
-                m_encoded_vars.push_back(encoded_var);
-                break;
-            }
-            default: {
-                // Variable string looks like a dictionary variable, so encode it as so
-                encoded_variable_t encoded_var;
-                variable_dictionary_id_t id;
-                m_var_dict.add_entry(token.to_string(), id);
-                encoded_var = EncodedVariableInterpreter::encode_var_dict_id(id);
-                m_var_ids.push_back(id);
-
-                m_logtype_dict_entry.add_dictionary_var();
-                m_encoded_vars.push_back(encoded_var);
-                break;
-            }
+            m_logtype_dict_entry.add_constant(token_view.get_delimiter(), 0, 1);
+            token_view.increment_start_pos();
         }
+        add_token_to_dicts(log_view, token_view);
     }
-    if (!m_logtype_dict_entry.get_value().empty()) {
-        logtype_dictionary_id_t logtype_id;
+    if (false == m_logtype_dict_entry.get_value().empty()) {
+        logtype_dictionary_id_t logtype_id{};
         m_logtype_dict.add_entry(m_logtype_dict_entry, logtype_id);
         m_file->write_encoded_msg(
                 timestamp,
diff --git a/components/core/src/clp/streaming_archive/writer/Archive.hpp b/components/core/src/clp/streaming_archive/writer/Archive.hpp
index 2b589881a5..b1351199a2 100644
--- a/components/core/src/clp/streaming_archive/writer/Archive.hpp
+++ b/components/core/src/clp/streaming_archive/writer/Archive.hpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include
 
 #include "../../ArrayBackedPosIntSet.hpp"
 #include "../../ErrorCode.hpp"
@@ -150,7 +149,7 @@ class Archive {
      * @param log_event_view
      * @throw FileWriter::OperationFailed if any write fails
      */
-    void write_msg_using_schema(log_surgeon::LogEventView const& log_event_view);
+    auto write_msg_using_schema(log_surgeon::LogEventView const& log_view) -> void;
 
     /**
      * Writes an IR log event to the current encoded file
@@ -290,6 +289,15 @@
      */
     auto update_global_metadata() -> void;
 
+    /**
+     * Inspects a log-surgeon token and adds its information to the logtype and variable
+     * dictionaries.
+     * @param log_view The log event containing the token.
+     * @param token_view The token to add to the dictionaries.
+     */
+    auto
+    add_token_to_dicts(log_surgeon::LogEventView const& log_view, log_surgeon::Token token_view)
+            -> void;
+
     // Variables
     boost::uuids::uuid m_id;
     std::string m_id_as_string;
diff --git a/components/core/tests/test-ParserWithUserSchema.cpp b/components/core/tests/test-ParserWithUserSchema.cpp
index 3ab9bfad72..b51d212f9a 100644
--- a/components/core/tests/test-ParserWithUserSchema.cpp
+++ b/components/core/tests/test-ParserWithUserSchema.cpp
@@ -1,37 +1,49 @@
 // TODO: move this test to log_surgeon
 // TODO: move load_lexer_from_file into SearchParser in log_surgeon
-#include
-
 #include
 #include
+#include
 #include
+#include
 #include
 #include
+#include
+#include
 #include
 #include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
 
-#include "../src/clp/clp/run.hpp"
-#include "../src/clp/GlobalMySQLMetadataDB.hpp"
-#include "../src/clp/LogSurgeonReader.hpp"
-#include "../src/clp/Utils.hpp"
+#include "TestOutputCleaner.hpp"
 
-using clp::FileReader;
 using clp::load_lexer_from_file;
-using clp::LogSurgeonReader;
-using log_surgeon::DelimiterStringAST;
 using log_surgeon::lexers::ByteLexer;
 using log_surgeon::LogParser;
-using log_surgeon::ParserAST;
 using log_surgeon::SchemaAST;
-using log_surgeon::SchemaVarAST;
-using log_surgeon::Token;
 
 namespace {
+constexpr std::string_view cTestArchiveDirectory{"test-parser-with-user-schema-archive"};
+
+auto run_clp_compress(
+        std::filesystem::path const& schema_path,
+        std::filesystem::path const& output_path,
+        std::filesystem::path const& input_path
+) -> int;
 [[nodiscard]] auto get_tests_dir() -> std::filesystem::path;
 [[nodiscard]] auto get_test_schema_files_dir() -> std::filesystem::path;
 [[nodiscard]] auto get_test_queries_dir() -> std::filesystem::path;
+[[nodiscard]] auto get_test_log_dir() -> std::filesystem::path;
 
 auto get_tests_dir() -> std::filesystem::path {
     std::filesystem::path const current_file_path{__FILE__};
@@ -45,6 +57,30 @@ auto get_test_schema_files_dir() -> std::filesystem::path {
 auto get_test_queries_dir() -> std::filesystem::path {
     return get_tests_dir() / "test_search_queries";
 }
+
+auto get_test_log_dir() -> std::filesystem::path {
+    return get_tests_dir() / "test_log_files";
+}
+
+auto run_clp_compress(
+        std::filesystem::path const& schema_path,
+        std::filesystem::path const& output_path,
+        std::filesystem::path const& input_path
+) -> int {
+    auto const schema_path_str{schema_path.string()};
+    auto const output_path_str{output_path.string()};
+    auto const input_path_str{input_path.string()};
+    std::vector<char const*> argv{
+            "clp",
+            "c",
+            "--schema-path",
+            schema_path_str.data(),
+            output_path_str.data(),
+            input_path_str.data(),
+            nullptr
+    };
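+    // argv mirrors a C-style argument vector: the trailing nullptr matches the argv[argc] ==
+    // nullptr guarantee of main(), and it is excluded from the argument count passed below.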
+    return clp::clp::run(static_cast<int>(argv.size() - 1), argv.data());
+}
 }  // namespace
 
 std::unique_ptr<SchemaAST> generate_schema_ast(std::string const& schema_file) {
@@ -60,43 +96,6 @@ std::unique_ptr<LogParser> generate_log_parser(std::string const& schema_file) {
     return log_parser;
 }
 
-void compress(
-        std::string const& output_dir,
-        std::string const& file_to_compress,
-        std::string schema_file,
-        bool old = false
-) {
-    std::vector<std::string> arguments;
-    if (old) {
-        arguments = {"main.cpp", "c", output_dir, file_to_compress};
-    } else {
-        arguments
-                = {"main.cpp",
-                   "c",
-                   output_dir,
-                   file_to_compress,
-                   "--schema-path",
-                   std::move(schema_file)};
-    }
-    std::vector<char const*> argv;
-    for (auto const& arg : arguments) {
-        argv.push_back(arg.data());
-    }
-    argv.push_back(nullptr);
-    clp::clp::run(argv.size() - 1, argv.data());
-}
-
-void decompress(std::string archive_dir, std::string output_dir) {
-    std::vector<std::string> arguments
-            = {"main.cpp", "x", std::move(archive_dir), std::move(output_dir)};
-    std::vector<char const*> argv;
-    for (auto const& arg : arguments) {
-        argv.push_back(arg.data());
-    }
-    argv.push_back(nullptr);
-    clp::clp::run(argv.size() - 1, argv.data());
-}
-
 TEST_CASE("Test error for missing schema file", "[LALR1Parser][SchemaParser]") {
     auto const file_path = get_test_schema_files_dir() / "missing_schema.txt";
     auto const file_path_string = file_path.string();
@@ -156,49 +155,86 @@ TEST_CASE("Test creating log parser without delimiters", "[LALR1Parser][LogParse
     );
 }
 
-// TODO: This test doesn't currently work because delimiters are allowed in
-// schema files, and there is no option to disable this yet
-// TEST_CASE("Test error for creating log file with delimiter in regex pattern",
-//         "[LALR1Parser]SchemaParser]") {
-//     std::string file_path = "../tests/test_schema_files/schema_with_delimiter_in_regex_error.txt";
-//     std::string file_name = boost::filesystem::canonical(file_path).string();
-//     REQUIRE_THROWS_WITH(generate_log_parser(file_path),
-//                         file_name +
-//                         ":2: error: 'equals' has regex pattern which contains delimiter '='.\n"
-//                         + "          equals:.*=.*\n"
-//                         + "                 ^^^^^\n");
-// }
-
-// TODO: This error check is performed correctly by CLP, but it is handled by
-// something different now so this test will fail as is
-// TEST_CASE("Test error for missing log file", "[LALR1Parser][LogParser]") {
-//     std::string file_name = "../tests/test_log_files/missing_log.txt";
-//     std::string file_path = boost::filesystem::weakly_canonical(file_name).string();
-//     REQUIRE_THROWS(compress("../tests/test_archives", file_name,
-//                             "../tests/test_schema_files/schema_that_does_not_exist.txt"),
-//                    "Specified schema file does not exist.");
-// }
-
 TEST_CASE("Test lexer", "[Search]") {
     ByteLexer lexer;
     auto const schema_file_path = get_test_schema_files_dir() / "search_schema.txt";
     load_lexer_from_file(schema_file_path.string(), lexer);
     auto const query_file_path = get_test_queries_dir() / "easy.txt";
-    FileReader file_reader{query_file_path.string()};
-    LogSurgeonReader reader_wrapper(file_reader);
+    clp::FileReader file_reader{query_file_path.string()};
+    clp::LogSurgeonReader reader_wrapper(file_reader);
     log_surgeon::ParserInputBuffer parser_input_buffer;
     parser_input_buffer.read_if_safe(reader_wrapper);
     lexer.reset();
     auto [error_code, opt_token] = lexer.scan(parser_input_buffer);
     REQUIRE(error_code == log_surgeon::ErrorCode::Success);
-    Token token{opt_token.value()};
-    while (token.m_type_ids_ptr->at(0) != static_cast<uint32_t>(log_surgeon::SymbolId::TokenEnd)) {
+    auto token{opt_token.value()};
+    while (token.get_type_ids()->at(0) != static_cast<uint32_t>(log_surgeon::SymbolId::TokenEnd)) {
         SPDLOG_INFO("token:" + token.to_string() + "\n");
         SPDLOG_INFO(
-                "token.m_type_ids->back():" + lexer.m_id_symbol[token.m_type_ids_ptr->back()] + "\n"
+                "token.get_type_ids()->back():" + lexer.m_id_symbol[token.get_type_ids()->back()]
+                + "\n"
         );
         auto [error_code, opt_token] = lexer.scan(parser_input_buffer);
         REQUIRE(error_code == log_surgeon::ErrorCode::Success);
         token = opt_token.value();
     }
 }
+
+TEST_CASE("Test schema with single capture group", "[load_lexer]") {
+    auto const schema_file_path{get_test_schema_files_dir() / "single_capture_group.txt"};
+    ByteLexer lexer;
+    load_lexer_from_file(schema_file_path, lexer);
+
+    auto const rule_id{lexer.m_symbol_id.at("capture")};
+    auto const capture_ids{lexer.get_capture_ids_from_rule_id(rule_id)};
+    REQUIRE(capture_ids.has_value());
+    REQUIRE(1 == capture_ids->size());
+    REQUIRE("group" == lexer.m_id_symbol.at(capture_ids->at(0)));
+}
+
+TEST_CASE("Error on schema rule with multiple capture groups", "[load_lexer]") {
+    auto const schema_file_path{get_test_schema_files_dir() / "multiple_capture_groups.txt"};
+    ByteLexer lexer;
+    REQUIRE_THROWS_WITH(
+            load_lexer_from_file(schema_file_path, lexer),
+            schema_file_path.string()
+                    + ":3: error: the schema rule 'multicapture' has a regex pattern containing > "
+                      "1 capture groups (found 2).\n"
+    );
+}
+
+TEST_CASE("Verify dictionary contents", "[Compression]") {
+    auto const log_file_path{get_test_log_dir() / "log_with_capture.txt"};
+    auto const schema_file_path{get_test_schema_files_dir() / "single_capture_group.txt"};
+    TestOutputCleaner const cleaner{{std::string{cTestArchiveDirectory}}};
+    std::filesystem::create_directory(cTestArchiveDirectory);
+
+    REQUIRE(0 == run_clp_compress(schema_file_path, cTestArchiveDirectory, log_file_path));
+
+    std::vector<std::filesystem::path> archive_paths;
+    for (auto const& entry : std::filesystem::directory_iterator{cTestArchiveDirectory}) {
+        auto const& path{entry.path()};
+        if (false == path.string().ends_with(clp::streaming_archive::cMetadataDBFileName)) {
+            archive_paths.emplace_back(path);
+        }
+    }
+    REQUIRE(1 == archive_paths.size());
+
+    clp::streaming_archive::reader::Archive archive_reader;
+    archive_reader.open(archive_paths.at(0));
+    archive_reader.refresh_dictionaries();
+
+    auto const& logtype_dict{archive_reader.get_logtype_dictionary()};
+    REQUIRE(1 == logtype_dict.get_entries().size());
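+    // Each capture's match ("123" and "4123") is replaced in the logtype by a dictionary-
+    // variable placeholder, while the text surrounding each capture ("MyDog" and "APet")
+    // remains part of the logtype itself.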
+    REQUIRE(fmt::format(
+                    "2016-05-08 07:34:05.251 MyDog{} APet{} test.txt\n",
+                    clp::enum_to_underlying_type(clp::ir::VariablePlaceholder::Dictionary),
+                    clp::enum_to_underlying_type(clp::ir::VariablePlaceholder::Dictionary)
+            )
+            == logtype_dict.get_value(0));
+
+    auto const& var_dict{archive_reader.get_var_dictionary()};
+    REQUIRE(2 == var_dict.get_entries().size());
+    REQUIRE("123" == var_dict.get_value(0));
+    REQUIRE("4123" == var_dict.get_value(1));
+}
diff --git a/components/core/tests/test_log_files/log_with_capture.txt b/components/core/tests/test_log_files/log_with_capture.txt
new file mode 100644
index 0000000000..6210ea8bdc
--- /dev/null
+++ b/components/core/tests/test_log_files/log_with_capture.txt
@@ -0,0 +1 @@
+2016-05-08 07:34:05.251 MyDog123 APet4123 test.txt
diff --git a/components/core/tests/test_schema_files/multiple_capture_groups.txt b/components/core/tests/test_schema_files/multiple_capture_groups.txt
new file mode 100644
index 0000000000..09998bc19d
--- /dev/null
+++ b/components/core/tests/test_schema_files/multiple_capture_groups.txt
@@ -0,0 +1,3 @@
+delimiters: \r\n
+
+multicapture:text(?<var0>var0)text(?<var1>var1)text
diff --git a/components/core/tests/test_schema_files/search_schema.txt b/components/core/tests/test_schema_files/search_schema.txt
index 60d0c12f00..f49a6dbfa4 100644
--- a/components/core/tests/test_schema_files/search_schema.txt
+++ b/components/core/tests/test_schema_files/search_schema.txt
@@ -1,5 +1,5 @@
 // Delimiters
-delimiters: \r\n:,=!;%\?
+delimiters: \r\n:,=!;%?
 
 // First set of variables
 timestamp:[0-9]{4}\-[0-9]{2}\-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]{3}){0,1}
diff --git a/components/core/tests/test_schema_files/single_capture_group.txt b/components/core/tests/test_schema_files/single_capture_group.txt
new file mode 100644
index 0000000000..b9543fbdcc
--- /dev/null
+++ b/components/core/tests/test_schema_files/single_capture_group.txt
@@ -0,0 +1,3 @@
+delimiters: \r\n
+
+capture:[A-Za-z]+(?<group>\d+)
diff --git a/taskfiles/deps/main.yaml b/taskfiles/deps/main.yaml
index 927cedb2a1..a5a4785b26 100644
--- a/taskfiles/deps/main.yaml
+++ b/taskfiles/deps/main.yaml
@@ -367,8 +367,8 @@ tasks:
           - "-DCMAKE_INSTALL_MESSAGE=LAZY"
          - "-Dlog_surgeon_BUILD_TESTING=OFF"
         LIB_NAME: "log_surgeon"
-        TARBALL_SHA256: "4551ea50cd22e8423770fd66a167e1c86053b1f4957f72c582a2da93e7820210"
-        TARBALL_URL: "https://github.com/y-scope/log-surgeon/archive/840f262.tar.gz"
+        TARBALL_SHA256: "396ef8822e687b0bb8ca433a360e9e1d39a51620df89cac606db5e0f49d1b265"
+        TARBALL_URL: "https://github.com/y-scope/log-surgeon/archive/1135c2e.tar.gz"
 
   lz4:
     internal: true