-
Notifications
You must be signed in to change notification settings - Fork 250
Add Regex Engine to FEX for config option loading #5120
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
badumbatish
wants to merge
10
commits into
FEX-Emu:main
Choose a base branch
from
badumbatish:regex_engine
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 8 commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
55dbab2
Initial implementation for regex engine
badumbatish 8600427
Add support for question mark and plus mark in regex, supply testing …
badumbatish 06843cf
Added more characters to the regex alphabets, add more test case
badumbatish 6db5672
Added support for regex matching of configs, awaiting reviews
badumbatish dc1c1c5
Rename variable to CamelCase
badumbatish 806e266
Addresses PR reviews
badumbatish 99eca32
Merge from main
badumbatish 4573dce
Remove unnecessary features and test cases
badumbatish d212911
Rewrite to naive regex with dp
badumbatish cd965c5
Addresses PR reviews
badumbatish File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| /* | ||
| * Copyright (c) 2025 Rakshit Awasthi | ||
| * SPDX-License-Identifier: MIT | ||
| */ | ||
| #include "FEXCore/fextl/string.h" | ||
| #include <FEXCore/Utils/Regex.h> | ||
|
|
||
| #include <algorithm> | ||
| #include <cassert> | ||
| #include <cstdlib> | ||
| #include <functional> | ||
| #include <iostream> | ||
|
|
||
| // Implementation for FEX regex engine, please see unittests/APITests/Regex.cpp | ||
| // for test cases | ||
| namespace FEXCore::Utils { | ||
| ///////////// | ||
| // STATE IMPL | ||
| ///////////// | ||
| void State::addEpsilonTransition(State *nextState) { | ||
| assert(nextState && "state needs to be non null for addEpsilonTransition"); | ||
| epsilonTransitions.push_back(nextState); | ||
| } | ||
|
|
||
| void State::addTransition(char c, State *nextState) { | ||
| assert(nextState && "state needs to be non null for addTransition"); | ||
| transitions[c].push_back(nextState); | ||
| } | ||
|
|
||
| ///////////// | ||
| // NFA IMPL | ||
| ///////////// | ||
| NFA::NFA() { | ||
| fextl::unique_ptr<State> start = fextl::make_unique<State>(); | ||
| fextl::unique_ptr<State> accepting = fextl::make_unique<State>(true); | ||
| startState = start.get(); | ||
| acceptingState = accepting.get(); | ||
|
|
||
| // transfer the ownership to the states vector | ||
| states.push_back(std::move(start)); | ||
| states.push_back(std::move(accepting)); | ||
| } | ||
|
|
||
| void NFA::acquireStatesFrom(NFA &other) { | ||
| for (auto &s : other.states) | ||
| this->states.push_back(std::move(s)); | ||
|
|
||
| other.states.clear(); | ||
| } | ||
|
|
||
| NFA NFA::createForEpsilon() { | ||
| NFA nfa; | ||
| nfa.startState->addEpsilonTransition(nfa.acceptingState); | ||
| return nfa; | ||
| } | ||
| NFA NFA::createForDot() { | ||
| NFA nfa; | ||
|
|
||
| // INFO: For now i think let's keep it simple and spawn NFA for the whole | ||
| // alphabet | ||
| // | ||
| // See if performance is acceptable or not and then we can read more dragon | ||
| // book to find optimization | ||
| for (auto ch : Regex::Alphabet) | ||
| nfa.startState->addTransition(ch, nfa.acceptingState); | ||
| return nfa; | ||
| } | ||
|
|
||
| NFA NFA::createForChar(char c) { | ||
| NFA nfa; | ||
| nfa.startState->addTransition(c, nfa.acceptingState); | ||
| return nfa; | ||
| } | ||
|
|
||
| // Dragon book 2nd edition, figure 3.41: NFA for the concat of two regular | ||
| // expressions | ||
| NFA NFA::createForConcatenation(NFA &nfa1, NFA &nfa2) { | ||
| NFA newNFA; | ||
| nfa1.acceptingState->addEpsilonTransition(nfa2.startState); | ||
| nfa1.acceptingState->isAccepting = false; | ||
| newNFA.startState = nfa1.startState; | ||
| newNFA.acceptingState = nfa2.acceptingState; | ||
| newNFA.acquireStatesFrom(nfa1); | ||
| newNFA.acquireStatesFrom(nfa2); | ||
| return newNFA; | ||
| } | ||
|
|
||
| // Dragon book 2nd edition, figure 3.42: NFA for the closure of a regular | ||
| // expression | ||
| NFA NFA::createForKleeneStar(NFA &originalNFA) { | ||
| NFA newNFA; | ||
| newNFA.startState->addEpsilonTransition(originalNFA.startState); | ||
| newNFA.startState->addEpsilonTransition(newNFA.acceptingState); | ||
| originalNFA.acceptingState->addEpsilonTransition(originalNFA.startState); | ||
| originalNFA.acceptingState->addEpsilonTransition(newNFA.acceptingState); | ||
| originalNFA.acceptingState->isAccepting = false; | ||
| newNFA.acquireStatesFrom(originalNFA); | ||
| return newNFA; | ||
| } | ||
|
|
||
| // Find all the states that can be reached from the current set of states using | ||
| // only epsilon transitions | ||
| fextl::set<State *> NFA::epsilonClosure(const fextl::set<State *> &states) { | ||
| fextl::stack<State *> stateStack; | ||
| fextl::set<State *> result = states; | ||
|
|
||
| for (State *state : states) | ||
| stateStack.push(state); | ||
|
|
||
| while (!stateStack.empty()) { | ||
| State *currState = stateStack.top(); | ||
| stateStack.pop(); | ||
| for (State *next : currState->epsilonTransitions) { | ||
| if (result.find(next) == result.end()) { | ||
| stateStack.push(next); | ||
| result.insert(next); | ||
| } | ||
| } | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| // Find all the states that can be reached from the current set of states using | ||
| // only character transition | ||
| fextl::set<State *> NFA::move(const fextl::set<State *> &states, const char c) { | ||
| fextl::set<State *> result; | ||
| for (auto *state : states) { | ||
| const decltype(state->transitions) &transitionMap = state->transitions; | ||
| if (auto itr = transitionMap.find(c); itr != transitionMap.end()) { | ||
| for (auto *transition : itr->second) { | ||
| result.insert(transition); | ||
| } | ||
| } | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| ///////////// | ||
| // REGEX IMPL | ||
| ///////////// | ||
| Regex::Regex(const fextl::string &s) : Pattern(s), Pos(0) { | ||
| Nfa = parseExpression(); | ||
| } | ||
|
|
||
| NFA Regex::parseExpression() { return parseConcatenation(); } | ||
|
|
||
| NFA Regex::parseConcatenation() { | ||
| NFA result = parseStarOrAtom(); | ||
| while (Pos < Pattern.size() && Pattern[Pos] != '|') { | ||
| NFA nfaToConcat = parseStarOrAtom(); | ||
| result = NFA::createForConcatenation(result, nfaToConcat); | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| NFA Regex::parseStarOrAtom() { | ||
| NFA result; | ||
| if (Pattern[Pos] == '*') { | ||
| result = NFA::createForDot(); | ||
| result = NFA::createForKleeneStar(result); | ||
| Pos++; | ||
| } else { | ||
| result = parseAtom(); | ||
| } | ||
| return result; | ||
| } | ||
| // Algo 3.23: Basis | ||
| NFA Regex::parseAtom() { | ||
|
|
||
| if (Pos >= Pattern.size()) { | ||
| return NFA::createForEpsilon(); | ||
| } | ||
| char curChar = Pattern[Pos++]; | ||
|
|
||
| return NFA::createForChar(curChar); | ||
| } | ||
|
|
||
| // Dragon book 2nd edition, algorithm 3.22: Simulating an NFA | ||
| bool Regex::matches(const fextl::string &target) { | ||
| fextl::set<State *> currentStates = NFA::epsilonClosure({Nfa.startState}); | ||
|
|
||
| for (const auto c : target) { | ||
| currentStates = NFA::epsilonClosure(NFA::move(currentStates, c)); | ||
| if (currentStates.empty()) | ||
| return false; | ||
| } | ||
| return std::ranges::any_of(currentStates, &State::isAccepting); | ||
| } | ||
|
|
||
| } // namespace FEXCore::Utils | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| /* | ||
| * Copyright (c) 2025 Rakshit Awasthi | ||
| * SPDX-License-Identifier: MIT | ||
| */ | ||
| #pragma once | ||
badumbatish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| #include "FEXCore/fextl/memory.h" | ||
| #include "FEXCore/fextl/string.h" | ||
| #include <FEXCore/fextl/map.h> | ||
| #include <FEXCore/fextl/stack.h> | ||
| #include <FEXCore/fextl/set.h> | ||
| #include <FEXCore/fextl/vector.h> | ||
|
|
||
| namespace FEXCore::Utils { | ||
|
|
||
| class State { | ||
| public: | ||
| fextl::vector<State *> epsilonTransitions; | ||
| fextl::map<char, fextl::vector<State *>> transitions; | ||
| bool isAccepting; | ||
| State(bool accepting = false) : isAccepting(accepting) {} | ||
| void addEpsilonTransition(State *nextState); | ||
| void addTransition(char c, State *nextState); | ||
| }; | ||
| class NFA { | ||
| public: | ||
| State *startState; | ||
| State *acceptingState; | ||
| fextl::vector<fextl::unique_ptr<State>> states; | ||
|
|
||
| NFA(); | ||
| // Transfers the ownership of the states (unique_ptr) of other NFA to the | ||
| // current NFA. | ||
| void acquireStatesFrom(NFA &other); | ||
|
|
||
| // Functions for creating NFA using the McNaughton-Yamada-Thompson algorithm | ||
| static NFA createForEpsilon(); | ||
| static NFA createForChar(char c); | ||
| static NFA createForDot(); | ||
| static NFA createForConcatenation(NFA &nfa1, NFA &nfa2); | ||
| static NFA createForKleeneStar(NFA &originalNFA); | ||
| static fextl::set<State *> epsilonClosure(const fextl::set<State *> &states); | ||
| static fextl::set<State *> move(const fextl::set<State *> &states, char c); | ||
| }; | ||
|
|
||
| // TODO: probably an NFA vector would be better instead of State vector inside | ||
| // each NFA | ||
|
|
||
| // TODO: Better error reporting? | ||
| class Regex { | ||
| fextl::string Pattern; | ||
| int Pos; | ||
| NFA Nfa; | ||
|
|
||
| // Top level parser, calls parseUnion | ||
| NFA parseExpression(); | ||
|
|
||
| // INFO: "ab" | ||
| NFA parseConcatenation(); | ||
|
|
||
| NFA parseStarOrAtom(); | ||
|
|
||
| // INFO: "(abc)" or a | ||
| NFA parseAtom(); | ||
|
|
||
| public: | ||
| static inline fextl::string Alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,./<>?;':\"[]\\{}|1234567890!@#$%^&*()-=_+"; | ||
badumbatish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| Regex(const fextl::string &s); | ||
| bool matches(const fextl::string &s); | ||
| }; | ||
| } // namespace FEXCore::Utils | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,7 +10,7 @@ | |
| #include <FEXCore/Utils/FileLoading.h> | ||
| #include <FEXHeaderUtils/Filesystem.h> | ||
| #include <FEXHeaderUtils/SymlinkChecks.h> | ||
|
|
||
| #include <FEXCore/Utils/Regex.h> | ||
| #include <cstring> | ||
| #include <functional> | ||
| #ifndef _WIN32 | ||
|
|
@@ -43,22 +43,59 @@ namespace JSON { | |
| return; | ||
| } | ||
|
|
||
| for (const json_t* ConfigItem = json_getChild(ConfigList); ConfigItem != nullptr; ConfigItem = json_getSibling(ConfigItem)) { | ||
| const char* ConfigName = json_getName(ConfigItem); | ||
| const char* ConfigString = json_getValue(ConfigItem); | ||
| auto ListApplier = [&Config, &Func](const json_t* jsonList) { | ||
| for (const json_t* ConfigItem = json_getChild(jsonList); ConfigItem != nullptr; ConfigItem = json_getSibling(ConfigItem)) { | ||
badumbatish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| const char* ConfigName = json_getName(ConfigItem); | ||
| const char* ConfigString = json_getValue(ConfigItem); | ||
|
|
||
| if (!ConfigName) { | ||
| LogMan::Msg::EFmt("JSON file '{}': Couldn't get config name for an item", Config); | ||
| return; | ||
| } | ||
|
|
||
| if (!ConfigName) { | ||
| if (!ConfigString) { | ||
| LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, ConfigName); | ||
| return; | ||
| } | ||
| Func(ConfigName, ConfigString); | ||
| } | ||
|
|
||
| }; | ||
|
|
||
| ListApplier(ConfigList); | ||
|
|
||
| const json_t* RegexList = json_getProperty(json, "RegexConfig"); | ||
badumbatish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| if (!RegexList) { | ||
| // This is a non-error if the configuration file exists but has no "RegexConfig" section | ||
| return; | ||
| } | ||
|
|
||
| using FEXCore::Utils::Regex; | ||
badumbatish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| for (const json_t* RegexItem = json_getChild(RegexList); RegexItem != nullptr; RegexItem = json_getSibling(RegexItem)) { | ||
| const char* RegexName = json_getName(RegexItem); | ||
| const json_t* RegexNamedList = json_getProperty(RegexList, RegexName); | ||
|
|
||
| if (!RegexName) { | ||
| LogMan::Msg::EFmt("JSON file '{}': Couldn't get config name for an item", Config); | ||
| return; | ||
| } | ||
|
|
||
| if (!ConfigString) { | ||
| LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, ConfigName); | ||
| if (!RegexNamedList) { | ||
| LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, RegexName); | ||
| return; | ||
| } | ||
|
|
||
| Func(ConfigName, ConfigString); | ||
| // Matches the first and then get out | ||
| // Needs PR review on this | ||
|
||
| if (Regex(RegexName).matches(Config)) { | ||
| // Safe to assume its just pairs of strings at this point? | ||
| ListApplier(RegexNamedList); | ||
| break; | ||
| } | ||
|
|
||
| } | ||
|
|
||
| } | ||
| } // namespace JSON | ||
|
|
||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| #include "FEXCore/fextl/string.h" | ||
badumbatish marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| #include <FEXCore/Utils/Regex.h> | ||
| #include <catch2/catch_test_macros.hpp> | ||
|
|
||
| using namespace FEXCore::Utils; | ||
|
|
||
| TEST_CASE("Singular regex") { | ||
| CHECK(Regex("a").matches("a") == true); | ||
| CHECK(Regex("a*").matches("aaaaaaa") == true); | ||
| } | ||
|
|
||
| TEST_CASE("Concat regex") { | ||
| CHECK(Regex("aaa").matches("aaa") == true); | ||
| CHECK(Regex("ab").matches("ab") == true); | ||
| CHECK(Regex("a").matches("ab") == false); | ||
| CHECK(Regex("ab").matches("a") == false); | ||
| } | ||
|
|
||
| TEST_CASE("Dot regex") { | ||
| CHECK(Regex("*").matches("") == true); | ||
| CHECK(Regex("*").matches("setup.json") == true); | ||
| CHECK(Regex("setup.*").matches("setupjson") == false); | ||
| CHECK(Regex("setup*").matches("setup.json") == true); | ||
| CHECK(Regex("setup*").matches("setup/setup.json") == true); | ||
| CHECK(Regex("*setup*").matches("setup/setup.json") == true); | ||
| } | ||
|
|
||
|
|
||
| // Tests potential usage inside fex itself | ||
| TEST_CASE("FEX regex") { | ||
| CHECK(Regex("*Config*").matches("/home/ubuntu/.fex-emu/Config.json") == true); | ||
| CHECK(Regex("*Config.json").matches("/home/ubuntu/.fex-emu/Config.json") == true); | ||
| } | ||
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.