Skip to content
4 changes: 3 additions & 1 deletion FEXCore/Source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ set(FEXCORE_BASE_SRCS
Utils/FileLoading.cpp
Utils/ForcedAssert.cpp
Utils/LogManager.cpp
Utils/SpinWaitLock.cpp)
Utils/SpinWaitLock.cpp
Utils/Regex.cpp
)

if (NOT MINGW)
list(APPEND FEXCORE_BASE_SRCS
Expand Down
190 changes: 190 additions & 0 deletions FEXCore/Source/Utils/Regex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
/*
* Copyright (c) 2025 Rakshit Awasthi
* SPDX-License-Identifier: MIT
*/
#include "FEXCore/fextl/string.h"
#include <FEXCore/Utils/Regex.h>

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <functional>
#include <iostream>

// Implementation of the FEX regex engine; see unittests/APITests/Regex.cpp
// for test cases.
namespace FEXCore::Utils {
/////////////
// STATE IMPL
/////////////
// Records `nextState` as reachable from this state without consuming any
// input character. Passing a null target is a programming error and is caught
// by the assert when assertions are enabled.
void State::addEpsilonTransition(State *nextState) {
  assert(nextState && "state needs to be non null for addEpsilonTransition");
  epsilonTransitions.emplace_back(nextState);
}

// Records `nextState` as reachable from this state by consuming character `c`.
// Several targets may be registered for the same character; they are kept in
// insertion order. A null target is caught by the assert when assertions are
// enabled.
void State::addTransition(char c, State *nextState) {
  assert(nextState && "state needs to be non null for addTransition");
  auto &targetsForChar = transitions[c];
  targetsForChar.push_back(nextState);
}

/////////////
// NFA IMPL
/////////////
// Builds the two-state skeleton every construction starts from: a
// non-accepting start state and an accepting state, with no transitions
// between them yet. Both states are owned by `states`; `startState` and
// `acceptingState` are non-owning views into that vector's elements.
NFA::NFA() {
  states.push_back(fextl::make_unique<State>());
  startState = states.back().get();

  states.push_back(fextl::make_unique<State>(true));
  acceptingState = states.back().get();
}

// Moves ownership of every state held by `other` into this NFA.
//
// Afterwards `other.states` is empty. `other.startState` and
// `other.acceptingState` are left untouched and keep pointing at the
// transferred states, which are now owned by this NFA.
//
// Acquiring from oneself is a no-op: appending to the vector that is being
// iterated would invalidate the iteration, and the trailing clear() would
// destroy every state.
void NFA::acquireStatesFrom(NFA &other) {
  if (this == &other) {
    return;
  }

  for (auto &ownedState : other.states) {
    states.push_back(std::move(ownedState));
  }

  // Drop the moved-from (now null) unique_ptrs left behind in `other`.
  other.states.clear();
}

// Builds an NFA that accepts only the empty string: the start state is wired
// to the accepting state through a single epsilon transition.
NFA NFA::createForEpsilon() {
  NFA emptyMatcher;
  State *const from = emptyMatcher.startState;
  from->addEpsilonTransition(emptyMatcher.acceptingState);
  return emptyMatcher;
}
// Builds an NFA that accepts exactly one character, whatever that character
// is.
//
// The transition table is keyed by `char`, so "any character" is expressed as
// one transition per possible byte value. Regex::Alphabet is deliberately not
// used here: it omits the space character, '~', '`' and every non-ASCII byte,
// so a wildcard built from it would refuse to match e.g. paths that contain
// spaces or UTF-8 encoded names.
//
// INFO: This is the simple approach. If performance turns out to be a
// problem, a dedicated "any character" transition would avoid populating the
// map.
NFA NFA::createForDot() {
  NFA nfa;

  // Largest value an unsigned char can hold, obtained without extra headers.
  constexpr unsigned MaxByteValue = static_cast<unsigned char>(~0u);

  for (unsigned value = 0; value <= MaxByteValue; ++value) {
    const char ch = static_cast<char>(static_cast<unsigned char>(value));
    nfa.startState->addTransition(ch, nfa.acceptingState);
  }
  return nfa;
}

// Builds an NFA that accepts exactly the one-character string `c`.
NFA NFA::createForChar(char c) {
  NFA literalMatcher;
  State *const from = literalMatcher.startState;
  State *const to = literalMatcher.acceptingState;
  from->addTransition(c, to);
  return literalMatcher;
}

// Dragon book 2nd edition, figure 3.41: NFA for the concat of two regular
// expressions
//
// Returns an NFA accepting any string of `nfa1` followed by any string of
// `nfa2`. `nfa1`'s accepting state is demoted and linked to `nfa2`'s start
// state with an epsilon transition.
//
// Both operands are consumed: their states are moved into the returned NFA,
// leaving `nfa1.states` and `nfa2.states` empty. Their startState and
// acceptingState pointers are not reset and refer to states owned by the
// result. `nfa1` and `nfa2` must be distinct objects.
NFA NFA::createForConcatenation(NFA &nfa1, NFA &nfa2) {
  nfa1.acceptingState->addEpsilonTransition(nfa2.startState);
  nfa1.acceptingState->isAccepting = false;

  NFA newNFA;
  // The default constructor allocates its own start/accepting pair. Neither is
  // used here, so release them instead of carrying two unreachable states
  // (one of them flagged as accepting) along with every concatenation.
  newNFA.states.clear();

  newNFA.startState = nfa1.startState;
  newNFA.acceptingState = nfa2.acceptingState;
  newNFA.acquireStatesFrom(nfa1);
  newNFA.acquireStatesFrom(nfa2);
  return newNFA;
}

// Dragon book 2nd edition, figure 3.42: NFA for the closure of a regular
// expression
//
// Wraps `originalNFA` in a fresh start/accepting pair so that the result
// accepts zero or more repetitions of what `originalNFA` accepted. The states
// of `originalNFA` are moved into the returned NFA, which leaves
// `originalNFA.states` empty.
NFA NFA::createForKleeneStar(NFA &originalNFA) {
  NFA closure;
  State *const innerStart = originalNFA.startState;
  State *const innerAccept = originalNFA.acceptingState;

  // Outer start: either enter the wrapped automaton or skip it entirely.
  closure.startState->addEpsilonTransition(innerStart);
  closure.startState->addEpsilonTransition(closure.acceptingState);

  // Inner accept: either loop back for another repetition or leave. It is no
  // longer an accepting state itself.
  innerAccept->addEpsilonTransition(innerStart);
  innerAccept->addEpsilonTransition(closure.acceptingState);
  innerAccept->isAccepting = false;

  closure.acquireStatesFrom(originalNFA);
  return closure;
}

// Computes the epsilon closure of `states`: the input states themselves plus
// every state that can be reached from them by following epsilon transitions
// only. Implemented as a worklist traversal; a state is queued the first time
// it is added to the result, so cycles terminate.
fextl::set<State *> NFA::epsilonClosure(const fextl::set<State *> &states) {
  fextl::set<State *> closure = states;

  fextl::stack<State *> pending;
  for (State *seed : states) {
    pending.push(seed);
  }

  while (!pending.empty()) {
    State *const current = pending.top();
    pending.pop();

    for (State *successor : current->epsilonTransitions) {
      // insert() reports whether the state was new to the closure.
      const bool newlyReached = closure.insert(successor).second;
      if (newlyReached) {
        pending.push(successor);
      }
    }
  }
  return closure;
}

// Computes the set of states reachable from any state in `states` by
// consuming exactly the character `c`. Epsilon transitions are not followed
// here. States without a transition on `c` contribute nothing.
fextl::set<State *> NFA::move(const fextl::set<State *> &states, const char c) {
  fextl::set<State *> reachable;
  for (const State *source : states) {
    const auto &byChar = source->transitions;
    const auto found = byChar.find(c);
    if (found == byChar.end()) {
      continue;
    }
    reachable.insert(found->second.begin(), found->second.end());
  }
  return reachable;
}

/////////////
// REGEX IMPL
/////////////
// Stores a copy of the pattern `s` and immediately compiles it into the NFA
// that matches() later simulates. Parsing starts at the first character.
Regex::Regex(const fextl::string &s)
  : Pattern(s)
  , Pos(0) {
  Nfa = parseExpression();
}

// Entry point of the pattern parser. Currently an expression is just a
// concatenation, so this forwards to parseConcatenation().
NFA Regex::parseExpression() {
  NFA expression = parseConcatenation();
  return expression;
}

// Parses a run of consecutive elements and chains them together with
// NFA::createForConcatenation. Parsing stops at the end of the pattern or at
// the first '|' found after the leading element; the '|' itself is not
// consumed here.
NFA Regex::parseConcatenation() {
  NFA sequence = parseStarOrAtom();

  for (;;) {
    const bool exhausted = !(Pos < Pattern.size());
    if (exhausted || Pattern[Pos] == '|') {
      break;
    }

    NFA nextElement = parseStarOrAtom();
    sequence = NFA::createForConcatenation(sequence, nextElement);
  }
  return sequence;
}

// Parses one pattern element at the current position.
//
// A '*' is a wildcard: it is consumed and turned into "any single character,
// repeated zero or more times". It does not repeat the preceding element.
// Anything else is handed to parseAtom().
NFA Regex::parseStarOrAtom() {
  if (Pattern[Pos] != '*') {
    return parseAtom();
  }

  NFA anyChar = NFA::createForDot();
  NFA anyRun = NFA::createForKleeneStar(anyChar);
  Pos++;
  return anyRun;
}
// Algo 3.23: Basis
//
// Consumes one character of the pattern and returns an NFA that matches that
// character literally. At the end of the pattern nothing is consumed and an
// NFA matching the empty string is returned instead.
NFA Regex::parseAtom() {
  const bool atEnd = Pos >= Pattern.size();
  if (atEnd) {
    return NFA::createForEpsilon();
  }

  const char literal = Pattern[Pos];
  Pos++;
  return NFA::createForChar(literal);
}

// Dragon book 2nd edition, algorithm 3.22: Simulating an NFA
//
// Returns true when the whole of `target` is accepted by the compiled
// pattern. The set of currently active states is advanced one character at a
// time; once it becomes empty no continuation can match, so the search stops
// early.
bool Regex::matches(const fextl::string &target) {
  fextl::set<State *> active = NFA::epsilonClosure({Nfa.startState});

  for (const char symbol : target) {
    active = NFA::epsilonClosure(NFA::move(active, symbol));
    if (active.empty()) {
      return false;
    }
  }

  // Accept if at least one state still alive after the last character is an
  // accepting state.
  for (const State *state : active) {
    if (state->isAccepting) {
      return true;
    }
  }
  return false;
}

} // namespace FEXCore::Utils
70 changes: 70 additions & 0 deletions FEXCore/include/FEXCore/Utils/Regex.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2025 Rakshit Awasthi
* SPDX-License-Identifier: MIT
*/
#pragma once
#include "FEXCore/fextl/memory.h"
#include "FEXCore/fextl/string.h"
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/stack.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/vector.h>

namespace FEXCore::Utils {

// A single NFA state. States refer to each other through raw, non-owning
// pointers; within this file's NFA class the pointed-to states are owned by
// NFA::states.
class State {
public:
// States reachable from this one without consuming any input.
fextl::vector<State *> epsilonTransitions;
// States reachable from this one by consuming the key character. A character
// may lead to several states.
fextl::map<char, fextl::vector<State *>> transitions;
// True when stopping in this state means the input is accepted.
bool isAccepting;
// Creates a state with no outgoing transitions; non-accepting by default.
State(bool accepting = false) : isAccepting(accepting) {}
// Adds an epsilon (no input consumed) edge to `nextState`, which must be
// non-null.
void addEpsilonTransition(State *nextState);
// Adds an edge to `nextState`, taken when character `c` is consumed.
// `nextState` must be non-null.
void addTransition(char c, State *nextState);
};
// Nondeterministic finite automaton with one start state and one accepting
// state. The automaton owns its states through `states`; `startState` and
// `acceptingState` are non-owning pointers.
class NFA {
public:
// Entry state of the automaton (non-owning).
State *startState;
// The single accepting state of the automaton (non-owning).
State *acceptingState;
// Owning storage for the states of this automaton.
fextl::vector<fextl::unique_ptr<State>> states;

// Creates a start state and an accepting state with no transitions.
NFA();
// Transfers the ownership of the states (unique_ptr) of other NFA to the
// current NFA.
void acquireStatesFrom(NFA &other);

// Functions for creating NFA using the McNaughton-Yamada-Thompson algorithm
// Accepts only the empty string.
static NFA createForEpsilon();
// Accepts exactly the one-character string `c`.
static NFA createForChar(char c);
// Accepts a single wildcard character.
static NFA createForDot();
// Accepts a string of nfa1 followed by a string of nfa2. The states of both
// arguments are moved into the result.
static NFA createForConcatenation(NFA &nfa1, NFA &nfa2);
// Accepts zero or more repetitions of originalNFA. The states of the
// argument are moved into the result.
static NFA createForKleeneStar(NFA &originalNFA);
// Returns `states` plus everything reachable from them through epsilon
// transitions only.
static fextl::set<State *> epsilonClosure(const fextl::set<State *> &states);
// Returns the states reachable from `states` by consuming character `c`.
static fextl::set<State *> move(const fextl::set<State *> &states, char c);
};

// TODO: probably an NFA vector would be better instead of State vector inside
// each NFA

// TODO: Better error reporting?
// Small pattern matcher backed by an NFA.
//
// The pattern is compiled once in the constructor; matches() then tests
// whether a whole string is accepted. The supported syntax is glob-like: '*'
// stands for any run of characters (including none) and every other character
// matches itself.
class Regex {
  // Copy of the pattern being compiled.
  fextl::string Pattern;
  // Index of the next unread character of Pattern. Uses the string's own
  // unsigned size type so it compares against Pattern.size() without a
  // signed/unsigned mismatch.
  fextl::string::size_type Pos;
  // Automaton compiled from Pattern.
  NFA Nfa;

  // Top level parser, currently forwards to parseConcatenation
  NFA parseExpression();

  // INFO: "ab"
  NFA parseConcatenation();

  // INFO: "*" (wildcard run), otherwise defers to parseAtom
  NFA parseStarOrAtom();

  // INFO: a single literal character such as "a"; grouping with parentheses
  // is not implemented
  NFA parseAtom();

public:
  // Printable characters known to the engine. Note that it contains neither
  // whitespace, '~', '`' nor any non-ASCII byte.
  static inline fextl::string Alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ,./<>?;':\"[]\\{}|1234567890!@#$%^&*()-=_+";
  // Compiles the pattern `s`.
  Regex(const fextl::string &s);
  // Returns true when the whole of `s` matches the compiled pattern.
  bool matches(const fextl::string &s);
};
} // namespace FEXCore::Utils
53 changes: 45 additions & 8 deletions Source/Common/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include <FEXCore/Utils/FileLoading.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/SymlinkChecks.h>

#include <FEXCore/Utils/Regex.h>
#include <cstring>
#include <functional>
#ifndef _WIN32
Expand Down Expand Up @@ -43,22 +43,59 @@ namespace JSON {
return;
}

for (const json_t* ConfigItem = json_getChild(ConfigList); ConfigItem != nullptr; ConfigItem = json_getSibling(ConfigItem)) {
const char* ConfigName = json_getName(ConfigItem);
const char* ConfigString = json_getValue(ConfigItem);
auto ListApplier = [&Config, &Func](const json_t* jsonList) {
for (const json_t* ConfigItem = json_getChild(jsonList); ConfigItem != nullptr; ConfigItem = json_getSibling(ConfigItem)) {
const char* ConfigName = json_getName(ConfigItem);
const char* ConfigString = json_getValue(ConfigItem);

if (!ConfigName) {
LogMan::Msg::EFmt("JSON file '{}': Couldn't get config name for an item", Config);
return;
}

if (!ConfigName) {
if (!ConfigString) {
LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, ConfigName);
return;
}
Func(ConfigName, ConfigString);
}

};

ListApplier(ConfigList);

const json_t* RegexList = json_getProperty(json, "RegexConfig");
if (!RegexList) {
// This is a non-error if the configuration file exists but no RegexConfigList section
return;
}

using FEXCore::Utils::Regex;

for (const json_t* RegexItem = json_getChild(RegexList); RegexItem != nullptr; RegexItem = json_getSibling(RegexItem)) {
const char* RegexName = json_getName(RegexItem);
const json_t* RegexNamedList = json_getProperty(RegexList, RegexName);

if (!RegexName) {
LogMan::Msg::EFmt("JSON file '{}': Couldn't get config name for an item", Config);
return;
}

if (!ConfigString) {
LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, ConfigName);
if (!RegexNamedList) {
LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, RegexName);
return;
}

Func(ConfigName, ConfigString);
// Matches the first and then get out
// Needs PR review on this
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you need feedback on?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mainly would want to know if the current approach of applying the config is good or not

if (Regex(RegexName).matches(Config)) {
// Safe to assume its just pairs of strings at this point?
ListApplier(RegexNamedList);
break;
}

}

}
} // namespace JSON

Expand Down
4 changes: 3 additions & 1 deletion unittests/APITests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ set(TESTS
FileMappingBaseAddress
Filesystem
InterruptableConditionVariable
StringUtils)
StringUtils
Regex
)

list(APPEND LIBS Common FEXCore JemallocLibs)

Expand Down
33 changes: 33 additions & 0 deletions unittests/APITests/Regex.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "FEXCore/fextl/string.h"
#include <FEXCore/Utils/Regex.h>
#include <catch2/catch_test_macros.hpp>

using namespace FEXCore::Utils;

// A lone literal, and a literal followed by the '*' wildcard.
TEST_CASE("Singular regex") {
  CHECK(Regex("a").matches("a"));
  CHECK(Regex("a*").matches("aaaaaaa"));
}

// Sequences of literals must match the whole input: no prefix matches and no
// partial pattern matches.
TEST_CASE("Concat regex") {
  CHECK(Regex("aaa").matches("aaa"));
  CHECK(Regex("ab").matches("ab"));
  CHECK_FALSE(Regex("a").matches("ab"));
  CHECK_FALSE(Regex("ab").matches("a"));
}

// '*' matches any run of characters, including the empty one, while '.' is an
// ordinary literal that has to be present in the input.
TEST_CASE("Dot regex") {
  CHECK(Regex("*").matches(""));
  CHECK(Regex("*").matches("setup.json"));
  CHECK_FALSE(Regex("setup.*").matches("setupjson"));
  CHECK(Regex("setup*").matches("setup.json"));
  CHECK(Regex("setup*").matches("setup/setup.json"));
  CHECK(Regex("*setup*").matches("setup/setup.json"));
}


// Patterns shaped like the ones FEX itself is expected to use: wildcards
// around, or in front of, a config file name inside an absolute path.
TEST_CASE("FEX regex") {
  CHECK(Regex("*Config*").matches("/home/ubuntu/.fex-emu/Config.json"));
  CHECK(Regex("*Config.json").matches("/home/ubuntu/.fex-emu/Config.json"));
}