Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/CI-unixish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ jobs:
if: matrix.os == 'ubuntu-24.04'
run: |
make clean
make -j$(nproc) CXXOPTS="-O1"
make -j$(nproc) CXXOPTS="-O1 -fno-omit-frame-pointer -fno-optimize-sibling-calls"
valgrind --leak-check=full --num-callers=50 --show-reachable=yes --track-origins=yes --gen-suppressions=all --error-exitcode=42 ./testrunner
# TODO: run Python tests with valgrind
VALGRIND_TOOL=memcheck ./selfcheck.sh
Expand Down
84 changes: 79 additions & 5 deletions simplecpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@
# include <sys/stat.h>
#endif

// Branch hint: unlikely(x) evaluates to the truth value of x. With GCC-compatible
// compilers (__GNUC__ defined) the condition is additionally marked as expected
// to be false via __builtin_expect; elsewhere it is a plain pass-through.
// NOTE(review): C++20 [macro.names] disallows defining a macro whose name matches
// an attribute token such as `unlikely` - consider renaming if this file is ever
// built as C++20 or later.
#ifdef __GNUC__
# define unlikely(x) __builtin_expect(!!(x), 0)
#else
# define unlikely(x) (x)
#endif

static bool isHex(const std::string &s)
{
return s.size()>2 && (s.compare(0,2,"0x")==0 || s.compare(0,2,"0X")==0);
Expand Down Expand Up @@ -253,12 +259,12 @@ class simplecpp::TokenList::Stream {
virtual bool good() = 0;

unsigned char readChar() {
unsigned char ch = static_cast<unsigned char>(get());
unsigned char ch = static_cast<unsigned char>(get()); // TODO: check EOF?

// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
// character is non-ASCII character then replace it with 0xff
if (isUtf16) {
const unsigned char ch2 = static_cast<unsigned char>(get());
const unsigned char ch2 = static_cast<unsigned char>(get()); // TODO: check EOF?
const int ch16 = makeUtf16Char(ch, ch2);
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
}
Expand All @@ -281,13 +287,13 @@ class simplecpp::TokenList::Stream {
}

unsigned char peekChar() {
unsigned char ch = static_cast<unsigned char>(peek());
unsigned char ch = static_cast<unsigned char>(peek()); // TODO: check EOF?

// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
// character is non-ASCII character then replace it with 0xff
if (isUtf16) {
(void)get();
const unsigned char ch2 = static_cast<unsigned char>(peek());
const unsigned char ch2 = static_cast<unsigned char>(peek()); // TODO: check EOF?
unget();
const int ch16 = makeUtf16Char(ch, ch2);
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
Expand Down Expand Up @@ -467,6 +473,74 @@ class FileStream : public simplecpp::TokenList::Stream {
int lastStatus{};
};

class FileStreamBuffered : public simplecpp::TokenList::Stream {
public:
FileStreamBuffered(const std::string &filename, std::vector<std::string> &files)
: file(fopen(filename.c_str(), "rb"))
{
if (!file) {
files.push_back(filename);
throw simplecpp::Output(files, simplecpp::Output::FILE_NOT_FOUND, "File is missing: " + filename);
}
init();
}

~FileStreamBuffered() override {
fclose(file);
file = nullptr;
}

int get() override {
read_internal();
return buf[buf_idx++];
}
int peek() override {
read_internal();
return buf[buf_idx];
}
void unget() override {
--buf_idx;
}
bool good() override {
return lastStatus != EOF;
}

private:
void read_internal() {
// check if we are in the last chunk
if (unlikely(buf_idx >= buf_len)) {
if (buf_len != sizeof(buf)) {
lastStatus = EOF;
return;
}
}

if (unlikely(buf_idx == -1 || buf_idx == buf_len))
{
buf_idx = 0;
buf_len = fread(buf, 1, sizeof(buf), file);
if (buf_len == 0) {
lastStatus = EOF;
}
else if (buf_len != sizeof(buf)) {
if (ferror(file)) {
// TODO: is this correct?
lastStatus = EOF;
}
}
}
}

FileStreamBuffered(const FileStreamBuffered&);
FileStreamBuffered &operator=(const FileStreamBuffered&);

FILE *file;
int lastStatus{};
unsigned char buf[8192];
int buf_len{};
int buf_idx{-1};
};

// Constructs a token list without any tokens: both end pointers are null and
// `files` is initialised from the given `filenames`.
simplecpp::TokenList::TokenList(std::vector<std::string> &filenames)
    : frontToken(nullptr)
    , backToken(nullptr)
    , files(filenames)
{}

simplecpp::TokenList::TokenList(std::istream &istr, std::vector<std::string> &filenames, const std::string &filename, OutputList *outputList)
Expand All @@ -487,7 +561,7 @@ simplecpp::TokenList::TokenList(const std::string &filename, std::vector<std::st
: frontToken(nullptr), backToken(nullptr), files(filenames)
{
try {
FileStream stream(filename, filenames);
FileStreamBuffered stream(filename, filenames);
readfile(stream,filename,outputList);
} catch (const simplecpp::Output & e) { // TODO handle extra type of errors
outputList->push_back(e);
Expand Down
41 changes: 40 additions & 1 deletion test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2504,7 +2504,7 @@ static void readfile_nullbyte()
const char code[] = "ab\0cd";
simplecpp::OutputList outputList;
ASSERT_EQUALS("ab cd", readfile(code,sizeof(code), &outputList));
ASSERT_EQUALS(true, outputList.empty()); // should warning be written?
ASSERT_EQUALS(true, outputList.empty()); // TODO: should warning be written?
}

static void readfile_char()
Expand Down Expand Up @@ -2654,6 +2654,41 @@ static void readfile_file_not_found()
ASSERT_EQUALS("file0,1,file_not_found,File is missing: NotAFile\n", toString(outputList));
}

// Input of zero bytes: no tokens and no diagnostics are expected.
static void readfile_empty()
{
    const char code[] = "";
    simplecpp::OutputList outputList;
    // sizeof(code) counts the terminating NUL of the literal; subtract it so
    // that the input really is empty instead of a single NUL byte
    ASSERT_EQUALS("", readfile(code,sizeof(code)-1, &outputList));
    ASSERT_EQUALS(true, outputList.empty());
}

// the BOM/UTF-16 detection reads two bytes, so feed an input that is exactly
// one byte long
static void readfile_onebyte()
{
    const char code[] = ".";
    simplecpp::OutputList outputList;
    // sizeof(code) counts the terminating NUL of the literal; subtract it so
    // that only the single '.' is passed
    ASSERT_EQUALS(".", readfile(code,sizeof(code)-1, &outputList));
    ASSERT_EQUALS(true, outputList.empty());
}

static void readfile_utf16_unsupported()
{
const char code[] = "\xfe\xff\xd8\x3d\xde\x42"; // smiley emoji
simplecpp::OutputList outputList;
ASSERT_EQUALS("", readfile(code,sizeof(code), &outputList));
ASSERT_EQUALS("file0,1,unhandled_char_error,The code contains unhandled character(s) (character code=255). Neither unicode nor extended ascii is supported.\n", toString(outputList));
}

// UTF-16 input (big-endian BOM) whose final code unit consists of a single
// byte only: the complete characters are still read and nothing is reported.
static void readfile_utf16_incomplete()
{
    const char code[] = "\xfe\xff\x00\x31\x00\x32\x00"; // the last UTF16 char is incomplete
    simplecpp::OutputList outputList;
    // sizeof(code) counts the terminating NUL of the literal, which would
    // complete the last code unit; subtract it to keep the input truncated
    ASSERT_EQUALS("12", readfile(code,sizeof(code)-1, &outputList));
    ASSERT_EQUALS(true, outputList.empty());
}

// TODO: test with incomplete BOMs

static void stringify1()
{
const char code_c[] = "#include \"A.h\"\n"
Expand Down Expand Up @@ -3532,6 +3567,10 @@ int main(int argc, char **argv)
TEST_CASE(readfile_unhandled_chars);
TEST_CASE(readfile_error);
TEST_CASE(readfile_file_not_found);
TEST_CASE(readfile_empty);
TEST_CASE(readfile_onebyte);
TEST_CASE(readfile_utf16_unsupported);
TEST_CASE(readfile_utf16_incomplete);

TEST_CASE(stringify1);

Expand Down
Loading