From 097a7658473912c062e3836813dbb29f4d541a0f Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 11:31:52 +0100 Subject: [PATCH 01/21] added wrapper class "Stream" to TokenList --- simplecpp.cpp | 41 +++++++++++++++++++++++++++++++++-------- simplecpp.h | 6 ++++-- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 95d72136..61a48f56 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -230,12 +230,36 @@ void simplecpp::Token::printOut() const std::cout << std::endl; } +class simplecpp::TokenList::Stream { +public: + Stream(std::istream &istr) + : istr(istr) + {} + + int get() { + return istr.get(); + } + int peek() { + return istr.peek(); + } + void unget() { + istr.unget(); + } + bool good() { + return istr.good(); + } + +private: + std::istream &istr; +}; + simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} simplecpp::TokenList::TokenList(std::istream &istr, std::vector &filenames, const std::string &filename, OutputList *outputList) : frontToken(nullptr), backToken(nullptr), files(filenames) { - readfile(istr,filename,outputList); + simplecpp::TokenList::Stream stream(istr); + readfile(stream,filename,outputList); } simplecpp::TokenList::TokenList(const TokenList &other) : frontToken(nullptr), backToken(nullptr), files(other.files) @@ -335,7 +359,7 @@ std::string simplecpp::TokenList::stringify() const return ret.str(); } -static unsigned char readChar(std::istream &istr, unsigned int bom) +static unsigned char readChar(simplecpp::TokenList::Stream &istr, unsigned int bom) { unsigned char ch = static_cast(istr.get()); @@ -366,7 +390,7 @@ static unsigned char readChar(std::istream &istr, unsigned int bom) return ch; } -static unsigned char peekChar(std::istream &istr, unsigned int bom) +static unsigned char peekChar(simplecpp::TokenList::Stream &istr, unsigned int bom) { unsigned char ch = static_cast(istr.peek()); @@ -387,14 +411,14 @@ 
static unsigned char peekChar(std::istream &istr, unsigned int bom) return ch; } -static void ungetChar(std::istream &istr, unsigned int bom) +static void ungetChar(simplecpp::TokenList::Stream &istr, unsigned int bom) { istr.unget(); if (bom == 0xfeff || bom == 0xfffe) istr.unget(); } -static unsigned short getAndSkipBOM(std::istream &istr) +static unsigned short getAndSkipBOM(simplecpp::TokenList::Stream &istr) { const int ch1 = istr.peek(); @@ -476,7 +500,7 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li static const std::string COMMENT_END("*/"); -void simplecpp::TokenList::readfile(std::istream &istr, const std::string &filename, OutputList *outputList) +void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, OutputList *outputList) { std::stack loc; @@ -1169,7 +1193,7 @@ void simplecpp::TokenList::removeComments() } } -std::string simplecpp::TokenList::readUntil(std::istream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom) +std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom) { std::string ret; ret += start; @@ -1300,7 +1324,8 @@ namespace simplecpp { Macro(const std::string &name, const std::string &value, std::vector &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(false) { const std::string def(name + ' ' + value); std::istringstream istr(def); - tokenListDefine.readfile(istr); + simplecpp::TokenList::Stream stream(istr); + tokenListDefine.readfile(stream); if (!parseDefine(tokenListDefine.cfront())) throw std::runtime_error("bad macro syntax. macroname=" + name + " value=" + value); } diff --git a/simplecpp.h b/simplecpp.h index 3412da59..fe4025f4 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -193,6 +193,8 @@ namespace simplecpp { /** List of tokens. 
*/ class SIMPLECPP_LIB TokenList { public: + class Stream; + explicit TokenList(std::vector &filenames); TokenList(std::istream &istr, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); TokenList(const TokenList &other); @@ -214,7 +216,7 @@ namespace simplecpp { void dump() const; std::string stringify() const; - void readfile(std::istream &istr, const std::string &filename=std::string(), OutputList *outputList = nullptr); + void readfile(Stream &istr, const std::string &filename=std::string(), OutputList *outputList = nullptr); void constFold(); void removeComments(); @@ -279,7 +281,7 @@ namespace simplecpp { void constFoldLogicalOp(Token *tok); void constFoldQuestionOp(Token **tok1); - std::string readUntil(std::istream &istr, const Location &location, char start, char end, OutputList *outputList, unsigned int bom); + std::string readUntil(Stream &istr, const Location &location, char start, char end, OutputList *outputList, unsigned int bom); void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); std::string lastLine(int maxsize=100000) const; From c2ae44915d81595152207b7812eb035bb30914f2 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 11:43:58 +0100 Subject: [PATCH 02/21] moved some helper function into TokenList::Stream --- simplecpp.cpp | 247 +++++++++++++++++++++++++------------------------- simplecpp.h | 4 +- 2 files changed, 126 insertions(+), 125 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 61a48f56..f0d7dfc0 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -234,7 +234,9 @@ class simplecpp::TokenList::Stream { public: Stream(std::istream &istr) : istr(istr) - {} + { + bom = getAndSkipBOM(); + } int get() { return istr.get(); @@ -249,8 +251,95 @@ class simplecpp::TokenList::Stream { return istr.good(); } + unsigned char readChar() + { + unsigned char ch = static_cast(get()); + + // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. 
If the + // character is non-ASCII character then replace it with 0xff + if (bom == 0xfeff || bom == 0xfffe) { + const unsigned char ch2 = static_cast(get()); + const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); + ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); + } + + // Handling of newlines.. + if (ch == '\r') { + ch = '\n'; + if (bom == 0 && static_cast(peek()) == '\n') + (void)get(); + else if (bom == 0xfeff || bom == 0xfffe) { + int c1 = get(); + int c2 = get(); + int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); + if (ch16 != '\n') { + unget(); + unget(); + } + } + } + + return ch; + } + + unsigned char peekChar() + { + unsigned char ch = static_cast(peek()); + + // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the + // character is non-ASCII character then replace it with 0xff + if (bom == 0xfeff || bom == 0xfffe) { + (void)get(); + const unsigned char ch2 = static_cast(peek()); + unget(); + const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); + ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); + } + + // Handling of newlines.. + if (ch == '\r') + ch = '\n'; + + return ch; + } + + void ungetChar() + { + unget(); + if (bom == 0xfeff || bom == 0xfffe) + unget(); + } + private: + unsigned short getAndSkipBOM() + { + const int ch1 = peek(); + + // The UTF-16 BOM is 0xfffe or 0xfeff. 
+ if (ch1 >= 0xfe) { + unsigned short bom = (static_cast(get()) << 8); + if (peek() >= 0xfe) + return bom | static_cast(get()); + unget(); + return 0; + } + + // Skip UTF-8 BOM 0xefbbbf + if (ch1 == 0xef) { + (void)get(); + if (get() == 0xbb && peek() == 0xbf) { + (void)get(); + } else { + unget(); + unget(); + } + } + + return 0; + } + std::istream &istr; + unsigned short bom; }; simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} @@ -359,92 +448,6 @@ std::string simplecpp::TokenList::stringify() const return ret.str(); } -static unsigned char readChar(simplecpp::TokenList::Stream &istr, unsigned int bom) -{ - unsigned char ch = static_cast(istr.get()); - - // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the - // character is non-ASCII character then replace it with 0xff - if (bom == 0xfeff || bom == 0xfffe) { - const unsigned char ch2 = static_cast(istr.get()); - const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); - ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); - } - - // Handling of newlines.. - if (ch == '\r') { - ch = '\n'; - if (bom == 0 && static_cast(istr.peek()) == '\n') - (void)istr.get(); - else if (bom == 0xfeff || bom == 0xfffe) { - const int c1 = istr.get(); - const int c2 = istr.get(); - const int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); - if (ch16 != '\n') { - istr.unget(); - istr.unget(); - } - } - } - - return ch; -} - -static unsigned char peekChar(simplecpp::TokenList::Stream &istr, unsigned int bom) -{ - unsigned char ch = static_cast(istr.peek()); - - // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the - // character is non-ASCII character then replace it with 0xff - if (bom == 0xfeff || bom == 0xfffe) { - (void)istr.get(); - const unsigned char ch2 = static_cast(istr.peek()); - istr.unget(); - const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); - ch = static_cast(((ch16 >= 0x80) ? 
0xff : ch16)); - } - - // Handling of newlines.. - if (ch == '\r') - ch = '\n'; - - return ch; -} - -static void ungetChar(simplecpp::TokenList::Stream &istr, unsigned int bom) -{ - istr.unget(); - if (bom == 0xfeff || bom == 0xfffe) - istr.unget(); -} - -static unsigned short getAndSkipBOM(simplecpp::TokenList::Stream &istr) -{ - const int ch1 = istr.peek(); - - // The UTF-16 BOM is 0xfffe or 0xfeff. - if (ch1 >= 0xfe) { - const unsigned short bom = (static_cast(istr.get()) << 8); - if (istr.peek() >= 0xfe) - return bom | static_cast(istr.get()); - istr.unget(); - return 0; - } - - // Skip UTF-8 BOM 0xefbbbf - if (ch1 == 0xef) { - (void)istr.get(); - if (istr.get() == 0xbb && istr.peek() == 0xbf) { - (void)istr.get(); - } else { - istr.unget(); - istr.unget(); - } - } - - return 0; -} - static bool isNameChar(unsigned char ch) { return std::isalnum(ch) || ch == '_' || ch == '$'; @@ -500,7 +503,7 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li static const std::string COMMENT_END("*/"); -void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, OutputList *outputList) +void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, OutputList *outputList) { std::stack loc; @@ -508,15 +511,13 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O const Token *oldLastToken = nullptr; - const unsigned short bom = getAndSkipBOM(istr); - Location location(files); location.fileIndex = fileIndex(filename); location.line = 1U; location.col = 1U; - while (istr.good()) { - unsigned char ch = readChar(istr,bom); - if (!istr.good()) + while (stream.good()) { + unsigned char ch = stream.readChar(); + if (!stream.good()) break; if (ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r') ch = ' '; @@ -594,12 +595,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O if (cback() && cback()->location.line == location.line && cback()->previous && 
cback()->previous->op == '#' && isLastLinePreprocessor() && (lastLine() == "# error" || lastLine() == "# warning")) { char prev = ' '; - while (istr.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) { + while (stream.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) { currentToken += ch; prev = ch; - ch = readChar(istr, bom); + ch = stream.readChar(); } - ungetChar(istr, bom); + stream.ungetChar(); push_back(new Token(currentToken, location)); location.adjust(currentToken); continue; @@ -608,21 +609,21 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O // number or name if (isNameChar(ch)) { const bool num = std::isdigit(ch); - while (istr.good() && isNameChar(ch)) { + while (stream.good() && isNameChar(ch)) { currentToken += ch; - ch = readChar(istr,bom); - if (num && ch=='\'' && isNameChar(peekChar(istr,bom))) - ch = readChar(istr,bom); + ch = stream.readChar(); + if (num && ch=='\'' && isNameChar(stream.peekChar())) + ch = stream.readChar(); } - ungetChar(istr,bom); + stream.ungetChar(); } // comment - else if (ch == '/' && peekChar(istr,bom) == '/') { - while (istr.good() && ch != '\r' && ch != '\n') { + else if (ch == '/' && stream.peekChar() == '/') { + while (stream.good() && ch != '\r' && ch != '\n') { currentToken += ch; - ch = readChar(istr, bom); + ch = stream.readChar(); } const std::string::size_type pos = currentToken.find_last_not_of(" \t"); if (pos < currentToken.size() - 1U && currentToken[pos] == '\\') @@ -631,20 +632,20 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O ++multiline; currentToken.erase(currentToken.size() - 1U); } else { - ungetChar(istr, bom); + stream.ungetChar(); } } // comment - else if (ch == '/' && peekChar(istr,bom) == '*') { + else if (ch == '/' && stream.peekChar() == '*') { currentToken = "/*"; - (void)readChar(istr,bom); - ch = readChar(istr,bom); - while (istr.good()) { + (void)stream.readChar(); + ch = stream.readChar(); + while 
(stream.good()) { currentToken += ch; if (currentToken.size() >= 4U && endsWith(currentToken, COMMENT_END)) break; - ch = readChar(istr,bom); + ch = stream.readChar(); } // multiline.. @@ -675,12 +676,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O std::string delim; currentToken = ch; prefix.resize(prefix.size() - 1); - ch = readChar(istr,bom); - while (istr.good() && ch != '(' && ch != '\n') { + ch = stream.readChar(); + while (stream.good() && ch != '(' && ch != '\n') { delim += ch; - ch = readChar(istr,bom); + ch = stream.readChar(); } - if (!istr.good() || ch == '\n') { + if (!stream.good() || ch == '\n') { if (outputList) { Output err(files); err.type = Output::SYNTAX_ERROR; @@ -691,8 +692,8 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O return; } const std::string endOfRawString(')' + delim + currentToken); - while (istr.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1)) - currentToken += readChar(istr,bom); + while (stream.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1)) + currentToken += stream.readChar(); if (!endsWith(currentToken, endOfRawString)) { if (outputList) { Output err(files); @@ -716,7 +717,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O continue; } - currentToken = readUntil(istr,location,ch,ch,outputList,bom); + currentToken = readUntil(stream,location,ch,ch,outputList); if (currentToken.size() < 2U) // Error is reported by readUntil() return; @@ -748,7 +749,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O } if (*currentToken.begin() == '<' && isLastLinePreprocessor() && lastLine() == "# include") { - currentToken = readUntil(istr, location, '<', '>', outputList, bom); + currentToken = readUntil(stream, location, '<', '>', outputList); if (currentToken.size() < 2U) return; } @@ -1193,15 +1194,15 @@ void 
simplecpp::TokenList::removeComments() } } -std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom) +std::string simplecpp::TokenList::readUntil(Stream &stream, const Location &location, const char start, const char end, OutputList *outputList) { std::string ret; ret += start; bool backslash = false; char ch = 0; - while (ch != end && ch != '\r' && ch != '\n' && istr.good()) { - ch = readChar(istr, bom); + while (ch != end && ch != '\r' && ch != '\n' && stream.good()) { + ch = stream.readChar(); if (backslash && ch == '\n') { ch = 0; backslash = false; @@ -1213,7 +1214,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati bool update_ch = false; char next = 0; do { - next = readChar(istr, bom); + next = stream.readChar(); if (next == '\r' || next == '\n') { ret.erase(ret.size()-1U); backslash = (next == '\r'); @@ -1227,7 +1228,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati } } - if (!istr.good() || ch != end) { + if (!stream.good() || ch != end) { clear(); if (outputList) { Output err(files); diff --git a/simplecpp.h b/simplecpp.h index fe4025f4..45a720f5 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -216,7 +216,7 @@ namespace simplecpp { void dump() const; std::string stringify() const; - void readfile(Stream &istr, const std::string &filename=std::string(), OutputList *outputList = nullptr); + void readfile(Stream &stream, const std::string &filename=std::string(), OutputList *outputList = nullptr); void constFold(); void removeComments(); @@ -281,7 +281,7 @@ namespace simplecpp { void constFoldLogicalOp(Token *tok); void constFoldQuestionOp(Token **tok1); - std::string readUntil(Stream &istr, const Location &location, char start, char end, OutputList *outputList, unsigned int bom); + std::string readUntil(Stream &istr, const Location &location, char start, char end, OutputList *outputList); 
void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); std::string lastLine(int maxsize=100000) const; From 62acc5826dc822345898100ebc4df504c6acc844 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 11:47:17 +0100 Subject: [PATCH 03/21] simplified UTF-16 checks --- simplecpp.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index f0d7dfc0..ace4835b 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -234,8 +234,9 @@ class simplecpp::TokenList::Stream { public: Stream(std::istream &istr) : istr(istr) + , bom(getAndSkipBOM()) + , isUtf16(bom == 0xfeff || bom == 0xfffe) { - bom = getAndSkipBOM(); } int get() { @@ -257,7 +258,7 @@ class simplecpp::TokenList::Stream { // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the // character is non-ASCII character then replace it with 0xff - if (bom == 0xfeff || bom == 0xfffe) { + if (isUtf16) { const unsigned char ch2 = static_cast(get()); const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); @@ -268,7 +269,7 @@ class simplecpp::TokenList::Stream { ch = '\n'; if (bom == 0 && static_cast(peek()) == '\n') (void)get(); - else if (bom == 0xfeff || bom == 0xfffe) { + else if (isUtf16) { int c1 = get(); int c2 = get(); int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); @@ -288,7 +289,7 @@ class simplecpp::TokenList::Stream { // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. 
If the // character is non-ASCII character then replace it with 0xff - if (bom == 0xfeff || bom == 0xfffe) { + if (isUtf16) { (void)get(); const unsigned char ch2 = static_cast(peek()); unget(); @@ -306,7 +307,7 @@ class simplecpp::TokenList::Stream { void ungetChar() { unget(); - if (bom == 0xfeff || bom == 0xfffe) + if (isUtf16) unget(); } @@ -339,7 +340,8 @@ class simplecpp::TokenList::Stream { } std::istream &istr; - unsigned short bom; + const unsigned short bom; + const bool isUtf16; }; simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} From a2cc676ccbe2fafaafb9baffa06c0086f177351e Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 13:28:13 +0100 Subject: [PATCH 04/21] made simplecpp::TokenList::Stream an abstract class and moved implementation-specific parts into StdIStream --- simplecpp.cpp | 62 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index ace4835b..a44dc028 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -232,25 +232,12 @@ void simplecpp::Token::printOut() const class simplecpp::TokenList::Stream { public: - Stream(std::istream &istr) - : istr(istr) - , bom(getAndSkipBOM()) - , isUtf16(bom == 0xfeff || bom == 0xfffe) - { - } + virtual ~Stream() {} - int get() { - return istr.get(); - } - int peek() { - return istr.peek(); - } - void unget() { - istr.unget(); - } - bool good() { - return istr.good(); - } + virtual int get() = 0; + virtual int peek() = 0; + virtual void unget() = 0; + virtual bool good() = 0; unsigned char readChar() { @@ -311,7 +298,12 @@ class simplecpp::TokenList::Stream { unget(); } -private: +protected: + void init() { + bom = getAndSkipBOM(); + isUtf16 = (bom == 0xfeff || bom == 0xfffe); + } + unsigned short getAndSkipBOM() { const int ch1 = peek(); @@ -339,9 +331,33 @@ class simplecpp::TokenList::Stream { return 0; } + unsigned short bom; + bool 
isUtf16; +}; + +class StdIStream : public simplecpp::TokenList::Stream { +public: + StdIStream(std::istream &istr) + : istr(istr) + { + init(); + } + + virtual int get() { + return istr.get(); + } + virtual int peek() { + return istr.peek(); + } + virtual void unget() { + istr.unget(); + } + virtual bool good() { + return istr.good(); + } + +private: std::istream &istr; - const unsigned short bom; - const bool isUtf16; }; simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} @@ -349,7 +365,7 @@ simplecpp::TokenList::TokenList(std::vector &filenames) : frontToke simplecpp::TokenList::TokenList(std::istream &istr, std::vector &filenames, const std::string &filename, OutputList *outputList) : frontToken(nullptr), backToken(nullptr), files(filenames) { - simplecpp::TokenList::Stream stream(istr); + StdIStream stream(istr); readfile(stream,filename,outputList); } @@ -1327,7 +1343,7 @@ namespace simplecpp { Macro(const std::string &name, const std::string &value, std::vector &f) : nameTokDef(nullptr), files(f), tokenListDefine(f), valueDefinedInCode_(false) { const std::string def(name + ' ' + value); std::istringstream istr(def); - simplecpp::TokenList::Stream stream(istr); + StdIStream stream(istr); tokenListDefine.readfile(stream); if (!parseDefine(tokenListDefine.cfront())) throw std::runtime_error("bad macro syntax. 
macroname=" + name + " value=" + value); From 07ec136dc71d89bfd4d363bab2daf133bcf43396 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 13:38:46 +0100 Subject: [PATCH 05/21] added simplecpp::TokenList::Stream implementation "FileStream" which uses C I/O functions --- CMakeLists.txt | 4 +++- main.cpp | 4 ++-- simplecpp.cpp | 41 +++++++++++++++++++++++++++++++++++++++++ simplecpp.h | 1 + 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 26398823..e88afe5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,9 +10,11 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") # no need for c++98 compatibility add_compile_options(-Wno-c++98-compat-pedantic) # these are not really fixable - add_compile_options(-Wno-exit-time-destructors -Wno-global-constructors) + add_compile_options(-Wno-exit-time-destructors -Wno-global-constructors -Wno-weak-vtables) # we are not interested in these add_compile_options(-Wno-multichar -Wno-four-char-constants) + # ignore C++11-specific warning + add_compile_options(-Wno-suggest-override -Wno-suggest-destructor-override) # TODO: fix these? 
add_compile_options(-Wno-padded -Wno-sign-conversion -Wno-implicit-int-conversion -Wno-shorten-64-to-32 -Wno-shadow-field-in-constructor) diff --git a/main.cpp b/main.cpp index e84d9de7..fb3d4256 100644 --- a/main.cpp +++ b/main.cpp @@ -93,8 +93,8 @@ int main(int argc, char **argv) // Perform preprocessing simplecpp::OutputList outputList; std::vector files; - std::ifstream f(filename); - simplecpp::TokenList rawtokens(f,files,filename,&outputList); + //std::ifstream f(filename); + simplecpp::TokenList rawtokens(files,filename,&outputList); rawtokens.removeComments(); std::map included = simplecpp::load(rawtokens, files, dui, &outputList); for (std::pair i : included) diff --git a/simplecpp.cpp b/simplecpp.cpp index a44dc028..0f6643db 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -360,6 +360,40 @@ class StdIStream : public simplecpp::TokenList::Stream { std::istream &istr; }; +class FileStream : public simplecpp::TokenList::Stream { +public: + FileStream(const std::string &filename) + : file(fopen(filename.c_str(), "rb")) + { + init(); + } + + ~FileStream() { + fclose(file); + file = nullptr; + } + + virtual int get() { + lastCh = fgetc(file); + return lastCh; + } + virtual int peek() { + const int ch = get(); + unget(); + return ch; + } + virtual void unget() { + ungetc(lastCh, file); + } + virtual bool good() { + return lastCh != EOF; + } + +private: + FILE *file; + int lastCh; +}; + simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} simplecpp::TokenList::TokenList(std::istream &istr, std::vector &filenames, const std::string &filename, OutputList *outputList) @@ -369,6 +403,13 @@ simplecpp::TokenList::TokenList(std::istream &istr, std::vector &fi readfile(stream,filename,outputList); } +simplecpp::TokenList::TokenList(std::vector &filenames, const std::string &filename, OutputList *outputList) + : frontToken(nullptr), backToken(nullptr), files(filenames) +{ + FileStream stream(filename); + 
readfile(stream,filename,outputList); +} + simplecpp::TokenList::TokenList(const TokenList &other) : frontToken(nullptr), backToken(nullptr), files(other.files) { *this = other; diff --git a/simplecpp.h b/simplecpp.h index 45a720f5..4d3a85fd 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -197,6 +197,7 @@ namespace simplecpp { explicit TokenList(std::vector &filenames); TokenList(std::istream &istr, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); + TokenList(std::vector &filenames, const std::string &filename, OutputList *outputList = nullptr); TokenList(const TokenList &other); #if __cplusplus >= 201103L TokenList(TokenList &&other); From 81fe8cdcac58bd1d9b032651b45cf7192ef31684 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 14:11:56 +0100 Subject: [PATCH 06/21] use FileStream for includes as well --- simplecpp.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 0f6643db..dd1a0f68 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -3018,8 +3018,9 @@ std::map simplecpp::load(const simplecpp::To } continue; } + fin.close(); - TokenList *tokenlist = new TokenList(fin, filenames, filename, outputList); + TokenList *tokenlist = new TokenList(filenames, filename, outputList); if (!tokenlist->front()) { delete tokenlist; continue; @@ -3057,8 +3058,9 @@ std::map simplecpp::load(const simplecpp::To const std::string header2 = openHeader(f,dui,sourcefile,header,systemheader); if (!f.is_open()) continue; + f.close(); - TokenList *tokens = new TokenList(f, filenames, header2, outputList); + TokenList *tokens = new TokenList(filenames, header2, outputList); ret[header2] = tokens; if (tokens->front()) filelist.push_back(tokens->front()); From 4f0da7842a3f5b3de1a90c1ebf251b674ccc634c Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 15:08:08 +0100 Subject: [PATCH 07/21] constness --- simplecpp.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 
4 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index dd1a0f68..482fdd6e 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -257,9 +257,9 @@ class simplecpp::TokenList::Stream { if (bom == 0 && static_cast(peek()) == '\n') (void)get(); else if (isUtf16) { - int c1 = get(); - int c2 = get(); - int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); + const int c1 = get(); + const int c2 = get(); + const int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); if (ch16 != '\n') { unget(); unget(); @@ -310,7 +310,7 @@ class simplecpp::TokenList::Stream { // The UTF-16 BOM is 0xfffe or 0xfeff. if (ch1 >= 0xfe) { - unsigned short bom = (static_cast(get()) << 8); + const unsigned short bom = (static_cast(get()) << 8); if (peek() >= 0xfe) return bom | static_cast(get()); unget(); From 1b52b2211ff058a74174e0a53facedb0d464d9d2 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 15:09:03 +0100 Subject: [PATCH 08/21] made some TokenList::Stream members private --- simplecpp.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/simplecpp.cpp b/simplecpp.cpp index 482fdd6e..12b1d334 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -304,6 +304,7 @@ class simplecpp::TokenList::Stream { isUtf16 = (bom == 0xfeff || bom == 0xfffe); } +private: unsigned short getAndSkipBOM() { const int ch1 = peek(); From 95c747b6a99dde5e17056b13d6ab265451b598c3 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 5 Mar 2022 15:17:02 +0100 Subject: [PATCH 09/21] pulled out repeated UTF-16 character generation code --- simplecpp.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 12b1d334..1b179b13 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -247,7 +247,7 @@ class simplecpp::TokenList::Stream { // character is non-ASCII character then replace it with 0xff if (isUtf16) { const unsigned char ch2 = static_cast(get()); - const int ch16 = (bom == 0xfeff) ? 
(ch<<8 | ch2) : (ch2<<8 | ch); + const int ch16 = makeUtf16Char(ch, ch2); ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); } @@ -259,7 +259,7 @@ class simplecpp::TokenList::Stream { else if (isUtf16) { const int c1 = get(); const int c2 = get(); - const int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1); + const int ch16 = makeUtf16Char(c1, c2); if (ch16 != '\n') { unget(); unget(); @@ -280,7 +280,7 @@ class simplecpp::TokenList::Stream { (void)get(); const unsigned char ch2 = static_cast(peek()); unget(); - const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); + const int ch16 = makeUtf16Char(ch, ch2); ch = static_cast(((ch16 >= 0x80) ? 0xff : ch16)); } @@ -305,6 +305,11 @@ class simplecpp::TokenList::Stream { } private: + inline int makeUtf16Char(const unsigned char ch, const unsigned char ch2) const + { + return (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch); + } + unsigned short getAndSkipBOM() { const int ch1 = peek(); From 892183ecbdf32ae2be55d2032905c5ab9e757c9b Mon Sep 17 00:00:00 2001 From: firewave Date: Wed, 9 Mar 2022 11:41:22 +0100 Subject: [PATCH 10/21] main.cpp: added command-option "-is" to specify usage of std::istream interface in reading initial file --- main.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/main.cpp b/main.cpp index fb3d4256..dd61c76c 100644 --- a/main.cpp +++ b/main.cpp @@ -25,9 +25,10 @@ int main(int argc, char **argv) { bool error = false; + const char *filename = nullptr; + bool use_istream = false; // Settings.. - const char *filename = nullptr; simplecpp::DUI dui; bool quiet = false; for (int i = 1; i < argc; i++) { @@ -54,6 +55,10 @@ int main(int argc, char **argv) dui.includes.push_back(arg+9); found = true; } + else if (std::strncmp(arg, "-is",3)==0) { + use_istream = true; + found = true; + } break; case 's': if (std::strncmp(arg, "-std=",5)==0) { @@ -87,20 +92,29 @@ int main(int argc, char **argv) std::cout << " -UNAME Undefine NAME." 
<< std::endl; std::cout << " -std=STD Specify standard." << std::endl; std::cout << " -q Quiet mode (no output)." << std::endl; + std::cout << " -is Use std::istream interface." << std::endl; std::exit(0); } // Perform preprocessing simplecpp::OutputList outputList; std::vector files; - //std::ifstream f(filename); - simplecpp::TokenList rawtokens(files,filename,&outputList); - rawtokens.removeComments(); - std::map included = simplecpp::load(rawtokens, files, dui, &outputList); + simplecpp::TokenList *rawtokens; + if (use_istream) { + std::ifstream f(filename); + rawtokens = new simplecpp::TokenList(f, files,filename,&outputList); + } + else { + rawtokens = new simplecpp::TokenList(files,filename,&outputList); + } + rawtokens->removeComments(); + std::map included = simplecpp::load(*rawtokens, files, dui, &outputList); for (std::pair i : included) i.second->removeComments(); simplecpp::TokenList outputTokens(files); - simplecpp::preprocess(outputTokens, rawtokens, files, included, dui, &outputList); + simplecpp::preprocess(outputTokens, *rawtokens, files, included, dui, &outputList); + delete rawtokens; + rawtokens = nullptr; // Output if (!quiet) { From 26ccd246db7cec3335d416987d1d89f124cc191e Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 14 Apr 2022 10:41:33 +0200 Subject: [PATCH 11/21] test.cpp: got rid of another `std::istringstream` usage --- test.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/test.cpp b/test.cpp index 41fbc972..e2eca7dc 100644 --- a/test.cpp +++ b/test.cpp @@ -77,17 +77,15 @@ static void testcase(const std::string &name, void (*f)(), int argc, char * cons #define TEST_CASE(F) (testcase(#F, F, argc, argv)) - -static simplecpp::TokenList makeTokenList(const char code[], std::vector &filenames, const std::string &filename=std::string(), simplecpp::OutputList *outputList=nullptr) +static simplecpp::TokenList makeTokenList(const char code[], std::size_t size, std::vector &filenames, const std::string 
&filename=std::string(), simplecpp::OutputList *outputList=nullptr) { - std::istringstream istr(code); + std::istringstream istr(std::string(code, size)); return simplecpp::TokenList(istr,filenames,filename,outputList); } -static simplecpp::TokenList makeTokenList(const char code[], std::size_t size, std::vector &filenames, const std::string &filename=std::string(), simplecpp::OutputList *outputList=nullptr) +static simplecpp::TokenList makeTokenList(const char code[], std::vector &filenames, const std::string &filename=std::string(), simplecpp::OutputList *outputList=nullptr) { - std::istringstream istr(std::string(code, size)); - return simplecpp::TokenList(istr,filenames,filename,outputList); + return makeTokenList(code, strlen(code), filenames, filename, outputList); } static std::string readfile(const char code[], simplecpp::OutputList *outputList=nullptr) From e57f3128db5ee20551b8ac8b7445845b371d7d15 Mon Sep 17 00:00:00 2001 From: firewave Date: Tue, 19 Apr 2022 10:11:24 +0200 Subject: [PATCH 12/21] fixed FileStream::unget() with subsequent calls (i.e. 
UTF-16 encoding) --- simplecpp.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 1b179b13..a23dc12d 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -300,6 +300,8 @@ class simplecpp::TokenList::Stream { protected: void init() { + // initialize since we use peek() in getAndSkipBOM() + isUtf16 = false; bom = getAndSkipBOM(); isUtf16 = (bom == 0xfeff || bom == 0xfffe); } @@ -338,6 +340,7 @@ class simplecpp::TokenList::Stream { } unsigned short bom; +protected: bool isUtf16; }; @@ -389,7 +392,14 @@ class FileStream : public simplecpp::TokenList::Stream { return ch; } virtual void unget() { - ungetc(lastCh, file); + if (isUtf16) { + // TODO: use ungetc() as well + // UTF-16 has subsequent unget() calls + fseek(file, -1, SEEK_CUR); + } + else + ungetc(lastCh, file); + } virtual bool good() { return lastCh != EOF; From bdc7f3c34521364eb61f469c7e3a5e29d3bb02a7 Mon Sep 17 00:00:00 2001 From: firewave Date: Tue, 19 Apr 2022 16:13:38 +0200 Subject: [PATCH 13/21] simplified newline handling in TokenList::Stream::readChar() --- simplecpp.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index a23dc12d..b9d53d2a 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -254,17 +254,15 @@ class simplecpp::TokenList::Stream { // Handling of newlines.. 
if (ch == '\r') { ch = '\n'; - if (bom == 0 && static_cast(peek()) == '\n') - (void)get(); - else if (isUtf16) { - const int c1 = get(); + + int ch2 = get(); + if (isUtf16) { const int c2 = get(); - const int ch16 = makeUtf16Char(c1, c2); - if (ch16 != '\n') { - unget(); - unget(); - } + ch2 = makeUtf16Char(ch2, c2); } + + if (ch2 != '\n') + ungetChar(); } return ch; From 76b94e3ba988688be06eea9fe5cd641260044959 Mon Sep 17 00:00:00 2001 From: firewave Date: Tue, 19 Apr 2022 16:14:13 +0200 Subject: [PATCH 14/21] fixed handling of incomplete UTF-8 BOM 0xefbbbf --- simplecpp.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index b9d53d2a..8b937afb 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -316,7 +316,8 @@ class simplecpp::TokenList::Stream { // The UTF-16 BOM is 0xfffe or 0xfeff. if (ch1 >= 0xfe) { - const unsigned short bom = (static_cast(get()) << 8); + (void)get(); + const unsigned short bom = (static_cast(ch1) << 8); if (peek() >= 0xfe) return bom | static_cast(get()); unget(); @@ -326,12 +327,15 @@ class simplecpp::TokenList::Stream { // Skip UTF-8 BOM 0xefbbbf if (ch1 == 0xef) { (void)get(); - if (get() == 0xbb && peek() == 0xbf) { + if (peek() == 0xbb) { (void)get(); - } else { - unget(); + if (peek() == 0xbf) { + (void)get(); + return 0; + } unget(); } + unget(); } return 0; From cd084aeecdd9dcdf0de43473b404dd475edace8e Mon Sep 17 00:00:00 2001 From: firewave Date: Tue, 19 Apr 2022 16:18:10 +0200 Subject: [PATCH 15/21] test.cpp: added tests for incomplete UTF sequences --- test.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test.cpp b/test.cpp index e2eca7dc..b0ba751a 100644 --- a/test.cpp +++ b/test.cpp @@ -2233,6 +2233,12 @@ static void utf8() ASSERT_EQUALS("123", readfile("\xEF\xBB\xBF 123")); } +static void utf8_invalid() +{ + ASSERT_EQUALS("", readfile("\xEF 123")); + ASSERT_EQUALS("", readfile("\xEF\xBB 123")); +} + static void 
unicode() { { @@ -2265,6 +2271,42 @@ static void unicode() } } +static void unicode_invalid() +{ + { + const char code[] = "\xFF"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x31"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31"; + ASSERT_EQUALS("1", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x31\x32"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31\x32"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFE\xFF\x00\x31\x00\x32\x33"; + ASSERT_EQUALS("", readfile(code, sizeof(code))); + } + { + const char code[] = "\xFF\xFE\x31\x00\x32\x00\x33"; + ASSERT_EQUALS("123", readfile(code, sizeof(code))); + } +} + static void warning() { const char code[] = "#warning MSG\n1"; @@ -2687,7 +2729,9 @@ int main(int argc, char **argv) // utf/unicode TEST_CASE(utf8); + TEST_CASE(utf8_invalid); TEST_CASE(unicode); + TEST_CASE(unicode_invalid); TEST_CASE(warning); From 797f899da9141b3fb46b79a1e075902927be10b3 Mon Sep 17 00:00:00 2001 From: firewave Date: Wed, 20 Apr 2022 09:20:50 +0200 Subject: [PATCH 16/21] keep lastCh intact when using FileStream::peek() --- simplecpp.cpp | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index 8b937afb..bbad3517 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -385,31 +385,36 @@ class FileStream : public simplecpp::TokenList::Stream { } virtual int get() { - lastCh = fgetc(file); + lastStatus = lastCh = fgetc(file); return lastCh; } virtual int peek() { - const int ch = get(); - unget(); + // keep lastCh intact + const int ch = fgetc(file); + unget_internal(ch); return ch; } virtual void unget() { + unget_internal(lastCh); + } + virtual bool good() { + return lastStatus != EOF; + } + 
+private: + void unget_internal(int ch) { if (isUtf16) { // TODO: use ungetc() as well // UTF-16 has subsequent unget() calls fseek(file, -1, SEEK_CUR); } else - ungetc(lastCh, file); - - } - virtual bool good() { - return lastCh != EOF; + ungetc(ch, file); } -private: FILE *file; int lastCh; + int lastStatus; }; simplecpp::TokenList::TokenList(std::vector &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {} From c1da435de1410e1210770642b6b084ad59a14b10 Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 6 Oct 2022 02:24:23 +0200 Subject: [PATCH 17/21] fixed uninitialized members in `FileStream` --- simplecpp.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/simplecpp.cpp b/simplecpp.cpp index bbad3517..c671533e 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -375,6 +375,8 @@ class FileStream : public simplecpp::TokenList::Stream { public: FileStream(const std::string &filename) : file(fopen(filename.c_str(), "rb")) + , lastCh(0) + , lastStatus(0) { init(); } From 52b4b61a8fddc249382c81d30aa771955fe4dcd3 Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 6 Oct 2022 02:26:12 +0200 Subject: [PATCH 18/21] .clang-tidy: disabled `modernize-use-override` warning --- .clang-tidy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 244e860f..1005f909 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,5 +1,5 @@ --- -Checks: 
'*,-abseil-*,-altera-*,-android-*,-cert-*,-cppcoreguidelines-*,-fuchsia-*,-google-*,-hicpp-*,-linuxkernel-*,-llvm-*,-llvmlibc-*,-mpi-*,-objc-*,-openmp-*,-zircon-*,-misc-non-private-member-variables-in-classes,-modernize-avoid-c-arrays,-modernize-use-default-member-init,-modernize-use-using,-readability-braces-around-statements,-readability-function-size,-readability-implicit-bool-conversion,-readability-isolate-declaration,-readability-magic-numbers,-readability-simplify-boolean-expr,-readability-uppercase-literal-suffix,-modernize-use-auto,-modernize-use-trailing-return-type,-bugprone-branch-clone,-modernize-pass-by-value,-modernize-loop-convert,-modernize-use-emplace,-modernize-use-equals-default,-performance-noexcept-move-constructor,-modernize-use-equals-delete,-readability-identifier-length,-readability-function-cognitive-complexity,-modernize-return-braced-init-list,-misc-no-recursion,-bugprone-easily-swappable-parameters,-bugprone-narrowing-conversions,-concurrency-mt-unsafe,-modernize-loop-convert,-clang-analyzer-core.NullDereference,-performance-move-constructor-init,-performance-inefficient-string-concatenation,-performance-no-automatic-move' +Checks: 
'*,-abseil-*,-altera-*,-android-*,-cert-*,-cppcoreguidelines-*,-fuchsia-*,-google-*,-hicpp-*,-linuxkernel-*,-llvm-*,-llvmlibc-*,-mpi-*,-objc-*,-openmp-*,-zircon-*,-misc-non-private-member-variables-in-classes,-modernize-avoid-c-arrays,-modernize-use-default-member-init,-modernize-use-using,-readability-braces-around-statements,-readability-function-size,-readability-implicit-bool-conversion,-readability-isolate-declaration,-readability-magic-numbers,-readability-simplify-boolean-expr,-readability-uppercase-literal-suffix,-modernize-use-auto,-modernize-use-trailing-return-type,-bugprone-branch-clone,-modernize-pass-by-value,-modernize-loop-convert,-modernize-use-emplace,-modernize-use-equals-default,-performance-noexcept-move-constructor,-modernize-use-equals-delete,-readability-identifier-length,-readability-function-cognitive-complexity,-modernize-return-braced-init-list,-misc-no-recursion,-bugprone-easily-swappable-parameters,-bugprone-narrowing-conversions,-concurrency-mt-unsafe,-modernize-loop-convert,-clang-analyzer-core.NullDereference,-performance-move-constructor-init,-performance-inefficient-string-concatenation,-performance-no-automatic-move,-modernize-use-override' HeaderFilterRegex: '.*' WarningsAsErrors: '*' CheckOptions: From 7ff8c9899c203b7ddd48f34cbaf7c914138f28fa Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 6 Oct 2022 02:26:55 +0200 Subject: [PATCH 19/21] fixed `readability-inconsistent-declaration-parameter-name` clang-tidy warnings --- simplecpp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/simplecpp.h b/simplecpp.h index 4d3a85fd..804c6f70 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -282,7 +282,7 @@ namespace simplecpp { void constFoldLogicalOp(Token *tok); void constFoldQuestionOp(Token **tok1); - std::string readUntil(Stream &istr, const Location &location, char start, char end, OutputList *outputList); + std::string readUntil(Stream &stream, const Location &location, char start, char end, OutputList *outputList);
void lineDirective(unsigned int fileIndex, unsigned int line, Location *location); std::string lastLine(int maxsize=100000) const; From c9605feec619bc1aa560e73d1d4a858cdc794062 Mon Sep 17 00:00:00 2001 From: firewave Date: Thu, 6 Oct 2022 02:37:26 +0200 Subject: [PATCH 20/21] fixed `-Wshadow` Clang compiler warning --- simplecpp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index c671533e..1fc4f1c7 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -317,9 +317,9 @@ class simplecpp::TokenList::Stream { // The UTF-16 BOM is 0xfffe or 0xfeff. if (ch1 >= 0xfe) { (void)get(); - const unsigned short bom = (static_cast(ch1) << 8); + const unsigned short byte = (static_cast(ch1) << 8); if (peek() >= 0xfe) - return bom | static_cast(get()); + return byte | static_cast(get()); unget(); return 0; } From 557099c46446fa20eff89ecd619ceec26772ee02 Mon Sep 17 00:00:00 2001 From: firewave Date: Sat, 25 Feb 2023 17:22:31 +0100 Subject: [PATCH 21/21] adjusted parameter order in added `TokenList()` constructor / added some barebones documentation --- main.cpp | 2 +- simplecpp.cpp | 6 +++--- simplecpp.h | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/main.cpp b/main.cpp index dd61c76c..6592bf6c 100644 --- a/main.cpp +++ b/main.cpp @@ -105,7 +105,7 @@ int main(int argc, char **argv) rawtokens = new simplecpp::TokenList(f, files,filename,&outputList); } else { - rawtokens = new simplecpp::TokenList(files,filename,&outputList); + rawtokens = new simplecpp::TokenList(filename,files,&outputList); } rawtokens->removeComments(); std::map included = simplecpp::load(*rawtokens, files, dui, &outputList); diff --git a/simplecpp.cpp b/simplecpp.cpp index 1fc4f1c7..8f14020d 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -428,7 +428,7 @@ simplecpp::TokenList::TokenList(std::istream &istr, std::vector &fi readfile(stream,filename,outputList); } -simplecpp::TokenList::TokenList(std::vector &filenames, const std::string 
&filename, OutputList *outputList) +simplecpp::TokenList::TokenList(const std::string &filename, std::vector &filenames, OutputList *outputList) : frontToken(nullptr), backToken(nullptr), files(filenames) { FileStream stream(filename); @@ -3045,7 +3045,7 @@ std::map simplecpp::load(const simplecpp::To } fin.close(); - TokenList *tokenlist = new TokenList(filenames, filename, outputList); + TokenList *tokenlist = new TokenList(filename, filenames, outputList); if (!tokenlist->front()) { delete tokenlist; continue; @@ -3085,7 +3085,7 @@ std::map simplecpp::load(const simplecpp::To continue; f.close(); - TokenList *tokens = new TokenList(filenames, header2, outputList); + TokenList *tokens = new TokenList(header2, filenames, outputList); ret[header2] = tokens; if (tokens->front()) filelist.push_back(tokens->front()); diff --git a/simplecpp.h b/simplecpp.h index 804c6f70..5b918a98 100755 --- a/simplecpp.h +++ b/simplecpp.h @@ -196,8 +196,10 @@ namespace simplecpp { class Stream; explicit TokenList(std::vector &filenames); + /** generates a token list from the given std::istream parameter */ TokenList(std::istream &istr, std::vector &filenames, const std::string &filename=std::string(), OutputList *outputList = nullptr); - TokenList(std::vector &filenames, const std::string &filename, OutputList *outputList = nullptr); + /** generates a token list from the given filename parameter */ + TokenList(const std::string &filename, std::vector &filenames, OutputList *outputList = nullptr); TokenList(const TokenList &other); #if __cplusplus >= 201103L TokenList(TokenList &&other);