// Patch overview: unified diff touching Engine/source/persistence/taml/fsTinyXml.cpp and
// Engine/source/persistence/taml/fsTinyXml.h.
//
// fsTinyXml.cpp
//   * Moves the VfsXMLPrinter constructor, destructor, Print, Write and Putc definitions to
//     just after the console include, and moves the FileStream overloads of
//     VfsXMLDocument::LoadFile / SaveFile to sit between LoadFile(const char*) and
//     SaveFile(const char*). The old copies after SetError are removed. The moved bodies
//     differ from the removed ones only in spacing (assert expressions, `delete[]`) and one
//     added comment, except SaveFile(FileStream&): it now wraps its local VfsXMLPrinter in
//     a PrettyXMLPrinter and passes that to Print.
//   * Adds the PrettyXMLPrinter implementation:
//       - constructor: clears both entity flag tables, flags every entities[i].value in
//         mEntityFlag, and flags '&', '<' and '>' in mRestrictedEntityFlag;
//       - PrintString: when mProcessEntities is set, writes runs of ordinary characters to
//         the inner printer and replaces each flagged character with '&' + pattern + ';';
//         otherwise writes the string unchanged;
//       - VisitEnter(element, attribute): takes compact mode from the parent element when
//         there is one, otherwise from the element itself, opens the element on the inner
//         printer, increments mDepth, then calls PushAttribute for every attribute;
//       - PushAttribute: writes a single space (compact mode) or a newline followed by
//         PrintSpace(mDepth) before name="value", with the value passed through PrintString.
//
// fsTinyXml.h
//   * VfsXMLPrinter gains CompactMode and PrintSpace overrides that forward to
//     tinyxml2::XMLPrinter, plus a Write(const char*) overload that forwards to
//     Write(data, strlen(data)).
//   * Declares PrettyXMLPrinter (derived from tinyxml2::XMLPrinter). Document, declaration,
//     text, comment and unknown visits are forwarded to the wrapped VfsXMLPrinter;
//     VisitExit(element) decrements mDepth before forwarding.
//
// NOTE(review): LoadFile(FileStream&) calls Parse(buf, length) with the length measured
//   before line-ending normalisation, while the normalised text is terminated at q, which
//   can be earlier. Confirm Parse stops at that NUL, or pass the normalised length.
// NOTE(review): ~VfsXMLPrinter flushes and closes m_Stream, which is held by reference, and
//   SaveFile(FileStream&) creates its VfsXMLPrinter as a local. The caller's stream is
//   therefore closed when SaveFile returns - confirm callers expect that.
// NOTE(review): PrettyXMLPrinter's constructor initialises only mInnerPrinter, mDepth and the
//   two flag tables. mProcessEntities is first assigned in VisitEnter(const XMLDocument&),
//   and nothing in this patch assigns or reads mCompactMode or uses BUF_SIZE.
// NOTE(review): several static_cast expressions appear without a target type (for example
//   static_cast(ch)), and the markers around the quoted XML-spec paragraph are blank. This
//   looks like angle-bracketed text lost when the patch was captured - verify against the
//   repository before applying.
// NOTE(review): most line breaks in this copy of the patch have been collapsed, so it is
//   unlikely to apply with patch / git apply as-is.
diff --git a/Engine/source/persistence/taml/fsTinyXml.cpp b/Engine/source/persistence/taml/fsTinyXml.cpp index e0dc53572..961d96b0e 100644 --- a/Engine/source/persistence/taml/fsTinyXml.cpp +++ b/Engine/source/persistence/taml/fsTinyXml.cpp @@ -26,6 +26,41 @@ #include "console/console.h" +VfsXMLPrinter::VfsXMLPrinter(FileStream& stream, bool compact, int depth) + : XMLPrinter(NULL, compact, depth), + m_Stream(stream) +{ +} + +VfsXMLPrinter::~VfsXMLPrinter() +{ + m_Stream.flush(); + m_Stream.close(); +} + + +// Add VFS friendly implementations of output functions + +void VfsXMLPrinter::Print(const char* format, ...) +{ + va_list va; + va_start(va, format); + + m_Stream.writeFormattedBuffer(format, va); + + va_end(va); +} + +void VfsXMLPrinter::Write(const char* data, size_t size) +{ + m_Stream.write(size, data); +} + +void VfsXMLPrinter::Putc(char ch) +{ + m_Stream.write(static_cast(ch)); +} + bool VfsXMLDocument::LoadFile(const char* pFilename) { // Expand the file-path. @@ -61,6 +96,114 @@ bool VfsXMLDocument::LoadFile(const char* pFilename) return true; } +bool VfsXMLDocument::LoadFile(FileStream& stream) +{ + // Delete the existing data: + Clear(); + // Clear shadowed error + ClearError(); + //TODO: Can't clear location, investigate if this gives issues. + //doc.location.Clear(); + + // Get the file size, so we can pre-allocate the string. HUGE speed impact. + long length = stream.getStreamSize(); + + // Strange case, but good to handle up front. + if (length <= 0) + { + SetError(tinyxml2::XML_ERROR_EMPTY_DOCUMENT, 0, 0); + return false; + } + + // Subtle bug here. TinyXml did use fgets. But from the XML spec: + // 2.11 End-of-Line Handling + // + // + // ...the XML processor MUST behave as if it normalized all line breaks in external + // parsed entities (including the document entity) on input, before parsing, by translating + // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to + // a single #xA character. 
+ // + // + // It is not clear fgets does that, and certainly isn't clear it works cross platform. + // Generally, you expect fgets to translate from the convention of the OS to the c/unix + // convention, and not work generally. + + /* + while( fgets( buf, sizeof(buf), file ) ) + { + data += buf; + } + */ + + char* buf = new char[length + 1]; + buf[0] = 0; + + if (!stream.read(length, buf)) + { + delete[] buf; + SetError(tinyxml2::XML_ERROR_FILE_COULD_NOT_BE_OPENED, 0, 0); + return false; + } + + // Process the buffer in place to normalize new lines. (See comment above.) + // Copies from the 'p' to 'q' pointer, where p can advance faster if + // a newline-carriage return is hit. + // + // Wikipedia: + // Systems based on ASCII or a compatible character set use either LF (Line feed, '\n', 0x0A, 10 in decimal) or + // CR (Carriage return, '\r', 0x0D, 13 in decimal) individually, or CR followed by LF (CR+LF, 0x0D 0x0A)... + // * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others + // * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS + // * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9 + + const char* p = buf; // the read head + char* q = buf; // the write head + const char CR = 0x0d; + const char LF = 0x0a; + + buf[length] = 0; + while (*p) + { + assert(p < (buf + length)); + assert(q <= (buf + length)); + assert(q <= p); + + if (*p == CR) + { + *q++ = LF; + p++; + if (*p == LF) + { + // check for CR+LF (and skip LF) + p++; + } + } + else + { + *q++ = *p++; + } + } + assert(q <= (buf + length)); + *q = 0; + + Parse(buf, length); + + delete[] buf; + return !Error(); +} + +bool VfsXMLDocument::SaveFile(FileStream& stream) +{ + // Clear any error from the last save, otherwise it will get reported + // for *this* call. 
+ ClearError(); + VfsXMLPrinter printer(stream, false, 0); + PrettyXMLPrinter prettyPrinter(printer); + Print(&prettyPrinter); + return !Error(); +} + bool VfsXMLDocument::SaveFile(const char* pFilename) { // Expand the file-name into the file-path buffer. @@ -119,141 +262,110 @@ void VfsXMLDocument::SetError(tinyxml2::XMLError error, int lineNum, const char* delete[] buffer; } -VfsXMLPrinter::VfsXMLPrinter(FileStream& stream, bool compact, int depth) - : XMLPrinter(NULL, compact, depth), - m_Stream(stream) + +// Overwrite Visitation of elements to add newlines before attributes +PrettyXMLPrinter::PrettyXMLPrinter(VfsXMLPrinter& innerPrinter, int depth) + : mInnerPrinter(innerPrinter), + mDepth(depth) { -} - -VfsXMLPrinter::~VfsXMLPrinter() -{ - m_Stream.flush(); - m_Stream.close(); -} - -void VfsXMLPrinter::Print(const char* format, ...) -{ - va_list va; - va_start(va, format); - - m_Stream.writeFormattedBuffer(format, va); - - va_end(va); -} - -void VfsXMLPrinter::Write(const char* data, size_t size) -{ - m_Stream.write(size, data); -} - -void VfsXMLPrinter::Putc(char ch) -{ - m_Stream.write(static_cast(ch)); -} - -bool VfsXMLDocument::LoadFile(FileStream& stream) -{ - // Delete the existing data: - Clear(); - // Clear shadowed error - ClearError(); - //TODO: Can't clear location, investigate if this gives issues. - //doc.location.Clear(); - - // Get the file size, so we can pre-allocate the string. HUGE speed impact. - long length = stream.getStreamSize(); - - // Strange case, but good to handle up front. - if (length <= 0) - { - SetError(tinyxml2::XML_ERROR_EMPTY_DOCUMENT, 0, 0); - return false; + for (int i = 0; i < ENTITY_RANGE; ++i) { + mEntityFlag[i] = false; + mRestrictedEntityFlag[i] = false; } - - // Subtle bug here. TinyXml did use fgets. 
But from the XML spec: - // 2.11 End-of-Line Handling - // - // - // ...the XML processor MUST behave as if it normalized all line breaks in external - // parsed entities (including the document entity) on input, before parsing, by translating - // both the two-character sequence #xD #xA and any #xD that is not followed by #xA to - // a single #xA character. - // - // - // It is not clear fgets does that, and certainly isn't clear it works cross platform. - // Generally, you expect fgets to translate from the convention of the OS to the c/unix - // convention, and not work generally. - - /* - while( fgets( buf, sizeof(buf), file ) ) - { - data += buf; + for (int i = 0; i < NUM_ENTITIES; ++i) { + const char entityValue = entities[i].value; + const unsigned char flagIndex = static_cast(entityValue); + TIXMLASSERT(flagIndex < ENTITY_RANGE); + mEntityFlag[flagIndex] = true; } - */ + mRestrictedEntityFlag[static_cast('&')] = true; + mRestrictedEntityFlag[static_cast('<')] = true; + mRestrictedEntityFlag[static_cast('>')] = true; // not required, but consistency is nice +} - char* buf = new char[length + 1]; - buf[0] = 0; +void PrettyXMLPrinter::PrintString(const char* p, bool restricted) +{ + // Look for runs of bytes between entities to print. + const char* q = p; - if (!stream.read(length, buf)) - { - delete [] buf; - SetError(tinyxml2::XML_ERROR_FILE_COULD_NOT_BE_OPENED, 0, 0); - return false; - } - - // Process the buffer in place to normalize new lines. (See comment above.) - // Copies from the 'p' to 'q' pointer, where p can advance faster if - // a newline-carriage return is hit. - // - // Wikipedia: - // Systems based on ASCII or a compatible character set use either LF (Line feed, '\n', 0x0A, 10 in decimal) or - // CR (Carriage return, '\r', 0x0D, 13 in decimal) individually, or CR followed by LF (CR+LF, 0x0D 0x0A)... 
- // * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others - // * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS - // * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9 - - const char* p = buf; // the read head - char* q = buf; // the write head - const char CR = 0x0d; - const char LF = 0x0a; - - buf[length] = 0; - while (*p) - { - assert(p < (buf+length)); - assert(q <= (buf+length)); - assert(q <= p); - - if (*p == CR) - { - *q++ = LF; - p++; - if (*p == LF) - { - // check for CR+LF (and skip LF) - p++; + if (mProcessEntities) { + const bool* flag = restricted ? mRestrictedEntityFlag : mEntityFlag; + while (*q) { + TIXMLASSERT(p <= q); + // Remember, char is sometimes signed. (How many times has that bitten me?) + if (*q > 0 && *q < ENTITY_RANGE) { + // Check for entities. If one is found, flush + // the stream up until the entity, write the + // entity, and keep looking. + if (flag[static_cast(*q)]) { + while (p < q) { + const size_t delta = q - p; + const int toPrint = (INT_MAX < delta) ? INT_MAX : static_cast(delta); + mInnerPrinter.Write(p, toPrint); + p += toPrint; + } + bool entityPatternPrinted = false; + for (int i = 0; i < NUM_ENTITIES; ++i) { + if (entities[i].value == *q) { + mInnerPrinter.Putc('&'); + mInnerPrinter.Write(entities[i].pattern, entities[i].length); + mInnerPrinter.Putc(';'); + entityPatternPrinted = true; + break; + } + } + if (!entityPatternPrinted) { + // TIXMLASSERT( entityPatternPrinted ) causes gcc -Wunused-but-set-variable in release + TIXMLASSERT(false); + } + ++p; + } } + ++q; + TIXMLASSERT(p <= q); } - else - { - *q++ = *p++; + // Flush the remaining string. This will be the entire + // string if an entity wasn't found. + if (p < q) { + const size_t delta = q - p; + const int toPrint = (INT_MAX < delta) ? 
INT_MAX : static_cast(delta); + mInnerPrinter.Write(p, toPrint); } } - assert(q <= (buf+length)); - *q = 0; - - Parse(buf, length); - - delete [] buf; - return !Error(); + else { + mInnerPrinter.Write(p); + } } -bool VfsXMLDocument::SaveFile(FileStream& stream) +bool PrettyXMLPrinter::VisitEnter(const tinyxml2::XMLElement& element, const tinyxml2::XMLAttribute* attribute) { - // Clear any error from the last save, otherwise it will get reported - // for *this* call. - ClearError(); - VfsXMLPrinter printer(stream, false, 0); - Print(&printer); - return !Error(); + const tinyxml2::XMLElement* parentElem = 0; + if (element.Parent()) { + parentElem = element.Parent()->ToElement(); + } + const bool compactMode = parentElem ? mInnerPrinter.CompactMode(*parentElem) : mInnerPrinter.CompactMode(element); + mInnerPrinter.OpenElement(element.Name(), compactMode); + mDepth++; + while (attribute) { + PushAttribute(attribute->Name(), attribute->Value(), compactMode); + attribute = attribute->Next(); + } + return true; +} + +void PrettyXMLPrinter::PushAttribute(const char* name, const char* value, bool compactMode) +{ + if (compactMode) + { + mInnerPrinter.Putc(' '); + } + else + { + mInnerPrinter.Putc('\n'); + mInnerPrinter.PrintSpace(mDepth); + } + mInnerPrinter.Write(name); + mInnerPrinter.Write("=\""); + PrintString(value, false); + mInnerPrinter.Putc('\"'); } diff --git a/Engine/source/persistence/taml/fsTinyXml.h b/Engine/source/persistence/taml/fsTinyXml.h index 2a83cb614..c4a0d55c2 100644 --- a/Engine/source/persistence/taml/fsTinyXml.h +++ b/Engine/source/persistence/taml/fsTinyXml.h @@ -40,9 +40,18 @@ public: VfsXMLPrinter(FileStream& stream, bool compact = false, int depth = 0); ~VfsXMLPrinter() override; + // Re-implement protected functionality in TinyXML2 library, and make it public + // (This is a bit dirty, but it's necessary for the PrettyXMLPrinter) + bool CompactMode(const tinyxml2::XMLElement& element) override { return 
tinyxml2::XMLPrinter::CompactMode(element); } + void PrintSpace(int depth) override { tinyxml2::XMLPrinter::PrintSpace(depth); } + inline void Write(const char* data) { Write(data, strlen(data)); } + + // Add VFS friendly implementations of output functions void Print(const char* format, ...) override; void Write(const char* data, size_t size) override; void Putc(char ch) override; + + // Accept a virtual FileStream instead of a FILE pointer FileStream& m_Stream; }; @@ -127,4 +136,103 @@ public: } }; +class PrettyXMLPrinter : public tinyxml2::XMLPrinter +{ + // Re-implement private functionality in TinyXML2 + static const char LINE_FEED = static_cast(0x0a); // all line endings are normalized to LF + static const char LF = LINE_FEED; + static const char CARRIAGE_RETURN = static_cast(0x0d); // CR gets filtered out + static const char CR = CARRIAGE_RETURN; + static const char SINGLE_QUOTE = '\''; + static const char DOUBLE_QUOTE = '\"'; + + struct Entity + { + const char* pattern; + int length; + char value; + }; + + static const int NUM_ENTITIES = 5; + static constexpr Entity entities[NUM_ENTITIES] = { + {"quot", 4, DOUBLE_QUOTE}, + {"amp", 3, '&'}, + {"apos", 4, SINGLE_QUOTE}, + {"lt", 2, '<'}, + {"gt", 2, '>'} + }; +public: + PrettyXMLPrinter(VfsXMLPrinter& innerPrinter, int depth = 0); + + /// Visit a document. + virtual bool VisitEnter(const tinyxml2::XMLDocument& doc) + { + mProcessEntities = doc.ProcessEntities(); + return mInnerPrinter.VisitEnter(doc); + } + + /// Visit a document. + virtual bool VisitExit(const tinyxml2::XMLDocument& doc) + { + return mInnerPrinter.VisitExit(doc); + } + + /// Visit an element. + virtual bool VisitEnter(const tinyxml2::XMLElement& element, const tinyxml2::XMLAttribute* firstAttribute); + /// Visit an element. + virtual bool VisitExit(const tinyxml2::XMLElement& element) + { + mDepth--; + return mInnerPrinter.VisitExit(element); + } + + /// Visit a declaration. 
+ virtual bool Visit(const tinyxml2::XMLDeclaration& declaration) + { + return mInnerPrinter.Visit(declaration); + } + + /// Visit a text node. + virtual bool Visit(const tinyxml2::XMLText& text) + { + return mInnerPrinter.Visit(text); + } + + /// Visit a comment node. + virtual bool Visit(const tinyxml2::XMLComment& comment) + { + return mInnerPrinter.Visit(comment); + } + + /// Visit an unknown node. + virtual bool Visit(const tinyxml2::XMLUnknown& unknown) + { + return mInnerPrinter.Visit(unknown); + } + + void PushAttribute(const char* name, const char* value, bool compactMode); + + // Re-implement private functionality in TinyXML2 library, this is just a copy-paste job + void PrintString(const char*, bool restrictedEntitySet); // prints out, after detecting entities. + + // The inner printer we are wrapping, we only support VfsXMLPrinter based classes because + // stock tinyxml printer is very closed + VfsXMLPrinter& mInnerPrinter; + + // Track private fields that are necessary for private functionality in TinyXML2 + int mDepth; + bool mProcessEntities; + bool mCompactMode; + + enum + { + ENTITY_RANGE = 64, + BUF_SIZE = 200 + }; + + bool mEntityFlag[ENTITY_RANGE]; + bool mRestrictedEntityFlag[ENTITY_RANGE]; +}; + + #endif //_FSTINYXML_H_