mirror of
https://github.com/TorqueGameEngines/Torque3D.git
synced 2026-01-20 12:44:46 +00:00
613 lines
15 KiB
C++
613 lines
15 KiB
C++
//-----------------------------------------------------------------------------
|
|
// Copyright (c) 2012 GarageGames, LLC
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to
|
|
// deal in the Software without restriction, including without limitation the
|
|
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
// sell copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
// IN THE SOFTWARE.
|
|
//-----------------------------------------------------------------------------
|
|
|
|
#include "core/tokenizer.h"
|
|
#include "platform/platform.h"
|
|
#include "core/stream/fileStream.h"
|
|
#include "core/strings/stringFunctions.h"
|
|
#include "core/util/safeDelete.h"
|
|
|
|
// Builds an empty tokenizer: no file name, no text buffer, no single-character
// token set and an empty current token.
Tokenizer::Tokenizer()
{
   VECTOR_SET_ASSOCIATION(mLinePositions);

   // Source text
   dMemset(mFileName, 0, sizeof(mFileName));
   mpBuffer = NULL;
   mBufferSize = 0;
   mSingleTokens = NULL;

   // Scan cursor
   mStartPos = 0;
   mCurrPos = 0;

   // Current token state
   dMemset(mCurrTokenBuffer, 0, sizeof(mCurrTokenBuffer));
   mTokenIsQuoted = false;
   mTokenIsCurrent = false;
}
|
|
|
|
// Releases everything the tokenizer owns by delegating to clear().
Tokenizer::~Tokenizer()
{
   // clear() frees the text buffer and the single-token string
   clear();
}
|
|
|
|
bool Tokenizer::openFile(const char* pFileName)
|
|
{
|
|
AssertFatal(mFileName[0] == '\0', "Reuse of Tokenizers not allowed!");
|
|
|
|
FileStream* pStream = new FileStream;
|
|
if (pStream->open(pFileName, Torque::FS::File::Read) == false)
|
|
{
|
|
delete pStream;
|
|
return false;
|
|
}
|
|
dStrcpy(mFileName, pFileName);
|
|
|
|
mBufferSize = pStream->getStreamSize();
|
|
mpBuffer = new char[mBufferSize];
|
|
pStream->read(mBufferSize, mpBuffer);
|
|
pStream->close();
|
|
delete pStream;
|
|
|
|
reset();
|
|
|
|
buildLinePositions();
|
|
|
|
return true;
|
|
}
|
|
|
|
// Loads getStreamSize() bytes from an already opened stream into the
// tokenizer's buffer, rewinds the scan state and rebuilds the line table.
// The stream is neither closed nor deleted here.
//
// Returns false, leaving the tokenizer untouched, when pStream is NULL or
// the read fails. Returns true on success.
bool Tokenizer::openFile(Stream* pStream)
{
   if (pStream == NULL)
      return false;

   // Read into a temporary first so a failed read cannot leave the
   // tokenizer holding a half-filled buffer
   const U32 streamSize = pStream->getStreamSize();
   char* pContents = new char[streamSize];
   const bool readOk = (streamSize == 0) || pStream->read(streamSize, pContents);

   if (!readOk)
   {
      delete [] pContents;
      return false;
   }

   // Release a previously loaded buffer instead of leaking it, the same
   // way setBuffer() does
   if (mpBuffer)
      SAFE_DELETE_ARRAY(mpBuffer);

   mpBuffer = pContents;
   mBufferSize = streamSize;

   reset();

   buildLinePositions();

   return true;
}
|
|
|
|
void Tokenizer::setBuffer(const char* buffer, U32 bufferSize)
|
|
{
|
|
if (mpBuffer)
|
|
{
|
|
SAFE_DELETE_ARRAY(mpBuffer);
|
|
mBufferSize = 0;
|
|
}
|
|
|
|
mBufferSize = bufferSize;
|
|
mpBuffer = new char[mBufferSize + 1];
|
|
dStrcpy(mpBuffer, buffer);
|
|
|
|
reset();
|
|
|
|
buildLinePositions();
|
|
}
|
|
|
|
void Tokenizer::setSingleTokens(const char* singleTokens)
|
|
{
|
|
if (mSingleTokens)
|
|
SAFE_DELETE(mSingleTokens);
|
|
|
|
if (singleTokens)
|
|
mSingleTokens = dStrdup(singleTokens);
|
|
}
|
|
|
|
bool Tokenizer::reset()
|
|
{
|
|
mStartPos = 0;
|
|
mCurrPos = 0;
|
|
|
|
mTokenIsQuoted = false;
|
|
|
|
dMemset(mCurrTokenBuffer, 0, sizeof(mCurrTokenBuffer));
|
|
mTokenIsCurrent = false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Tokenizer::clear()
|
|
{
|
|
// Delete our buffer
|
|
if (mpBuffer)
|
|
SAFE_DELETE_ARRAY(mpBuffer);
|
|
|
|
// Reset the buffer size
|
|
mBufferSize = 0;
|
|
|
|
// Reset our active data
|
|
reset();
|
|
|
|
// Clear our line positions
|
|
mLinePositions.clear();
|
|
|
|
// Reset our file name
|
|
dMemset(mFileName, 0, 1024);
|
|
|
|
// Wipe the single tokens
|
|
setSingleTokens(NULL);
|
|
|
|
return true;
|
|
}
|
|
|
|
// Moves the scan cursor to `pos` and immediately reads the token found
// there, crossing line breaks if needed. Returns advanceToken()'s result.
bool Tokenizer::setCurrentPos(U32 pos)
{
   // Clear the flag so advanceToken() really scans from the new position
   mTokenIsCurrent = false;
   mCurrPos = pos;

   const bool advanced = advanceToken(true);
   return advanced;
}
|
|
|
|
void Tokenizer::buildLinePositions()
|
|
{
|
|
if (mBufferSize == 0)
|
|
return;
|
|
|
|
// We can safely assume that the first line is at position 0
|
|
mLinePositions.push_back(0);
|
|
|
|
U32 currPos = 0;
|
|
while (currPos + 1 < mBufferSize)
|
|
{
|
|
// Windows line ending
|
|
if (mpBuffer[currPos] == '\r' && mpBuffer[currPos + 1] == '\n')
|
|
{
|
|
currPos += 2;
|
|
|
|
mLinePositions.push_back(currPos);
|
|
}
|
|
// Not sure if this ever happens but just in case
|
|
else if (mpBuffer[currPos] == '\n' && mpBuffer[currPos + 1] == '\r')
|
|
{
|
|
currPos += 2;
|
|
|
|
mLinePositions.push_back(currPos);
|
|
}
|
|
// Unix line endings should only have a single line break character
|
|
else if (mpBuffer[currPos] == '\n' || mpBuffer[currPos] == '\r')
|
|
{
|
|
currPos++;
|
|
|
|
mLinePositions.push_back(currPos);
|
|
}
|
|
else
|
|
currPos++;
|
|
}
|
|
}
|
|
|
|
// Binary search for the index of the line that contains buffer offset `pos`.
//
// lowIndex / highIndex optionally narrow the range of line indices to
// search; a negative highIndex means "up to the last line". A window that is
// out of range or does not bracket `pos` is widened, so the search always
// terminates. Returns 0 when there is at most one line, and the last line
// index when `pos` lies at or beyond the start of the last line.
U32 Tokenizer::getLinePosition(const U32 pos, U32 lowIndex, S32 highIndex)
{
   const U32 lineCount = mLinePositions.size();

   // If we have one or less lines then the result is easy
   if (lineCount <= 1)
      return 0;

   const U32 lastLine = lineCount - 1;

   // Quick test against the last line
   if (pos >= mLinePositions.last())
      return lastLine;

   // Normalise the window to valid indices
   U32 high = (highIndex < 0) ? lastLine : (U32)highIndex;
   if (high > lastLine)
      high = lastLine;

   U32 low = (lowIndex > high) ? high : lowIndex;

   // Widen a window that does not bracket pos. From here on
   // mLinePositions[low] <= pos < mLinePositions[high] holds, which is what
   // guarantees that every step below shrinks the window.
   if (pos < mLinePositions[low])
      low = 0;
   if (pos >= mLinePositions[high])
      high = lastLine;

   for (;;)
   {
      // Compute our test index (middle)
      const U32 testIndex = (low + high) / 2;

      // Defensive: never index past the table
      if (testIndex + 1 >= lineCount)
         return lastLine;

      // See if we are already at the right line
      if (pos >= mLinePositions[testIndex] && pos < mLinePositions[testIndex + 1])
         return testIndex;

      if (pos < mLinePositions[testIndex])
         high = testIndex;
      else
         low = testIndex;
   }
}
|
|
|
|
// Returns the index of the line containing mStartPos, as found by
// getLinePosition().
U32 Tokenizer::getCurrentLine()
{
   // Look up the line whose start offset is equal to or lower than the
   // recorded start position
   const U32 lineIndex = getLinePosition(mStartPos);
   return lineIndex;
}
|
|
|
|
// Returns how many characters mStartPos lies past the start of its line.
// Yields 0 when the line index is outside the line table or the line starts
// at or after mStartPos.
U32 Tokenizer::getTokenLineOffset()
{
   const U32 lineIndex = getCurrentLine();

   if (lineIndex < mLinePositions.size())
   {
      const U32 lineStart = mLinePositions[lineIndex];

      if (mStartPos > lineStart)
         return mStartPos - lineStart;
   }

   return 0;
}
|
|
|
|
bool Tokenizer::advanceToken(const bool crossLine, const bool assertAvail)
|
|
{
|
|
if (mTokenIsCurrent == true)
|
|
{
|
|
AssertFatal(mCurrTokenBuffer[0] != '\0', "No token, but marked as current?");
|
|
mTokenIsCurrent = false;
|
|
return true;
|
|
}
|
|
|
|
U32 currPosition = 0;
|
|
mCurrTokenBuffer[0] = '\0';
|
|
|
|
mTokenIsQuoted = false;
|
|
|
|
// Store the beginning of the previous advance
|
|
// and the beginning of the current advance
|
|
mStartPos = mCurrPos;
|
|
|
|
while (mCurrPos < mBufferSize)
|
|
{
|
|
char c = mpBuffer[mCurrPos];
|
|
|
|
bool cont = true;
|
|
|
|
if (mSingleTokens && dStrchr(mSingleTokens, c))
|
|
{
|
|
if (currPosition == 0)
|
|
{
|
|
mCurrTokenBuffer[currPosition++] = c;
|
|
mCurrPos++;
|
|
cont = false;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
// End of token
|
|
cont = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
switch (c)
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
if (currPosition == 0)
|
|
{
|
|
// Token hasn't started yet...
|
|
mCurrPos++;
|
|
}
|
|
else
|
|
{
|
|
// End of token
|
|
mCurrPos++;
|
|
cont = false;
|
|
}
|
|
break;
|
|
|
|
case '\r':
|
|
case '\n':
|
|
if (crossLine == true)
|
|
{
|
|
// Windows line ending
|
|
if (mpBuffer[mCurrPos] == '\r' && mpBuffer[mCurrPos + 1] == '\n')
|
|
mCurrPos += 2;
|
|
// Not sure if this ever happens but just in case
|
|
else if (mpBuffer[mCurrPos] == '\n' && mpBuffer[mCurrPos + 1] == '\r')
|
|
mCurrPos += 2;
|
|
// Unix line endings should only have a single line break character
|
|
else
|
|
mCurrPos++;
|
|
}
|
|
else
|
|
{
|
|
cont = false;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (c == '\"' || c == '\'')
|
|
{
|
|
// Quoted token
|
|
U32 startLine = getCurrentLine();
|
|
mCurrPos++;
|
|
|
|
// Store the beginning of the token
|
|
mStartPos = mCurrPos;
|
|
|
|
while (mpBuffer[mCurrPos] != c)
|
|
{
|
|
AssertISV(mCurrPos < mBufferSize,
|
|
avar("End of file before quote closed. Quote started: (%s: %d)",
|
|
getFileName(), startLine));
|
|
AssertISV((mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'),
|
|
avar("End of line reached before end of quote. Quote started: (%s: %d)",
|
|
getFileName(), startLine));
|
|
|
|
mCurrTokenBuffer[currPosition++] = mpBuffer[mCurrPos++];
|
|
}
|
|
|
|
mTokenIsQuoted = true;
|
|
|
|
mCurrPos++;
|
|
cont = false;
|
|
}
|
|
else if (c == '/' && mpBuffer[mCurrPos+1] == '/')
|
|
{
|
|
// Line quote...
|
|
if (currPosition == 0)
|
|
{
|
|
// continue to end of line, then let crossLine determine on the next pass
|
|
while (mCurrPos < mBufferSize && (mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'))
|
|
mCurrPos++;
|
|
}
|
|
else
|
|
{
|
|
// This is the end of the token. Continue to EOL
|
|
while (mCurrPos < mBufferSize && (mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'))
|
|
mCurrPos++;
|
|
cont = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// If this is the first non-token character then store the
|
|
// beginning of the token
|
|
if (currPosition == 0)
|
|
mStartPos = mCurrPos;
|
|
|
|
mCurrTokenBuffer[currPosition++] = c;
|
|
mCurrPos++;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (cont == false)
|
|
break;
|
|
}
|
|
|
|
mCurrTokenBuffer[currPosition] = '\0';
|
|
|
|
if (assertAvail == true)
|
|
AssertISV(currPosition != 0, avar("Error parsing: %s at or around line: %d", getFileName(), getCurrentLine()));
|
|
|
|
if (mCurrPos == mBufferSize)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Tokenizer::regressToken(const bool crossLine)
|
|
{
|
|
if (mTokenIsCurrent == true)
|
|
{
|
|
AssertFatal(mCurrTokenBuffer[0] != '\0', "No token, but marked as current?");
|
|
mTokenIsCurrent = false;
|
|
return true;
|
|
}
|
|
|
|
U32 currPosition = 0;
|
|
mCurrTokenBuffer[0] = '\0';
|
|
|
|
mTokenIsQuoted = false;
|
|
|
|
// Store the beginning of the previous advance
|
|
// and the beginning of the current advance
|
|
mCurrPos = mStartPos;
|
|
|
|
// Back up to the first character of the previous token
|
|
mStartPos--;
|
|
|
|
while (mStartPos > 0)
|
|
{
|
|
char c = mpBuffer[mStartPos];
|
|
|
|
bool cont = true;
|
|
|
|
if (mSingleTokens && dStrchr(mSingleTokens, c))
|
|
{
|
|
if (currPosition == 0)
|
|
{
|
|
mCurrTokenBuffer[currPosition++] = c;
|
|
mStartPos--;
|
|
cont = false;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
// End of token
|
|
cont = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
switch (c)
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
if (currPosition == 0)
|
|
{
|
|
// Token hasn't started yet...
|
|
mStartPos--;
|
|
}
|
|
else
|
|
{
|
|
// End of token
|
|
mStartPos--;
|
|
cont = false;
|
|
}
|
|
break;
|
|
|
|
case '\r':
|
|
case '\n':
|
|
if (crossLine == true && currPosition == 0)
|
|
{
|
|
// Windows line ending
|
|
if (mStartPos > 0 && mpBuffer[mStartPos] == '\r' && mpBuffer[mStartPos - 1] == '\n')
|
|
mStartPos -= 2;
|
|
// Not sure if this ever happens but just in case
|
|
else if (mStartPos > 0 && mpBuffer[mStartPos] == '\n' && mpBuffer[mStartPos - 1] == '\r')
|
|
mStartPos -= 2;
|
|
// Unix line endings should only have a single line break character
|
|
else
|
|
mStartPos--;
|
|
}
|
|
else
|
|
{
|
|
cont = false;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (c == '\"' || c == '\'')
|
|
{
|
|
// Quoted token
|
|
U32 endLine = getCurrentLine();
|
|
mStartPos--;
|
|
|
|
while (mpBuffer[mStartPos] != c)
|
|
{
|
|
AssertISV(mStartPos < 0,
|
|
avar("Beginning of file reached before finding begin quote. Quote ended: (%s: %d)",
|
|
getFileName(), endLine));
|
|
|
|
mCurrTokenBuffer[currPosition++] = mpBuffer[mStartPos--];
|
|
}
|
|
|
|
mTokenIsQuoted = true;
|
|
|
|
mStartPos--;
|
|
cont = false;
|
|
}
|
|
else if (c == '/' && mStartPos > 0 && mpBuffer[mStartPos - 1] == '/')
|
|
{
|
|
// Line quote...
|
|
// Clear out anything saved already
|
|
currPosition = 0;
|
|
|
|
mStartPos -= 2;
|
|
}
|
|
else
|
|
{
|
|
mCurrTokenBuffer[currPosition++] = c;
|
|
mStartPos--;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (cont == false)
|
|
break;
|
|
}
|
|
|
|
mCurrTokenBuffer[currPosition] = '\0';
|
|
|
|
// Reveres the token
|
|
for (U32 i = 0; i < currPosition / 2; i++)
|
|
{
|
|
char c = mCurrTokenBuffer[i];
|
|
mCurrTokenBuffer[i] = mCurrTokenBuffer[currPosition - i - 1];
|
|
mCurrTokenBuffer[currPosition - i - 1] = c;
|
|
}
|
|
|
|
mStartPos++;
|
|
|
|
if (mStartPos == mCurrPos)
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Tokenizer::tokenAvailable()
|
|
{
|
|
// Note: this implies that when advanceToken(false) fails, it must cap the
|
|
// token buffer.
|
|
//
|
|
return mCurrTokenBuffer[0] != '\0';
|
|
}
|
|
|
|
const char* Tokenizer::getToken() const
|
|
{
|
|
return mCurrTokenBuffer;
|
|
}
|
|
|
|
const char* Tokenizer::getNextToken()
|
|
{
|
|
advanceToken(true);
|
|
|
|
return getToken();
|
|
}
|
|
|
|
bool Tokenizer::tokenICmp(const char* pCmp) const
|
|
{
|
|
return dStricmp(mCurrTokenBuffer, pCmp) == 0;
|
|
}
|
|
|
|
// Searches for a token equal to pCmp (compared with tokenICmp()), starting
// at buffer offset `start`.
//
// Returns true with the tokenizer positioned on the match, or false once
// the end of the buffer has been reached without one.
bool Tokenizer::findToken(U32 start, const char* pCmp)
{
   // Move to the start; this already reads the first token
   setCurrentPos(start);

   // In case the first token is what we are looking for
   if (tokenICmp(pCmp))
      return true;

   // Walk the rest of the buffer. The loop is driven by endOfFile() rather
   // than by advanceToken()'s return value: advanceToken() returns false
   // when it stops exactly at the end of the buffer, even though it may
   // just have read the final token, and that token must still be compared.
   while (!endOfFile())
   {
      advanceToken(true);

      if (tokenICmp(pCmp))
         return true;
   }

   return false;
}
|
|
|
|
bool Tokenizer::findToken(const char* pCmp)
|
|
{
|
|
return findToken(0, pCmp);
|
|
}
|
|
|
|
bool Tokenizer::endOfFile()
|
|
{
|
|
if (mCurrPos < mBufferSize)
|
|
return false;
|
|
else
|
|
return true;
|
|
} |