Torque3D/Engine/source/core/tokenizer.cpp
2012-09-19 11:15:01 -04:00

613 lines
15 KiB
C++

//-----------------------------------------------------------------------------
// Copyright (c) 2012 GarageGames, LLC
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
//-----------------------------------------------------------------------------
#include "core/tokenizer.h"
#include "platform/platform.h"
#include "core/stream/fileStream.h"
#include "core/strings/stringFunctions.h"
#include "core/util/safeDelete.h"
Tokenizer::Tokenizer()
{
dMemset(mFileName, 0, sizeof(mFileName));
mpBuffer = NULL;
mBufferSize = 0;
mStartPos = 0;
mCurrPos = 0;
mTokenIsQuoted = false;
dMemset(mCurrTokenBuffer, 0, sizeof(mCurrTokenBuffer));
mTokenIsCurrent = false;
mSingleTokens = NULL;
VECTOR_SET_ASSOCIATION(mLinePositions);
}
Tokenizer::~Tokenizer()
{
clear();
}
bool Tokenizer::openFile(const char* pFileName)
{
AssertFatal(mFileName[0] == '\0', "Reuse of Tokenizers not allowed!");
FileStream* pStream = new FileStream;
if (pStream->open(pFileName, Torque::FS::File::Read) == false)
{
delete pStream;
return false;
}
dStrcpy(mFileName, pFileName);
mBufferSize = pStream->getStreamSize();
mpBuffer = new char[mBufferSize];
pStream->read(mBufferSize, mpBuffer);
pStream->close();
delete pStream;
reset();
buildLinePositions();
return true;
}
bool Tokenizer::openFile(Stream* pStream)
{
mBufferSize = pStream->getStreamSize();
mpBuffer = new char[mBufferSize];
pStream->read(mBufferSize, mpBuffer);
reset();
buildLinePositions();
return true;
}
void Tokenizer::setBuffer(const char* buffer, U32 bufferSize)
{
if (mpBuffer)
{
SAFE_DELETE_ARRAY(mpBuffer);
mBufferSize = 0;
}
mBufferSize = bufferSize;
mpBuffer = new char[mBufferSize + 1];
dStrcpy(mpBuffer, buffer);
reset();
buildLinePositions();
}
void Tokenizer::setSingleTokens(const char* singleTokens)
{
if (mSingleTokens)
SAFE_DELETE(mSingleTokens);
if (singleTokens)
mSingleTokens = dStrdup(singleTokens);
}
bool Tokenizer::reset()
{
mStartPos = 0;
mCurrPos = 0;
mTokenIsQuoted = false;
dMemset(mCurrTokenBuffer, 0, sizeof(mCurrTokenBuffer));
mTokenIsCurrent = false;
return true;
}
bool Tokenizer::clear()
{
// Delete our buffer
if (mpBuffer)
SAFE_DELETE_ARRAY(mpBuffer);
// Reset the buffer size
mBufferSize = 0;
// Reset our active data
reset();
// Clear our line positions
mLinePositions.clear();
// Reset our file name
dMemset(mFileName, 0, 1024);
// Wipe the single tokens
setSingleTokens(NULL);
return true;
}
bool Tokenizer::setCurrentPos(U32 pos)
{
mCurrPos = pos;
mTokenIsCurrent = false;
return advanceToken(true);
}
void Tokenizer::buildLinePositions()
{
if (mBufferSize == 0)
return;
// We can safely assume that the first line is at position 0
mLinePositions.push_back(0);
U32 currPos = 0;
while (currPos + 1 < mBufferSize)
{
// Windows line ending
if (mpBuffer[currPos] == '\r' && mpBuffer[currPos + 1] == '\n')
{
currPos += 2;
mLinePositions.push_back(currPos);
}
// Not sure if this ever happens but just in case
else if (mpBuffer[currPos] == '\n' && mpBuffer[currPos + 1] == '\r')
{
currPos += 2;
mLinePositions.push_back(currPos);
}
// Unix line endings should only have a single line break character
else if (mpBuffer[currPos] == '\n' || mpBuffer[currPos] == '\r')
{
currPos++;
mLinePositions.push_back(currPos);
}
else
currPos++;
}
}
U32 Tokenizer::getLinePosition(const U32 pos, U32 lowIndex, S32 highIndex)
{
// If we have one or less lines then
// the result is easy
if (mLinePositions.size() <= 1)
return 0;
// Now that we know we have at least one position
// we can do a quick test against the last line
if (pos >= mLinePositions.last())
return mLinePositions.size() - 1;
// If this is the beginning of the search
// set a good starting point (the middle)
if (highIndex < 0)
highIndex = mLinePositions.size() - 1;
// Just in case bad values got handed in
if (lowIndex > highIndex)
lowIndex = highIndex;
// Compute our test index (middle)
U32 testIndex = (lowIndex + highIndex) / 2;
// Make sure that our test indices are valid
if (testIndex >= mLinePositions.size() ||
testIndex + 1 >= mLinePositions.size())
return mLinePositions.size() - 1;
// See if we are already at the right line
if (pos >= mLinePositions[testIndex] && pos < mLinePositions[testIndex + 1])
return testIndex;
if (pos < mLinePositions[testIndex])
highIndex = testIndex;
else
lowIndex = testIndex;
return getLinePosition(pos, lowIndex, highIndex);
}
U32 Tokenizer::getCurrentLine()
{
// Binary search for the line number whose
// position is equal to or lower than the
// current position
return getLinePosition(mStartPos);
}
U32 Tokenizer::getTokenLineOffset()
{
U32 lineNumber = getCurrentLine();
if (lineNumber >= mLinePositions.size())
return 0;
U32 linePosition = mLinePositions[lineNumber];
if (linePosition >= mStartPos)
return 0;
return mStartPos - linePosition;
}
bool Tokenizer::advanceToken(const bool crossLine, const bool assertAvail)
{
if (mTokenIsCurrent == true)
{
AssertFatal(mCurrTokenBuffer[0] != '\0', "No token, but marked as current?");
mTokenIsCurrent = false;
return true;
}
U32 currPosition = 0;
mCurrTokenBuffer[0] = '\0';
mTokenIsQuoted = false;
// Store the beginning of the previous advance
// and the beginning of the current advance
mStartPos = mCurrPos;
while (mCurrPos < mBufferSize)
{
char c = mpBuffer[mCurrPos];
bool cont = true;
if (mSingleTokens && dStrchr(mSingleTokens, c))
{
if (currPosition == 0)
{
mCurrTokenBuffer[currPosition++] = c;
mCurrPos++;
cont = false;
break;
}
else
{
// End of token
cont = false;
}
}
else
{
switch (c)
{
case ' ':
case '\t':
if (currPosition == 0)
{
// Token hasn't started yet...
mCurrPos++;
}
else
{
// End of token
mCurrPos++;
cont = false;
}
break;
case '\r':
case '\n':
if (crossLine == true)
{
// Windows line ending
if (mpBuffer[mCurrPos] == '\r' && mpBuffer[mCurrPos + 1] == '\n')
mCurrPos += 2;
// Not sure if this ever happens but just in case
else if (mpBuffer[mCurrPos] == '\n' && mpBuffer[mCurrPos + 1] == '\r')
mCurrPos += 2;
// Unix line endings should only have a single line break character
else
mCurrPos++;
}
else
{
cont = false;
break;
}
break;
default:
if (c == '\"' || c == '\'')
{
// Quoted token
U32 startLine = getCurrentLine();
mCurrPos++;
// Store the beginning of the token
mStartPos = mCurrPos;
while (mpBuffer[mCurrPos] != c)
{
AssertISV(mCurrPos < mBufferSize,
avar("End of file before quote closed. Quote started: (%s: %d)",
getFileName(), startLine));
AssertISV((mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'),
avar("End of line reached before end of quote. Quote started: (%s: %d)",
getFileName(), startLine));
mCurrTokenBuffer[currPosition++] = mpBuffer[mCurrPos++];
}
mTokenIsQuoted = true;
mCurrPos++;
cont = false;
}
else if (c == '/' && mpBuffer[mCurrPos+1] == '/')
{
// Line quote...
if (currPosition == 0)
{
// continue to end of line, then let crossLine determine on the next pass
while (mCurrPos < mBufferSize && (mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'))
mCurrPos++;
}
else
{
// This is the end of the token. Continue to EOL
while (mCurrPos < mBufferSize && (mpBuffer[mCurrPos] != '\n' && mpBuffer[mCurrPos] != '\r'))
mCurrPos++;
cont = false;
}
}
else
{
// If this is the first non-token character then store the
// beginning of the token
if (currPosition == 0)
mStartPos = mCurrPos;
mCurrTokenBuffer[currPosition++] = c;
mCurrPos++;
}
break;
}
}
if (cont == false)
break;
}
mCurrTokenBuffer[currPosition] = '\0';
if (assertAvail == true)
AssertISV(currPosition != 0, avar("Error parsing: %s at or around line: %d", getFileName(), getCurrentLine()));
if (mCurrPos == mBufferSize)
return false;
return true;
}
bool Tokenizer::regressToken(const bool crossLine)
{
if (mTokenIsCurrent == true)
{
AssertFatal(mCurrTokenBuffer[0] != '\0', "No token, but marked as current?");
mTokenIsCurrent = false;
return true;
}
U32 currPosition = 0;
mCurrTokenBuffer[0] = '\0';
mTokenIsQuoted = false;
// Store the beginning of the previous advance
// and the beginning of the current advance
mCurrPos = mStartPos;
// Back up to the first character of the previous token
mStartPos--;
while (mStartPos > 0)
{
char c = mpBuffer[mStartPos];
bool cont = true;
if (mSingleTokens && dStrchr(mSingleTokens, c))
{
if (currPosition == 0)
{
mCurrTokenBuffer[currPosition++] = c;
mStartPos--;
cont = false;
break;
}
else
{
// End of token
cont = false;
}
}
else
{
switch (c)
{
case ' ':
case '\t':
if (currPosition == 0)
{
// Token hasn't started yet...
mStartPos--;
}
else
{
// End of token
mStartPos--;
cont = false;
}
break;
case '\r':
case '\n':
if (crossLine == true && currPosition == 0)
{
// Windows line ending
if (mStartPos > 0 && mpBuffer[mStartPos] == '\r' && mpBuffer[mStartPos - 1] == '\n')
mStartPos -= 2;
// Not sure if this ever happens but just in case
else if (mStartPos > 0 && mpBuffer[mStartPos] == '\n' && mpBuffer[mStartPos - 1] == '\r')
mStartPos -= 2;
// Unix line endings should only have a single line break character
else
mStartPos--;
}
else
{
cont = false;
break;
}
break;
default:
if (c == '\"' || c == '\'')
{
// Quoted token
U32 endLine = getCurrentLine();
mStartPos--;
while (mpBuffer[mStartPos] != c)
{
AssertISV(mStartPos < 0,
avar("Beginning of file reached before finding begin quote. Quote ended: (%s: %d)",
getFileName(), endLine));
mCurrTokenBuffer[currPosition++] = mpBuffer[mStartPos--];
}
mTokenIsQuoted = true;
mStartPos--;
cont = false;
}
else if (c == '/' && mStartPos > 0 && mpBuffer[mStartPos - 1] == '/')
{
// Line quote...
// Clear out anything saved already
currPosition = 0;
mStartPos -= 2;
}
else
{
mCurrTokenBuffer[currPosition++] = c;
mStartPos--;
}
break;
}
}
if (cont == false)
break;
}
mCurrTokenBuffer[currPosition] = '\0';
// Reveres the token
for (U32 i = 0; i < currPosition / 2; i++)
{
char c = mCurrTokenBuffer[i];
mCurrTokenBuffer[i] = mCurrTokenBuffer[currPosition - i - 1];
mCurrTokenBuffer[currPosition - i - 1] = c;
}
mStartPos++;
if (mStartPos == mCurrPos)
return false;
return true;
}
bool Tokenizer::tokenAvailable()
{
// Note: this implies that when advanceToken(false) fails, it must cap the
// token buffer.
//
return mCurrTokenBuffer[0] != '\0';
}
const char* Tokenizer::getToken() const
{
return mCurrTokenBuffer;
}
const char* Tokenizer::getNextToken()
{
advanceToken(true);
return getToken();
}
bool Tokenizer::tokenICmp(const char* pCmp) const
{
return dStricmp(mCurrTokenBuffer, pCmp) == 0;
}
bool Tokenizer::findToken(U32 start, const char* pCmp)
{
// Move to the start
setCurrentPos(start);
// In case the first token is what we are looking for
if (tokenICmp(pCmp))
return true;
// Loop through the file and see if the token exists
while (advanceToken(true))
{
if (tokenICmp(pCmp))
return true;
}
return false;
}
bool Tokenizer::findToken(const char* pCmp)
{
return findToken(0, pCmp);
}
bool Tokenizer::endOfFile()
{
if (mCurrPos < mBufferSize)
return false;
else
return true;
}