Index: filters/kword/rtf/import/rtfimport_tokenizer.cpp =================================================================== --- filters/kword/rtf/import/rtfimport_tokenizer.cpp (revision 463235) +++ filters/kword/rtf/import/rtfimport_tokenizer.cpp (working copy) @@ -16,7 +16,7 @@ RTFTokenizer::RTFTokenizer() { - tokenText.resize( 4112 ); + tokenText.resize( 4113 ); fileBuffer.resize( 4096 ); infile = 0L; } @@ -30,8 +30,25 @@ fileBufferPtr = 0L; fileBufferEnd = 0L; infile = in; + type = RTFTokenizer::PlainText; } +int RTFTokenizer::nextChar() +{ + if ( fileBufferPtr == fileBufferEnd ) { + int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); + fileBufferPtr = ( uchar* ) fileBuffer.data(); + fileBufferEnd = fileBufferPtr; + + if ( n <= 0 ) + return -1; + + fileBufferEnd = fileBufferPtr + n; + } + return *fileBufferPtr++; +} + + /** * Reads the next token. */ @@ -42,22 +59,15 @@ if (!infile) return; - do - { - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); + do { + int n = nextChar(); - if (n <= 0) - { - // Return CloseGroup on EOF - ch = '}'; - break; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } - ch = *fileBufferPtr++; + if ( n <= 0 ) { + ch = '}'; + break; + } + + ch = n; } while (ch == '\n' || ch == '\r' && ch != 0); @@ -67,6 +77,7 @@ uchar *_text = (uchar *)text; + if (ch == '{') type = RTFTokenizer::OpenGroup; else if (ch == '}') @@ -75,20 +86,14 @@ { type = RTFTokenizer::ControlWord; - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); + int n = nextChar(); - if (n <= 0) - { - // Return CloseGroup on EOF - type = RTFTokenizer::CloseGroup; - return; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } - ch = *fileBufferPtr++; + if ( n <= 0 ) { + // Return CloseGroup on EOF + type = RTFTokenizer::CloseGroup; + return; + } + ch = n; // Type is 
either control word or control symbol if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) @@ -96,64 +101,41 @@ int v = 0; // Read alphabetic string (command) - while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) + while (_text < ( uchar* )tokenText.data()+tokenText.size()-3 && + ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) ) { *_text++ = ch; - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); - - if (n <= 0) - { - ch = ' '; - break; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } - ch = *fileBufferPtr++; + int n = nextChar(); + if ( n <= 0 ) { + ch = ' '; + break; + } + ch = n; } // Read numeric parameter (param) bool isneg = (ch == '-'); - if (isneg) - { - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); - - if (n <= 0) - { - // Return CloseGroup on EOF - type = RTFTokenizer::CloseGroup; - return; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } - ch = *fileBufferPtr++; + if (isneg) { + int n = nextChar(); + if ( n <= 0 ) { + type = RTFTokenizer::CloseGroup; + return; + } + ch = n; } - while (ch >= '0' && ch <= '9') - { + + while (ch >= '0' && ch <= '9') { v = (10 * v) + ch - '0'; hasParam = true; - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); + int n = nextChar(); - if (n <= 0) - { - ch = ' '; - break; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } - ch = *fileBufferPtr++; - } + if ( n <= 0 ) + n = ' '; + ch = n; + } value = isneg ? 
-v : v; // If delimiter is a space, it's part of the control word @@ -163,7 +145,7 @@ } *_text = 0; // Just put an end of string for the test, it can then be over-written again - if ( !qstrncmp( tokenText.data()+1, "bin", 4 ) ) // Test the NULL too to avoid catching keywords starting with "bin" + if ( !memcmp( tokenText.data()+1, "bin", 4 ) ) { // We have \bin, so we need to read the bytes kdDebug(30515) << "Token:" << tokenText << endl; if (value > 0) @@ -173,26 +155,15 @@ binaryData.resize(value); for (int i=0; i<value; i++) { - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); - - if (n <= 0) - { - kdError(30515) << "\\bin stream hit end of file." << endl; - type = RTFTokenizer::CloseGroup; - break; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); + int n = nextChar(); + if ( n <= 0 ) { + type = RTFTokenizer::CloseGroup; + break; } - binaryData[i]=*fileBufferPtr++; + + binaryData[i] = n; } } - else - { - kdError(30515) << "\\bin with negative value skipping" << endl; - } } } @@ -200,19 +171,13 @@ { type = RTFTokenizer::ControlWord; *_text++ = ch; - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); - if (n <= 0) - { - // Return CloseGroup on EOF - type = RTFTokenizer::CloseGroup; - return; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } + int n = nextChar(); + + if ( n <= 0 ) { + type = RTFTokenizer::CloseGroup; + return; + } ch = *fileBufferPtr++; for(int i=0;i<2;i++) { @@ -220,22 +185,16 @@ value<<=4; value=value|((ch + ((ch & 16) ? 
0 : 9)) & 0xf); - if (fileBufferPtr == fileBufferEnd) - { - int n = infile->readBlock( fileBuffer.data(), fileBuffer.size() ); + int n = nextChar(); - if (n <= 0) - { - ch = ' '; - break; - } - fileBufferPtr = (uchar *)fileBuffer.data(); - fileBufferEnd = (fileBufferPtr + n); - } + if ( n <= 0 ) { + ch = ' '; + break; + } ch = *fileBufferPtr++; } - --fileBufferPtr; - } + --fileBufferPtr; + } else { type = RTFTokenizer::ControlWord; @@ -248,14 +207,16 @@ // Everything until next backslash, opener or closer while ( ch != '\\' && ch != '{' && ch != '}' && ch != '\n' && - ch != '\r' && fileBufferPtr <= fileBufferEnd ) + ch != '\r') { *_text++ = ch; + if(fileBufferPtr >= fileBufferEnd) + break; ch = *fileBufferPtr++; } - - // Give back last char - --fileBufferPtr; + if(fileBufferPtr < fileBufferEnd) + --fileBufferPtr; // give back the last char } *_text++ = 0; + } Index: filters/kword/rtf/import/rtfimport_tokenizer.h =================================================================== --- filters/kword/rtf/import/rtfimport_tokenizer.h (revision 463235) +++ filters/kword/rtf/import/rtfimport_tokenizer.h (working copy) @@ -50,6 +50,8 @@ // tokenizer (private) data private: + int nextChar(); + QFile *infile; QByteArray fileBuffer; QCString tokenText;