Added .cpp and .h files under the sql/src subdirectory

git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@469 0109f412-320b-0410-ab79-c3e0c5ffbbe6
This commit is contained in:
dsandersoremutah
2006-05-26 23:17:49 +00:00
parent dc6cd8b9cb
commit 021073907f
82 changed files with 97516 additions and 0 deletions

425
sql/src/kyeword.cpp Normal file
View File

@@ -0,0 +1,425 @@
//------------------------------------------------------------------------------
// Desc: This file contains the code to parse out individual words and
// substrings in a text string.
//
// Tabs: 3
//
// Copyright (c) 1990-2000, 2002-2006 Novell, Inc. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of version 2 of the GNU General Public
// License as published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, contact Novell, Inc.
//
// To contact Novell about this file by physical or electronic mail,
// you may find current contact information at www.novell.com
//
// $Id: kyeword.cpp 3115 2006-01-19 13:24:39 -0700 (Thu, 19 Jan 2006) dsanders $
//------------------------------------------------------------------------------
#include "flaimsys.h"
// Forward declarations of the file-local helpers defined later in this file.

// Reads the next character from the stream, applying the comparison rules,
// and returns it as a WP value and/or a Unicode value.
FSTATIC RCODE flmGetCharacter(
IF_PosIStream * pIStream,
FLMUINT * puiCompareRules,
FLMUINT16 * pui16WPValue,
FLMUNICODE * puUniValue);
// Reads the next character via flmGetCharacter and classifies it for word
// parsing (SDWD_CHR, WDJN_CHR or DELI_CHR).
FSTATIC RCODE flmTextGetCharType(
IF_PosIStream * pIStream,
FLMUINT * puiCompareRules,
FLMUNICODE * puUniValue,
FLMUINT * puiType);
/*****************************************************************************
Desc:	Classify a 7-bit character for word parsing.
		Letters and digits are word characters (SDWD_CHR).  The apostrophe
		(0x27) and a fixed set of punctuation characters (comma, dash, dot,
		slash, colon, at-sign, backslash, underscore) are word joiners
		(WDJN_CHR).  Everything else is a delimiter (DELI_CHR).
*****************************************************************************/
FINLINE FLMUINT flmCharTypeAnsi7(
	FLMUINT16	ui16Char)
{
	// Anything not recognized below is a delimiter.
	FLMUINT	uiCharType = DELI_CHR;

	if( (ui16Char >= ASCII_ZERO && ui16Char <= ASCII_NINE) ||
		 (ui16Char >= ASCII_UPPER_A && ui16Char <= ASCII_UPPER_Z) ||
		 (ui16Char >= ASCII_LOWER_A && ui16Char <= ASCII_LOWER_Z))
	{
		uiCharType = SDWD_CHR;
	}
	else if( ui16Char == 0x27)
	{
		// Apostrophe joins words.
		uiCharType = WDJN_CHR;
	}
	else if( ui16Char > 0x2B)
	{
		// Only characters above 0x2B can be one of the remaining joiners;
		// everything at or below 0x2B (other than 0x27) is a delimiter.
		if( ui16Char == ASCII_COMMA ||
			 ui16Char == ASCII_DASH ||
			 ui16Char == ASCII_DOT ||
			 ui16Char == ASCII_SLASH ||
			 ui16Char == ASCII_COLON ||
			 ui16Char == ASCII_AT ||
			 ui16Char == ASCII_BACKSLASH ||
			 ui16Char == ASCII_UNDERSCORE)
		{
			uiCharType = WDJN_CHR;
		}
	}

	return uiCharType;
}
/*****************************************************************************
Desc:	Read the next character from the stream, applying the comparison
		rules in *puiCompareRules.  Characters that f_convertChar maps to 0
		are skipped.  Runs of spaces are handled according to the
		FLM_COMP_COMPRESS_WHITESPACE and FLM_COMP_IGNORE_TRAILING_SPACE flags
		(see the comments in the body).  The character is returned as a
		WP value through pui16WPValue (0 when f_unicodeToWP returns a false
		value) and/or as Unicode through puUniValue; either pointer may be
		NULL.  A returned character of 0 means the end of the stream was
		reached.
Return:	NE_SFLM_OK on success, including at end of stream.  Otherwise the
		error returned by f_readUTF8CharAsUnicode or positionTo; in that
		case the output character values are not written.
		*puiCompareRules is always written back on exit; the
		FLM_COMP_IGNORE_LEADING_SPACE flag is cleared in it once a non-space
		character has been seen.
*****************************************************************************/
FSTATIC RCODE flmGetCharacter(
IF_PosIStream * pIStream,
FLMUINT * puiCompareRules,
FLMUINT16 * pui16WPValue,
FLMUNICODE * puUniValue)
{
RCODE rc = NE_SFLM_OK;
FLMUNICODE uChar = 0;
FLMUINT64 ui64AfterLastSpacePos = 0;
FLMBOOL bLastCharWasSpace = FALSE;
FLMUINT uiCompareRules = *puiCompareRules;
// Keep reading until we have a character to return or hit end of stream.
for( ;;)
{
if (RC_BAD( rc = f_readUTF8CharAsUnicode( pIStream, &uChar)))
{
// Any error other than end-of-stream is returned to the caller.
if (rc != NE_SFLM_EOF_HIT)
{
goto Exit;
}
// End of stream is not an error - it is reported as a 0 character
// (or as one final space if a compressed run of spaces is pending
// and trailing spaces are not being ignored).
rc = NE_SFLM_OK;
if (bLastCharWasSpace &&
!(uiCompareRules & FLM_COMP_IGNORE_TRAILING_SPACE))
{
// bLastCharWasSpace flag can only be TRUE if either
// FLM_COMP_IGNORE_TRAILING_SPACE is set or
// FLM_COMP_COMPRESS_WHITESPACE is set.
flmAssert( uiCompareRules & FLM_COMP_COMPRESS_WHITESPACE);
uChar = ASCII_SPACE;
}
else
{
uChar = 0;
}
break;
}
// A converted value of 0 means this character is to be skipped.
if ((uChar = f_convertChar( uChar, uiCompareRules)) == 0)
{
continue;
}
if (uChar == ASCII_SPACE)
{
if (uiCompareRules & FLM_COMP_COMPRESS_WHITESPACE)
{
// Compressing whitespace: remember the position after the most
// recent space and keep reading; the whole run is returned as a
// single space once we know what follows it.
bLastCharWasSpace = TRUE;
ui64AfterLastSpacePos = pIStream->getCurrPosition();
}
else if (uiCompareRules & FLM_COMP_IGNORE_TRAILING_SPACE)
{
// If the ignore trailing space flag is set, but the compress
// space flag is not set, remember the position of the
// first space character. If we hit a non-space character,
// we will reposition to after this space character.
if (!bLastCharWasSpace)
{
bLastCharWasSpace = TRUE;
ui64AfterLastSpacePos = pIStream->getCurrPosition();
}
}
else
{
// No special space handling - return the space as-is.
break;
}
}
else
{
// Disable the ignore leading space flag, because we are now
// past all leading space, and we don't want spaces ignored
// now on account of that flag.
uiCompareRules &= (~(FLM_COMP_IGNORE_LEADING_SPACE));
if (bLastCharWasSpace)
{
// Position to after the last space
// The pending space is returned now instead of the non-space
// character, which will be read again on the next call.
if (RC_BAD( rc = pIStream->positionTo( ui64AfterLastSpacePos)))
{
goto Exit;
}
uChar = ASCII_SPACE;
bLastCharWasSpace = FALSE;
}
break;
}
}
// Hand back the character in whichever forms the caller asked for.
if (pui16WPValue)
{
if (!f_unicodeToWP( uChar, pui16WPValue))
{
*pui16WPValue = 0;
}
}
if (puUniValue)
{
*puUniValue = uChar;
}
Exit:
// Written back on both the success and the error path.
*puiCompareRules = uiCompareRules;
return( rc);
}
/****************************************************************************
Desc:	Substring-ize the string in a node.  Normalize spaces and hyphens if
		told to.  Example: ABC DEF
			ABC DEF
			BC DEF
			C DEF
			DEF

		Each call reads up to (limit + 1) characters from the current stream
		position, stores them in pucSubstrBuf as NUL-terminated UTF-8, and
		then repositions the stream to just after the first character that
		was returned, so that the next call produces the next substring.

		On entry *puiSubstrBytes holds the size of pucSubstrBuf in bytes.
		On successful return it holds the number of bytes stored, including
		the terminating NUL (0 if no characters were found), and
		*puiSubstrChars holds the number of characters stored.

Return:	NE_SFLM_OK on success, otherwise the error returned by
		flmGetCharacter, f_uni2UTF8 or positionTo.  On error the output
		lengths are not written and the stream position is not restored.
****************************************************************************/
RCODE KYSubstringParse(
	IF_PosIStream *	pIStream,
	FLMUINT *			puiCompareRules,	// [in/out] comparison rules
	FLMUINT				uiLimitParm,		// [in] Max characters
	FLMBYTE *			pucSubstrBuf,		// [out] buffer to fill
	FLMUINT *			puiSubstrBytes,	// [in] buffer size, [out] returns length
	FLMUINT *			puiSubstrChars)	// [out] returns character count
{
	RCODE			rc = NE_SFLM_OK;
	FLMUINT		uiDestOffset = 0;
	FLMUINT		uiDestSize = *puiSubstrBytes;
	FLMUINT		uiLimit = uiLimitParm ? uiLimitParm : ICD_DEFAULT_SUBSTRING_LIMIT;
	FLMUINT		uiCharCnt = 0;
	FLMUINT		uiSize;
	FLMBOOL		bFirstCharacter = TRUE;
	FLMUINT64	ui64SavePosition = pIStream->getCurrPosition();

	// The limit must return one more than requested in order
	// for the text to collation routine to set the truncated flag.

	uiLimit++;
	while (uiLimit--)
	{
		FLMUNICODE	uChar;

		if (RC_BAD( rc = flmGetCharacter( pIStream, puiCompareRules,
									NULL, &uChar)))
		{
			goto Exit;
		}

		// A zero character means end of stream.

		if (!uChar)
		{
			break;
		}
		uiCharCnt++;

		// Always hold back one byte of the buffer for the terminating NUL
		// written after the loop.  Without this, a string that exactly
		// filled the buffer caused the terminator to be written one byte
		// past the end of pucSubstrBuf.
		// NOTE(review): this relies on f_uni2UTF8 failing (rather than
		// writing) when the size passed in is too small - confirm.

		if (uiDestSize > uiDestOffset)
		{
			uiSize = uiDestSize - uiDestOffset - 1;
		}
		else
		{
			uiSize = 0;
		}
		if (RC_BAD( rc = f_uni2UTF8( uChar, &pucSubstrBuf[ uiDestOffset],
									&uiSize)))
		{
			goto Exit;
		}
		uiDestOffset += uiSize;

		// If on the first word, position to start on next character
		// for the next call.

		if (bFirstCharacter)
		{
			bFirstCharacter = FALSE;

			// First character - save position so we can restore it
			// upon leaving the routine.

			ui64SavePosition = pIStream->getCurrPosition();
		}
	}

	// Terminate the substring.  Space for this byte was reserved above.

	if (uiDestOffset)
	{
		pucSubstrBuf[ uiDestOffset++] = 0;
	}
	*puiSubstrBytes = (FLMUINT)uiDestOffset;
	*puiSubstrChars = uiCharCnt;

	// Restore position of stream to first character after the first
	// character we found - to ready for next call.

	rc = pIStream->positionTo( ui64SavePosition);

Exit:

	return( rc);
}
/****************************************************************************
Desc:	Extract the next word from the stream.  Characters are skipped until
		one classified as SDWD_CHR is found; that character starts the word.
		Following characters are appended until a character classified as
		DELI_CHR or WDJN_CHR is read, or the end of the stream is reached.
		The character that ends the word is consumed but not stored.

		On entry *puiWordLen holds the size of pucWordBuf in bytes.  On
		successful return it holds the number of bytes stored as
		NUL-terminated UTF-8, including the terminating NUL (0 if no word
		was found).

		NOTE(review): uiLimit is only decremented when the first character
		of a word is stored, so it does not cap the length of the word.
		That looks unintended given the "Max characters" description, but
		it is left as-is because changing it would change the words
		produced - confirm the intended behavior.

Return:	NE_SFLM_OK on success, otherwise the error returned by
		flmTextGetCharType or f_uni2UTF8.  On error *puiWordLen is not
		written.
****************************************************************************/
RCODE KYEachWordParse(
	IF_PosIStream *	pIStream,
	FLMUINT *			puiCompareRules,
	FLMUINT				uiLimit,				// [in] Max characters
	FLMBYTE *			pucWordBuf,			// [out] Buffer of at least SFLM_MAX_KEY_SIZE
	FLMUINT *			puiWordLen)			// [in] buffer size, [out] returns length
{
	RCODE			rc = NE_SFLM_OK;
	FLMBOOL		bSkippingDelim = TRUE;
	FLMUINT		uiWordLen = 0;
	FLMUINT		uiWordBufSize = *puiWordLen;
	FLMUNICODE	uChar;
	FLMUINT		uiType = 0;
	FLMUINT		uiSize;

	if (!uiLimit)
	{
		uiLimit = ICD_DEFAULT_SUBSTRING_LIMIT;
	}

	while (uiLimit)
	{
		if (RC_BAD( rc = flmTextGetCharType( pIStream, puiCompareRules,
									&uChar, &uiType)))
		{
			goto Exit;
		}

		// A zero character means end of stream.

		if (!uChar)
		{
			break;
		}

		// Determine how to handle what we got.

		if (bSkippingDelim)
		{
			// Still looking for the start of a word - ignore everything
			// that is not a word character.

			if (!(uiType & SDWD_CHR))
			{
				continue;
			}

			// Found the first character of the word.

			bSkippingDelim = FALSE;
			uiLimit--;
		}
		else if (uiType & (DELI_CHR | WDJN_CHR))
		{
			// Inside a word and we ran into a delimiter - output the word.

			break;
		}

		// Append the character.  Always hold back one byte of the buffer
		// for the terminating NUL written after the loop.  Without this, a
		// word that exactly filled the buffer caused the terminator to be
		// written one byte past the end of pucWordBuf.
		// NOTE(review): this relies on f_uni2UTF8 failing (rather than
		// writing) when the size passed in is too small - confirm.

		if (uiWordBufSize > uiWordLen)
		{
			uiSize = uiWordBufSize - uiWordLen - 1;
		}
		else
		{
			uiSize = 0;
		}
		if (RC_BAD( rc = f_uni2UTF8( uChar, &pucWordBuf [uiWordLen],
									&uiSize)))
		{
			goto Exit;
		}
		uiWordLen += uiSize;
	}

	// Return the word, if any.  Space for the terminator was reserved above.

	if (uiWordLen)
	{
		pucWordBuf [uiWordLen++] = 0;
	}
	*puiWordLen = uiWordLen;

Exit:

	return( rc);
}
/*****************************************************************************
Desc:	Read the next character from the stream via flmGetCharacter and
		classify it for word parsing.  The Unicode value is returned through
		puUniValue and the classification (SDWD_CHR, WDJN_CHR or DELI_CHR)
		through puiType.
Return:	NE_SFLM_OK on success, otherwise the error returned by
		flmGetCharacter, in which case *puiType is not written.
*****************************************************************************/
FSTATIC RCODE flmTextGetCharType(
	IF_PosIStream *	pIStream,
	FLMUINT *			puiCompareRules,
	FLMUNICODE *		puUniValue,			// [out] Unicode value
	FLMUINT *			puiType)				// [out] Char attribute type.
{
	RCODE			rc = NE_SFLM_OK;
	FLMUINT16	ui16WPChar;

	if( RC_BAD( rc = flmGetCharacter( pIStream, puiCompareRules,
								&ui16WPChar, puUniValue)))
	{
		return( rc);
	}

	if (!ui16WPChar)
	{
		// For now all unmapped unicode characters (and end of stream)
		// are treated as delimiters.

		*puiType = DELI_CHR;
	}
	else if (ui16WPChar < 0x080)
	{
		// 7-bit characters have their own classification table.

		*puiType = flmCharTypeAnsi7( ui16WPChar);
	}
	else
	{
		// The high byte of the WP value is the character set.  Sets 1, 2
		// and 8 through 11 are treated as word characters (presumably the
		// alphabetic WP character sets - confirm); all others are
		// delimiters.

		FLMUINT	uiWPCharSet = (FLMUINT)(ui16WPChar >> 8);

		*puiType = (uiWPCharSet == 1 || uiWPCharSet == 2 ||
						(uiWPCharSet >= 8 && uiWPCharSet <= 11))
							? SDWD_CHR
							: DELI_CHR;
	}

	return( rc);
}