Added .cpp and .h files under the sql/src subdirectory
git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@469 0109f412-320b-0410-ab79-c3e0c5ffbbe6
This commit is contained in:
435
sql/src/kycollat.cpp
Normal file
435
sql/src/kycollat.cpp
Normal file
@@ -0,0 +1,435 @@
|
||||
//------------------------------------------------------------------------------
|
||||
// Desc: Index collation routines
|
||||
//
|
||||
// Tabs: 3
|
||||
//
|
||||
// Copyright (c) 1991-2006 Novell, Inc. All Rights Reserved.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of version 2 of the GNU General Public
|
||||
// License as published by the Free Software Foundation.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, contact Novell, Inc.
|
||||
//
|
||||
// To contact Novell about this file by physical or electronic mail,
|
||||
// you may find current contact information at www.novell.com
|
||||
//
|
||||
// $Id: kycollat.cpp 3115 2006-01-19 13:24:39 -0700 (Thu, 19 Jan 2006) dsanders $
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#include "flaimsys.h"
|
||||
|
||||
FSTATIC RCODE KYFormatUTF8Text(
|
||||
IF_PosIStream * pIStream,
|
||||
FLMUINT uiFlags,
|
||||
FLMUINT uiCompareRules,
|
||||
F_DynaBuf * pDynaBuf);
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Build a collated key value piece.
|
||||
****************************************************************************/
|
||||
RCODE KYCollateValue(
|
||||
FLMBYTE * pucDest,
|
||||
FLMUINT * puiDestLen,
|
||||
IF_PosIStream * pIStream,
|
||||
FLMUINT uiDataType,
|
||||
FLMUINT uiFlags,
|
||||
FLMUINT uiCompareRules,
|
||||
FLMUINT uiLimit,
|
||||
FLMUINT * puiCollationLen,
|
||||
FLMUINT * puiLuLen,
|
||||
FLMUINT uiLanguage,
|
||||
FLMBOOL bFirstSubstring,
|
||||
FLMBOOL bDataTruncated,
|
||||
FLMBOOL * pbDataTruncated,
|
||||
FLMBOOL * pbOriginalCharsLost)
|
||||
{
|
||||
RCODE rc = NE_SFLM_OK;
|
||||
FLMUINT uiDestLen;
|
||||
IF_BufferIStream * pBufferIStream = NULL;
|
||||
FLMUINT uiCharLimit;
|
||||
FLMUINT uiLength;
|
||||
FLMBYTE * pucTmpDest;
|
||||
FLMUINT uiBytesRead;
|
||||
FLMBOOL bHaveData = TRUE;
|
||||
FLMUNICODE uChar;
|
||||
FLMBYTE ucDynaBuf[ 64];
|
||||
F_DynaBuf dynaBuf( ucDynaBuf, sizeof( ucDynaBuf));
|
||||
|
||||
if (puiLuLen)
|
||||
{
|
||||
*puiLuLen = 0;
|
||||
}
|
||||
|
||||
if ((uiDestLen = *puiDestLen) == 0)
|
||||
{
|
||||
rc = RC_SET( NE_SFLM_KEY_OVERFLOW);
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if (uiDataType != SFLM_STRING_TYPE)
|
||||
{
|
||||
if( !pIStream->remainingSize())
|
||||
{
|
||||
bHaveData = FALSE;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
FLMUINT64 ui64SavePosition = pIStream->getCurrPosition();
|
||||
|
||||
if( RC_BAD( rc = f_readUTF8CharAsUnicode(
|
||||
pIStream, &uChar)))
|
||||
{
|
||||
if (rc == NE_SFLM_EOF_HIT)
|
||||
{
|
||||
bHaveData = FALSE;
|
||||
rc = NE_SFLM_OK;
|
||||
}
|
||||
else
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
}
|
||||
|
||||
if( RC_BAD( rc = pIStream->positionTo( ui64SavePosition)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
// The text is expected to be 0-terminated UTF-8
|
||||
|
||||
if ((uiFlags & ICD_ESC_CHAR) ||
|
||||
(uiCompareRules &
|
||||
(FLM_COMP_COMPRESS_WHITESPACE |
|
||||
FLM_COMP_NO_WHITESPACE |
|
||||
FLM_COMP_NO_UNDERSCORES |
|
||||
FLM_COMP_NO_DASHES |
|
||||
FLM_COMP_WHITESPACE_AS_SPACE |
|
||||
FLM_COMP_IGNORE_LEADING_SPACE |
|
||||
FLM_COMP_IGNORE_TRAILING_SPACE)))
|
||||
{
|
||||
dynaBuf.truncateData( 0);
|
||||
if (RC_BAD( rc = KYFormatUTF8Text( pIStream,
|
||||
uiFlags, uiCompareRules, &dynaBuf)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if( RC_BAD( rc = FlmAllocBufferIStream( &pBufferIStream)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if (RC_BAD( rc = pBufferIStream->open(
|
||||
(const char *)dynaBuf.getBufferPtr(), dynaBuf.getDataLength())))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
pIStream = pBufferIStream;
|
||||
}
|
||||
|
||||
uiCharLimit = uiLimit ? uiLimit : ICD_DEFAULT_LIMIT;
|
||||
|
||||
if( (uiLanguage >= FLM_FIRST_DBCS_LANG ) &&
|
||||
(uiLanguage <= FLM_LAST_DBCS_LANG))
|
||||
{
|
||||
if( RC_BAD( rc = f_asiaUTF8ToColText( pIStream, pucDest, &uiDestLen,
|
||||
(uiCompareRules & FLM_COMP_CASE_INSENSITIVE)
|
||||
? TRUE
|
||||
: FALSE,
|
||||
puiCollationLen, puiLuLen,
|
||||
uiCharLimit, bFirstSubstring,
|
||||
bDataTruncated, pbDataTruncated)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( RC_BAD( rc = flmUTF8ToColText( pIStream, pucDest, &uiDestLen,
|
||||
(uiCompareRules & FLM_COMP_CASE_INSENSITIVE)
|
||||
? TRUE
|
||||
: FALSE,
|
||||
puiCollationLen, puiLuLen,
|
||||
uiLanguage, uiCharLimit, bFirstSubstring,
|
||||
bDataTruncated,
|
||||
pbOriginalCharsLost, pbDataTruncated)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TRICKY: uiDestLen could be set to zero if text and no value.
|
||||
|
||||
if (!bHaveData || !uiDestLen)
|
||||
{
|
||||
uiDestLen = 0;
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
switch (uiDataType)
|
||||
{
|
||||
case SFLM_STRING_TYPE:
|
||||
break;
|
||||
|
||||
case SFLM_NUMBER_TYPE:
|
||||
{
|
||||
FLMBYTE ucTmpBuf [FLM_MAX_NUM_BUF_SIZE];
|
||||
|
||||
uiLength = (FLMUINT)pIStream->remainingSize();
|
||||
|
||||
flmAssert( uiLength <= sizeof( ucTmpBuf));
|
||||
|
||||
if (RC_BAD( rc = pIStream->read( ucTmpBuf, uiLength, &uiBytesRead)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
flmAssert( uiBytesRead == uiLength);
|
||||
if (RC_BAD( rc = flmStorageNum2CollationNum( ucTmpBuf,
|
||||
uiBytesRead, pucDest, &uiDestLen)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case SFLM_BINARY_TYPE:
|
||||
{
|
||||
uiLength = (FLMUINT)pIStream->remainingSize();
|
||||
pucTmpDest = pucDest;
|
||||
|
||||
if (uiLength >= uiLimit)
|
||||
{
|
||||
uiLength = uiLimit;
|
||||
bDataTruncated = TRUE;
|
||||
}
|
||||
|
||||
// We don't want any single key piece to "pig out" more
|
||||
// than 256 bytes of the key
|
||||
|
||||
if (uiDestLen > 256)
|
||||
{
|
||||
uiDestLen = 256;
|
||||
}
|
||||
|
||||
if (uiLength > uiDestLen)
|
||||
{
|
||||
|
||||
// Compute length so will not overflow
|
||||
|
||||
uiLength = uiDestLen;
|
||||
bDataTruncated = TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
uiDestLen = uiLength;
|
||||
}
|
||||
|
||||
// Store as is.
|
||||
|
||||
if (RC_BAD( rc = pIStream->read( pucTmpDest, uiDestLen, &uiBytesRead)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if (bDataTruncated && pbDataTruncated)
|
||||
{
|
||||
*pbDataTruncated = TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
rc = RC_SET( NE_SFLM_CANNOT_INDEX_DATA_TYPE);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Exit:
|
||||
|
||||
if( pBufferIStream)
|
||||
{
|
||||
pBufferIStream->Release();
|
||||
}
|
||||
|
||||
*puiDestLen = uiDestLen;
|
||||
return( rc);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Format text removing leading and trailing spaces. Treat
|
||||
underscores as spaces. As options, remove all spaces and dashes.
|
||||
Ret: NE_SFLM_OK always. WIll truncate so text will fill SFLM_MAX_KEY_SIZE.
|
||||
Allocate 8 more than SFLM_MAX_KEY_SIZE for psDestBuf.
|
||||
Visit: Pass in uiLimit and pass back a truncated flag when the
|
||||
string is truncated. This was not done because we will have
|
||||
to get the exact truncated count that is done in f_tocoll.cpp
|
||||
and that could introduce some bugs.
|
||||
****************************************************************************/
|
||||
FSTATIC RCODE KYFormatUTF8Text(
|
||||
IF_PosIStream * pIStream,
|
||||
FLMUINT uiFlags, // ICD flags
|
||||
FLMUINT uiCompareRules, // ICD compare rules
|
||||
F_DynaBuf * pDynaBuf)
|
||||
{
|
||||
RCODE rc = NE_SFLM_OK;
|
||||
FLMUINT uiFirstSpaceCharPos = FLM_MAX_UINT;
|
||||
FLMUNICODE uChar;
|
||||
FLMUINT uiSize;
|
||||
FLMUINT uiStrSize = 0;
|
||||
FLMBYTE * pucTmp;
|
||||
|
||||
if( !pIStream->remainingSize())
|
||||
{
|
||||
pDynaBuf->truncateData( 0);
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (RC_BAD( rc = f_readUTF8CharAsUnicode( pIStream, &uChar)))
|
||||
{
|
||||
if (rc == NE_SFLM_EOF_HIT)
|
||||
{
|
||||
rc = NE_SFLM_OK;
|
||||
break;
|
||||
}
|
||||
goto Exit;
|
||||
}
|
||||
if ((uChar = f_convertChar( uChar, uiCompareRules)) == 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (uChar == ASCII_SPACE)
|
||||
{
|
||||
if (uiCompareRules &
|
||||
(FLM_COMP_COMPRESS_WHITESPACE |
|
||||
FLM_COMP_IGNORE_TRAILING_SPACE))
|
||||
{
|
||||
|
||||
// Remember the position of the first space.
|
||||
// When we come to the end of the spaces, we may reset
|
||||
// the size to compress out spaces if necessary. Or,
|
||||
// we may opt to get rid of all of them.
|
||||
|
||||
if (uiFirstSpaceCharPos == FLM_MAX_UINT)
|
||||
{
|
||||
uiFirstSpaceCharPos = uiStrSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// Once we hit a non-space character, we can turn off the
|
||||
// ignore leading spaces flag.
|
||||
|
||||
uiCompareRules &= (~(FLM_COMP_IGNORE_LEADING_SPACE));
|
||||
|
||||
// See if we need to compress spaces.
|
||||
|
||||
if (uiFirstSpaceCharPos != FLM_MAX_UINT)
|
||||
{
|
||||
|
||||
// Output exactly one ASCII_SPACE character if we are compressing
|
||||
// spaces. If we are not compressing spaces, then the only other
|
||||
// way uiFirstSpaceCharPos would have been set is if we were
|
||||
// ignoring trailing spaces. In that case, since the spaces
|
||||
// were not trailing spaces, we need to leave them as is.
|
||||
|
||||
if (uiCompareRules & FLM_COMP_COMPRESS_WHITESPACE)
|
||||
{
|
||||
|
||||
// A space will already have been encoded into the string.
|
||||
// Since we know a space takes exactly one byte in the UTF8
|
||||
// space, we can simply set our pointer one byte past where
|
||||
// the last non-space character was found.
|
||||
|
||||
uiStrSize = uiFirstSpaceCharPos + 1;
|
||||
pDynaBuf->truncateData( uiStrSize);
|
||||
}
|
||||
uiFirstSpaceCharPos = FLM_MAX_UINT;
|
||||
}
|
||||
|
||||
// If we are allowing escaped characters, backslash is treated
|
||||
// always as an escape character. Whatever follows the
|
||||
// backslash is the character we need to process.
|
||||
|
||||
if (uChar == ASCII_BACKSLASH && (uiFlags & ICD_ESC_CHAR))
|
||||
{
|
||||
if (RC_BAD( rc = f_readUTF8CharAsUnicode( pIStream, &uChar)))
|
||||
{
|
||||
if (rc == NE_SFLM_EOF_HIT)
|
||||
{
|
||||
rc = NE_SFLM_OK;
|
||||
}
|
||||
else
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Output the character - need at most three bytes
|
||||
|
||||
if (RC_BAD( rc = pDynaBuf->allocSpace( 3, (void **)&pucTmp)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
uiSize = 3;
|
||||
if (RC_BAD( rc = f_uni2UTF8( uChar, pucTmp, &uiSize)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
uiStrSize += uiSize;
|
||||
pDynaBuf->truncateData( uiStrSize);
|
||||
}
|
||||
|
||||
// If uiFirstSpaceCharPos != FLM_MAX_UINT, it means that all of the
|
||||
// characters at the end of the string were spaces. If we
|
||||
// are ignoring trailing spaces, we need to truncate the string so
|
||||
// they will be ignored. Otherwise, we need to compress them into
|
||||
// a single space.
|
||||
|
||||
if (uiFirstSpaceCharPos != FLM_MAX_UINT)
|
||||
{
|
||||
if (uiCompareRules & FLM_COMP_IGNORE_TRAILING_SPACE)
|
||||
{
|
||||
uiStrSize = uiFirstSpaceCharPos;
|
||||
}
|
||||
else
|
||||
{
|
||||
flmAssert( uiCompareRules & FLM_COMP_COMPRESS_WHITESPACE);
|
||||
|
||||
// A space will already have been encoded into the string.
|
||||
// Since we know a space takes exactly one byte in the UTF8
|
||||
// space, we can simply set our pointer one byte past where
|
||||
// the last non-space character was found.
|
||||
|
||||
uiStrSize = uiFirstSpaceCharPos + 1;
|
||||
}
|
||||
pDynaBuf->truncateData( uiStrSize);
|
||||
}
|
||||
|
||||
// Terminate the UTF-8 string
|
||||
|
||||
if (RC_BAD( rc = pDynaBuf->appendByte( 0)))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
Exit:
|
||||
|
||||
return( rc);
|
||||
}
|
||||
Reference in New Issue
Block a user