Files
mars-flaim/sql/src/kycollat.cpp
dsandersoremutah ffe3cb6975 Changed Id property
git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@482 0109f412-320b-0410-ab79-c3e0c5ffbbe6
2006-05-30 22:00:45 +00:00

436 lines
10 KiB
C++

//------------------------------------------------------------------------------
// Desc: Index collation routines
//
// Tabs: 3
//
// Copyright (c) 1991-2006 Novell, Inc. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of version 2 of the GNU General Public
// License as published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, contact Novell, Inc.
//
// To contact Novell about this file by physical or electronic mail,
// you may find current contact information at www.novell.com
//
// $Id$
//------------------------------------------------------------------------------
#include "flaimsys.h"
// Forward declaration of the file-local text formatter defined later in this
// file. It reads UTF-8 text from pIStream and writes the formatted text into
// pDynaBuf; KYCollateValue calls it before collating string values whenever
// ICD_ESC_CHAR or one of the whitespace/underscore/dash compare rules is set.
FSTATIC RCODE KYFormatUTF8Text(
IF_PosIStream * pIStream,
FLMUINT uiFlags,
FLMUINT uiCompareRules,
F_DynaBuf * pDynaBuf);
/****************************************************************************
Desc:	Build a collated key value piece from the value found in pIStream.
		On entry *puiDestLen is the room available in pucDest; a value of
		zero yields NE_SFLM_KEY_OVERFLOW.  On exit *puiDestLen is always
		written back with the local key length (zero when the stream holds
		no data or the text collated to nothing).
Notes:	Strings are collated with f_asiaUTF8ToColText when uiLanguage lies
		in the FLM_FIRST_DBCS_LANG..FLM_LAST_DBCS_LANG range, otherwise with
		flmUTF8ToColText.  Numbers go through flmStorageNum2CollationNum.
		Binary values are copied as is, capped by uiLimit and by 256 bytes.
		Any other data type returns NE_SFLM_CANNOT_INDEX_DATA_TYPE.
****************************************************************************/
RCODE KYCollateValue(
	FLMBYTE *			pucDest,
	FLMUINT *			puiDestLen,
	IF_PosIStream *	pIStream,
	FLMUINT				uiDataType,
	FLMUINT				uiFlags,
	FLMUINT				uiCompareRules,
	FLMUINT				uiLimit,
	FLMUINT *			puiCollationLen,
	FLMUINT *			puiLuLen,
	FLMUINT				uiLanguage,
	FLMBOOL				bFirstSubstring,
	FLMBOOL				bDataTruncated,
	FLMBOOL *			pbDataTruncated,
	FLMBOOL *			pbOriginalCharsLost)
{
	RCODE						rc = NE_SFLM_OK;
	FLMUINT					uiKeyLen;
	FLMBOOL					bValuePresent = TRUE;
	FLMBOOL					bCaseInsensitive =
									(uiCompareRules & FLM_COMP_CASE_INSENSITIVE)
									? TRUE
									: FALSE;
	IF_BufferIStream *	pFormattedStream = NULL;
	FLMBYTE					ucFormatBuf[ 64];
	F_DynaBuf				formatBuf( ucFormatBuf, sizeof( ucFormatBuf));

	if (puiLuLen)
	{
		*puiLuLen = 0;
	}

	// With no room at all in the destination there is nothing we can build.

	uiKeyLen = *puiDestLen;
	if (!uiKeyLen)
	{
		rc = RC_SET( NE_SFLM_KEY_OVERFLOW);
		goto Exit;
	}

	if (uiDataType == SFLM_STRING_TYPE)
	{
		FLMUNICODE	uProbeChar;
		FLMUINT		uiMaxChars;
		FLMUINT64	ui64StartPos = pIStream->getCurrPosition();

		// Probe a single character to learn whether the stream holds any
		// text, then rewind so that the collation sees the whole value.

		rc = f_readUTF8CharAsUnicode( pIStream, &uProbeChar);
		if (RC_BAD( rc))
		{
			if (rc != NE_SFLM_EOF_HIT)
			{
				goto Exit;
			}
			bValuePresent = FALSE;
			rc = NE_SFLM_OK;
		}

		rc = pIStream->positionTo( ui64StartPos);
		if (RC_BAD( rc))
		{
			goto Exit;
		}

		// The text is expected to be 0-terminated UTF-8.  When escape
		// characters are allowed, or any rule that rewrites whitespace,
		// underscores or dashes is on, format the text into a local buffer
		// first and collate from a stream opened over that buffer.

		if ((uiFlags & ICD_ESC_CHAR) ||
			 (uiCompareRules &
				(FLM_COMP_COMPRESS_WHITESPACE |
				 FLM_COMP_NO_WHITESPACE |
				 FLM_COMP_NO_UNDERSCORES |
				 FLM_COMP_NO_DASHES |
				 FLM_COMP_WHITESPACE_AS_SPACE |
				 FLM_COMP_IGNORE_LEADING_SPACE |
				 FLM_COMP_IGNORE_TRAILING_SPACE)))
		{
			formatBuf.truncateData( 0);

			rc = KYFormatUTF8Text( pIStream, uiFlags, uiCompareRules,
								&formatBuf);
			if (RC_BAD( rc))
			{
				goto Exit;
			}

			rc = FlmAllocBufferIStream( &pFormattedStream);
			if (RC_BAD( rc))
			{
				goto Exit;
			}

			rc = pFormattedStream->open(
								(const char *)formatBuf.getBufferPtr(),
								formatBuf.getDataLength());
			if (RC_BAD( rc))
			{
				goto Exit;
			}

			pIStream = pFormattedStream;
		}

		// A zero limit selects the default character limit.

		uiMaxChars = uiLimit;
		if (!uiMaxChars)
		{
			uiMaxChars = ICD_DEFAULT_LIMIT;
		}

		if (uiLanguage < FLM_FIRST_DBCS_LANG || uiLanguage > FLM_LAST_DBCS_LANG)
		{
			rc = flmUTF8ToColText( pIStream, pucDest, &uiKeyLen,
								bCaseInsensitive,
								puiCollationLen, puiLuLen,
								uiLanguage, uiMaxChars, bFirstSubstring,
								bDataTruncated,
								pbOriginalCharsLost, pbDataTruncated);
			if (RC_BAD( rc))
			{
				goto Exit;
			}
		}
		else
		{
			rc = f_asiaUTF8ToColText( pIStream, pucDest, &uiKeyLen,
								bCaseInsensitive,
								puiCollationLen, puiLuLen,
								uiMaxChars, bFirstSubstring,
								bDataTruncated, pbDataTruncated);
			if (RC_BAD( rc))
			{
				goto Exit;
			}
		}
	}
	else if (!pIStream->remainingSize())
	{
		bValuePresent = FALSE;
	}

	// TRICKY: the key length may have come back as zero for text that has
	// no value.

	if (!(bValuePresent && uiKeyLen))
	{
		uiKeyLen = 0;
		goto Exit;
	}

	switch (uiDataType)
	{
		case SFLM_STRING_TYPE:
		{
			// Text was fully handled above.
			break;
		}

		case SFLM_NUMBER_TYPE:
		{
			FLMBYTE	ucStorageNum[ FLM_MAX_NUM_BUF_SIZE];
			FLMUINT	uiStorageLen = (FLMUINT)pIStream->remainingSize();
			FLMUINT	uiGot;

			flmAssert( uiStorageLen <= sizeof( ucStorageNum));

			rc = pIStream->read( ucStorageNum, uiStorageLen, &uiGot);
			if (RC_BAD( rc))
			{
				goto Exit;
			}
			flmAssert( uiGot == uiStorageLen);

			rc = flmStorageNum2CollationNum( ucStorageNum, uiGot,
								pucDest, &uiKeyLen);
			if (RC_BAD( rc))
			{
				goto Exit;
			}
			break;
		}

		case SFLM_BINARY_TYPE:
		{
			FLMUINT	uiCopyLen = (FLMUINT)pIStream->remainingSize();
			FLMUINT	uiGot;

			if (uiCopyLen >= uiLimit)
			{
				uiCopyLen = uiLimit;
				bDataTruncated = TRUE;
			}

			// We don't want any single key piece to "pig out" more
			// than 256 bytes of the key.

			if (uiKeyLen > 256)
			{
				uiKeyLen = 256;
			}

			// The key piece gets the smaller of the two lengths; if the
			// data is the longer one, it is being truncated.

			if (uiCopyLen <= uiKeyLen)
			{
				uiKeyLen = uiCopyLen;
			}
			else
			{
				bDataTruncated = TRUE;
			}

			// Store as is.

			rc = pIStream->read( pucDest, uiKeyLen, &uiGot);
			if (RC_BAD( rc))
			{
				goto Exit;
			}

			if (pbDataTruncated && bDataTruncated)
			{
				*pbDataTruncated = TRUE;
			}
			break;
		}

		default:
		{
			rc = RC_SET( NE_SFLM_CANNOT_INDEX_DATA_TYPE);
			break;
		}
	}

Exit:

	if (pFormattedStream)
	{
		pFormattedStream->Release();
	}

	*puiDestLen = uiKeyLen;
	return( rc);
}
/****************************************************************************
Desc:	Format UTF-8 text read from pIStream into pDynaBuf.
		- Every character is first passed through f_convertChar with the
		  compare rules; a result of 0 means the character is dropped.
		- With FLM_COMP_COMPRESS_WHITESPACE, a run of spaces is reduced to
		  a single space.
		- With FLM_COMP_IGNORE_TRAILING_SPACE, spaces at the end of the
		  text are removed.
		- With ICD_ESC_CHAR in uiFlags, a backslash is an escape character:
		  the character following it is output as is.
		The buffer is always emptied first, so its previous contents are
		discarded.  The output is 0-terminated, except that an empty input
		stream leaves the buffer empty (no terminator).
Ret:	NE_SFLM_OK on success, otherwise the error returned by the stream
		read, the buffer allocation, or the UTF-8 encoding of a character.
Visit:	Pass in uiLimit and pass back a truncated flag when the
		string is truncated.  This was not done because we will have
		to get the exact truncated count that is done in f_tocoll.cpp
		and that could introduce some bugs.
****************************************************************************/
FSTATIC RCODE KYFormatUTF8Text(
	IF_PosIStream *	pIStream,
	FLMUINT				uiFlags,				// ICD flags
	FLMUINT				uiCompareRules,	// ICD compare rules
	F_DynaBuf *			pDynaBuf)
{
	RCODE			rc = NE_SFLM_OK;
	FLMUINT		uiFirstSpaceCharPos = FLM_MAX_UINT;
	FLMUNICODE	uChar;
	FLMUINT		uiSize;
	FLMUINT		uiStrSize = 0;
	FLMBYTE *	pucTmp;

	// uiStrSize counts the bytes we have written, starting from zero, and
	// is used as the absolute data length in every truncateData call below.
	// That is only correct when the buffer starts out empty, so empty it
	// here rather than relying on the caller to have done so.

	pDynaBuf->truncateData( 0);

	if( !pIStream->remainingSize())
	{
		goto Exit;
	}

	for (;;)
	{
		if (RC_BAD( rc = f_readUTF8CharAsUnicode( pIStream, &uChar)))
		{
			if (rc == NE_SFLM_EOF_HIT)
			{
				rc = NE_SFLM_OK;
				break;
			}
			goto Exit;
		}

		// A zero result means the compare rules drop this character.

		if ((uChar = f_convertChar( uChar, uiCompareRules)) == 0)
		{
			continue;
		}

		if (uChar == ASCII_SPACE)
		{
			if (uiCompareRules &
				 (FLM_COMP_COMPRESS_WHITESPACE |
				  FLM_COMP_IGNORE_TRAILING_SPACE))
			{

				// Remember the position of the first space.
				// When we come to the end of the spaces, we may reset
				// the size to compress out spaces if necessary.  Or,
				// we may opt to get rid of all of them.

				if (uiFirstSpaceCharPos == FLM_MAX_UINT)
				{
					uiFirstSpaceCharPos = uiStrSize;
				}
			}
		}
		else
		{

			// Once we hit a non-space character, we can turn off the
			// ignore leading spaces flag.

			uiCompareRules &= (~(FLM_COMP_IGNORE_LEADING_SPACE));

			// See if we need to compress spaces.

			if (uiFirstSpaceCharPos != FLM_MAX_UINT)
			{

				// Output exactly one ASCII_SPACE character if we are compressing
				// spaces.  If we are not compressing spaces, then the only other
				// way uiFirstSpaceCharPos would have been set is if we were
				// ignoring trailing spaces.  In that case, since the spaces
				// were not trailing spaces, we need to leave them as is.

				if (uiCompareRules & FLM_COMP_COMPRESS_WHITESPACE)
				{

					// A space will already have been encoded into the string.
					// Since we know a space takes exactly one byte in the UTF8
					// space, we can simply set our pointer one byte past where
					// the last non-space character was found.

					uiStrSize = uiFirstSpaceCharPos + 1;
					pDynaBuf->truncateData( uiStrSize);
				}
				uiFirstSpaceCharPos = FLM_MAX_UINT;
			}

			// If we are allowing escaped characters, backslash is treated
			// always as an escape character.  Whatever follows the
			// backslash is the character we need to process.

			if (uChar == ASCII_BACKSLASH && (uiFlags & ICD_ESC_CHAR))
			{
				if (RC_BAD( rc = f_readUTF8CharAsUnicode( pIStream, &uChar)))
				{

					// A backslash at the very end of the text is not an
					// error.  uChar is output below as the failed read left
					// it.
					// NOTE(review): presumably still the backslash - confirm
					// against f_readUTF8CharAsUnicode.

					if (rc == NE_SFLM_EOF_HIT)
					{
						rc = NE_SFLM_OK;
					}
					else
					{
						goto Exit;
					}
				}
			}
		}

		// Output the character - need at most three bytes.  Reserve three,
		// encode, then trim the buffer back to the bytes actually used.

		if (RC_BAD( rc = pDynaBuf->allocSpace( 3, (void **)&pucTmp)))
		{
			goto Exit;
		}
		uiSize = 3;
		if (RC_BAD( rc = f_uni2UTF8( uChar, pucTmp, &uiSize)))
		{
			goto Exit;
		}
		uiStrSize += uiSize;
		pDynaBuf->truncateData( uiStrSize);
	}

	// If uiFirstSpaceCharPos != FLM_MAX_UINT, it means that all of the
	// characters at the end of the string were spaces.  If we
	// are ignoring trailing spaces, we need to truncate the string so
	// they will be ignored.  Otherwise, we need to compress them into
	// a single space.

	if (uiFirstSpaceCharPos != FLM_MAX_UINT)
	{
		if (uiCompareRules & FLM_COMP_IGNORE_TRAILING_SPACE)
		{
			uiStrSize = uiFirstSpaceCharPos;
		}
		else
		{
			flmAssert( uiCompareRules & FLM_COMP_COMPRESS_WHITESPACE);

			// A space will already have been encoded into the string.
			// Since we know a space takes exactly one byte in the UTF8
			// space, we can simply set our pointer one byte past where
			// the last non-space character was found.

			uiStrSize = uiFirstSpaceCharPos + 1;
		}
		pDynaBuf->truncateData( uiStrSize);
	}

	// Terminate the UTF-8 string

	if (RC_BAD( rc = pDynaBuf->appendByte( 0)))
	{
		goto Exit;
	}

Exit:

	return( rc);
}