Renamed version4 to flaim and version5 to xflaim
git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@7 0109f412-320b-0410-ab79-c3e0c5ffbbe6
This commit is contained in:
772
flaim/src/f_tocoll.cpp
Normal file
772
flaim/src/f_tocoll.cpp
Normal file
@@ -0,0 +1,772 @@
|
||||
//-------------------------------------------------------------------------
|
||||
// Desc: Collation routines for indexing.
|
||||
// Tabs: 3
|
||||
//
|
||||
// Copyright (c) 1991-2001,2003,2005-2006 Novell, Inc. All Rights Reserved.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of version 2 of the GNU General Public
|
||||
// License as published by the Free Software Foundation.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, contact Novell, Inc.
|
||||
//
|
||||
// To contact Novell about this file by physical or electronic mail,
|
||||
// you may find current contact information at www.novell.com
|
||||
//
|
||||
// $Id: f_tocoll.cpp 12245 2006-01-19 14:29:51 -0700 (Thu, 19 Jan 2006) dsanders $
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
#include "flaimsys.h"
|
||||
|
||||
// Returns TRUE if upper case, FALSE if lower case.
|
||||
|
||||
FINLINE FLMBOOL charIsUpper(
|
||||
FLMUINT16 ui16Char)
|
||||
{
|
||||
return( (FLMBOOL)((ui16Char < 0x7F)
|
||||
? (FLMBOOL)((ui16Char >= ASCII_LOWER_A &&
|
||||
ui16Char <= ASCII_LOWER_Z)
|
||||
? (FLMBOOL)FALSE
|
||||
: (FLMBOOL)TRUE)
|
||||
: fwpIsUpper( ui16Char)));
|
||||
}
|
||||
|
||||
extern FLMBYTE fwp_dia60Tbl[]; // Diacritic conversions
|
||||
extern FLMBYTE fwp_alefSubColTbl[]; // Arabic sub collation table.
|
||||
extern FLMBYTE fwp_ar2BitTbl[]; // Arabic 2 bit table.
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Convert a text string to a collated string.
|
||||
If FERR_CONV_DEST_OVERFLOW is returned the string is truncated as
|
||||
best as it can be. The caller must decide to return the error up
|
||||
or deal with the truncation.
|
||||
Return: RCODE = SUCCESS or FERR_CONV_DEST_OVERFLOW
|
||||
VISIT: If the string is EXACTLY the length of the truncation
|
||||
length then it should, but doesn't, set the truncation flag.
|
||||
The code didn't match the design intent. Fix next major
|
||||
version.
|
||||
****************************************************************************/
|
||||
RCODE FTextToColStr(
|
||||
const FLMBYTE * pucStr, // Points to the internal TEXT string
|
||||
FLMUINT uiStrLen, // Length of the internal TEXT string
|
||||
FLMBYTE * pucCollatedStr, // Returns collated string
|
||||
FLMUINT * puiCollatedStrLen, // Returns total collated string length
|
||||
// Input is maximum bytes in buffer
|
||||
FLMUINT uiUppercaseFlag, // Set if to convert to uppercase
|
||||
FLMUINT * puiCollationLen, // Returns the collation bytes length
|
||||
FLMUINT * puiCaseLen, // Returns length of case bytes
|
||||
FLMUINT uiLanguage, // Language
|
||||
FLMUINT uiCharLimit, // Max number of characters in this key piece
|
||||
FLMBOOL bFirstSubstring, // TRUE is this is the first substring key
|
||||
FLMBOOL * pbOriginalCharsLost,
|
||||
FLMBOOL * pbDataTruncated)
|
||||
{
|
||||
RCODE rc = FERR_OK;
|
||||
const FLMBYTE * pucStrEnd; // Points to the end of the string
|
||||
FLMUINT16 ui16Base; // Value of the base character
|
||||
FLMUINT16 ui16SubColVal; // Sub-collated value (diacritic)
|
||||
FLMUINT uiObjLength = 0;
|
||||
FLMUINT uiLength; // Temporary variable for length
|
||||
FLMUINT uiTargetColLen = *puiCollatedStrLen - 8; // 4=ovhd,4=worse char
|
||||
FLMUINT uiObjType;
|
||||
FLMBOOL bDataTruncated = FALSE;
|
||||
|
||||
// Need to increase the buffer sizes to not overflow.
|
||||
// Characaters without COLL values will take up 3 bytes in
|
||||
// the ucSubColBuf[] and easily overflow the buffer.
|
||||
// Hard coded the values so as to minimize changes.
|
||||
|
||||
FLMBYTE ucSubColBuf[ MAX_SUBCOL_BUF + 301]; // Holds sub-collated values(diac)
|
||||
FLMBYTE ucCaseBits[ MAX_LOWUP_BUF + 81]; // Holds case bits
|
||||
FLMUINT16 ui16WpChr; // Current WP character
|
||||
FLMUNICODE unichr = 0; // Current unconverted Unicode character
|
||||
FLMUINT16 ui16WpChr2; // 2nd character if any; default 0 for US lang
|
||||
FLMUINT uiColLen; // Return value of collated length
|
||||
FLMUINT uiSubColBitPos; // Sub-collation bit position
|
||||
FLMUINT uiCaseBitPos; // Case bit position
|
||||
FLMUINT uiFlags; // Clear all bit flags
|
||||
FLMBOOL bHebrewArabic = FALSE; // Set if language is hebrew, arabic, farsi
|
||||
FLMBOOL bTwoIntoOne;
|
||||
|
||||
uiColLen = 0;
|
||||
uiSubColBitPos = 0;
|
||||
uiCaseBitPos = 0;
|
||||
uiFlags = 0;
|
||||
ui16WpChr2 = 0;
|
||||
|
||||
// Don't allow any key component to exceed 256 bytes regardless of the
|
||||
// user-specified character or byte limit. The goal is to prevent
|
||||
// any single key piece from consuming too much of the key (which is
|
||||
// limited to 640 bytes) and thus "starving" other pieces, resulting
|
||||
// in a key overflow error.
|
||||
|
||||
if( uiTargetColLen > 256)
|
||||
{
|
||||
uiTargetColLen = 256;
|
||||
}
|
||||
|
||||
// Code below sets ucSubColBuf[] and ucCaseBits[] values to zero.
|
||||
|
||||
if (uiLanguage != US_LANG)
|
||||
{
|
||||
if (uiLanguage == AR_LANG || // Arabic
|
||||
uiLanguage == FA_LANG || // Farsi - persian
|
||||
uiLanguage == HE_LANG || // Hebrew
|
||||
uiLanguage == UR_LANG) // Urdu
|
||||
{
|
||||
bHebrewArabic = TRUE;
|
||||
}
|
||||
}
|
||||
pucStrEnd = &pucStr [uiStrLen];
|
||||
|
||||
while (pucStr < pucStrEnd)
|
||||
{
|
||||
|
||||
// Set the case bits and sub-collation bits to zero when
|
||||
// on the first bit of the byte.
|
||||
|
||||
if (!(uiCaseBitPos & 0x07))
|
||||
{
|
||||
ucCaseBits [uiCaseBitPos >> 3] = 0;
|
||||
}
|
||||
if (!(uiSubColBitPos & 0x07))
|
||||
{
|
||||
ucSubColBuf [uiSubColBitPos >> 3] = 0;
|
||||
}
|
||||
|
||||
// Get the next character from the TEXT string.
|
||||
|
||||
for (ui16WpChr = ui16SubColVal = 0; // Default sub-collation value
|
||||
!ui16WpChr && pucStr < pucStrEnd;
|
||||
pucStr += uiObjLength)
|
||||
{
|
||||
FLMBYTE ucChar = *pucStr;
|
||||
|
||||
uiObjType = GedTextObjType( ucChar);
|
||||
switch (uiObjType)
|
||||
{
|
||||
case ASCII_CHAR_CODE: // 0nnnnnnn
|
||||
uiObjLength = 1;
|
||||
|
||||
// Character set zero is assumed.
|
||||
|
||||
ui16WpChr = (FLMUINT16)ucChar;
|
||||
continue;
|
||||
case CHAR_SET_CODE: // 10nnnnnn
|
||||
uiObjLength = 2;
|
||||
|
||||
// Character set followed by character
|
||||
|
||||
ui16WpChr = (((FLMUINT16)(ucChar & (~CHAR_SET_MASK)) << 8)
|
||||
+ (FLMUINT16)*(pucStr + 1));
|
||||
continue;
|
||||
case WHITE_SPACE_CODE: // 110nnnnn
|
||||
uiObjLength = 1;
|
||||
ucChar &= (~WHITE_SPACE_MASK);
|
||||
ui16WpChr = (ucChar == HARD_HYPHEN ||
|
||||
ucChar == HARD_HYPHEN_EOL ||
|
||||
ucChar == HARD_HYPHEN_EOP)
|
||||
? (FLMUINT16)0x2D // Minus sign -- character set 0
|
||||
: (FLMUINT16)0x20;// Space -- character set zero
|
||||
continue;
|
||||
|
||||
// Skip all of the unknown stuff
|
||||
|
||||
case UNK_GT_255_CODE:
|
||||
uiObjLength = 3 + FB2UW( pucStr + 1);
|
||||
continue;
|
||||
case UNK_LE_255_CODE:
|
||||
uiObjLength = 2 + (FLMUINT16)*(pucStr + 1);
|
||||
continue;
|
||||
case UNK_EQ_1_CODE:
|
||||
uiObjLength = 2;
|
||||
continue;
|
||||
case EXT_CHAR_CODE:
|
||||
uiObjLength = 3;
|
||||
|
||||
// Character set followed by character
|
||||
|
||||
ui16WpChr = (((FLMUINT16)*(pucStr + 1) << 8)
|
||||
+ (FLMUINT16)*(pucStr + 2));
|
||||
continue;
|
||||
case OEM_CODE:
|
||||
|
||||
// OEM characters are always >= 128
|
||||
// Use character set zero to process them.
|
||||
|
||||
uiObjLength = 2;
|
||||
ui16WpChr = (FLMUINT16)*(pucStr + 1);
|
||||
continue;
|
||||
case UNICODE_CODE: // Unconvertable UNICODE code
|
||||
uiObjLength = 3;
|
||||
|
||||
// Unicode character followed by unicode character set
|
||||
|
||||
unichr = (FLMUINT16)(((FLMUINT16)*(pucStr + 1) << 8)
|
||||
+ (FLMUINT16)*(pucStr + 2));
|
||||
ui16WpChr = UNK_UNICODE_CODE;
|
||||
continue;
|
||||
default:
|
||||
|
||||
// Should not happen, but don't return an error
|
||||
|
||||
flmAssert( 0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't get a character, break out of while loop.
|
||||
|
||||
if (!ui16WpChr)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// fwpCheckDoubleCollation modifies ui16WpChr if a digraph or a double
|
||||
// character sequence is found. If a double character is found, pucStr
|
||||
// is incremented and ui16WpChr2 is set to 1. If a digraph is found,
|
||||
// pucStr is not changed, but ui16WpChr contains the first character and
|
||||
// ui16WpChr2 contains the second character of the digraph.
|
||||
|
||||
if (uiLanguage != US_LANG)
|
||||
{
|
||||
ui16WpChr2 = fwpCheckDoubleCollation( &ui16WpChr, &bTwoIntoOne,
|
||||
&pucStr, uiLanguage);
|
||||
}
|
||||
|
||||
// Save the case bit
|
||||
|
||||
if (!uiUppercaseFlag)
|
||||
{
|
||||
|
||||
// charIsUpper returns TRUE if upper case, 0 if lower case.
|
||||
|
||||
if (!charIsUpper( ui16WpChr))
|
||||
{
|
||||
uiFlags |= HAD_LOWER_CASE;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// Set if upper case.
|
||||
|
||||
SET_BIT( ucCaseBits, uiCaseBitPos);
|
||||
}
|
||||
uiCaseBitPos++;
|
||||
}
|
||||
|
||||
// Handle OEM characters, non-collating characters,
|
||||
// characters with subcollating values, double collating
|
||||
// values.
|
||||
|
||||
// Get the collated value from the WP character-if not collating value
|
||||
|
||||
if ((pucCollatedStr[ uiColLen++] =
|
||||
(FLMBYTE)(fwpGetCollation( ui16WpChr, uiLanguage))) >= COLS11)
|
||||
{
|
||||
FLMUINT uiTemp;
|
||||
|
||||
// Save OEM characters just like non-collating characters
|
||||
|
||||
// If lower case, convert to upper case.
|
||||
|
||||
if (!charIsUpper( ui16WpChr))
|
||||
{
|
||||
ui16WpChr &= ~1;
|
||||
}
|
||||
|
||||
// No collating value given for this WP char.
|
||||
// Save original WP char (2 bytes) in subcollating
|
||||
// buffer.
|
||||
|
||||
// 1110 is a new code that will store an insert over
|
||||
// the character OR a non-convertable unicode character.
|
||||
// Store with the same alignment as "store_extended_char"
|
||||
// below.
|
||||
|
||||
// 11110 is code for unmappable UNICODE value.
|
||||
// A value 0xFE will be the collation value. The sub-collation
|
||||
// value will be 0xFFFF followed by the UNICODE value.
|
||||
// Be sure to eat an extra case bit.
|
||||
|
||||
// See specific Hebrew and Arabic comments in the
|
||||
// switch statement below.
|
||||
|
||||
// Set the next byte that follows in the sub collation buffer.
|
||||
|
||||
ucSubColBuf [(uiSubColBitPos + 8) >> 3] = 0;
|
||||
|
||||
if (bHebrewArabic && (pucCollatedStr [uiColLen-1] == COLS0_ARABIC))
|
||||
{
|
||||
|
||||
// Store first bit of 1110, fall through & store remaining 3 bits
|
||||
|
||||
SET_BIT( ucSubColBuf, uiSubColBitPos);
|
||||
uiSubColBitPos++;
|
||||
|
||||
// Don't store collation value
|
||||
|
||||
uiColLen--;
|
||||
}
|
||||
else if (unichr)
|
||||
{
|
||||
ui16WpChr = unichr;
|
||||
unichr = 0;
|
||||
|
||||
// Store 11 out of 11110
|
||||
|
||||
SET_BIT( ucSubColBuf, uiSubColBitPos);
|
||||
uiSubColBitPos++;
|
||||
SET_BIT( ucSubColBuf, uiSubColBitPos);
|
||||
uiSubColBitPos++;
|
||||
if (!uiUppercaseFlag)
|
||||
{
|
||||
ucCaseBits [(uiCaseBitPos + 7) >> 3] = 0;
|
||||
|
||||
// Set upper case bit.
|
||||
|
||||
SET_BIT( ucCaseBits, uiCaseBitPos);
|
||||
uiCaseBitPos++;
|
||||
}
|
||||
}
|
||||
store_extended_char:
|
||||
|
||||
// Set the next byte that follows in the sub collation buffer.
|
||||
|
||||
ucSubColBuf [(uiSubColBitPos + 8) >> 3] = 0;
|
||||
ucSubColBuf [(uiSubColBitPos + 16) >> 3] = 0;
|
||||
uiFlags |= HAD_SUB_COLLATION;
|
||||
|
||||
// Set 110 bits in sub-collation - continued from above.
|
||||
// No need to explicitly set the zero, but must increment
|
||||
// for it.
|
||||
|
||||
SET_BIT( ucSubColBuf, uiSubColBitPos);
|
||||
uiSubColBitPos++;
|
||||
SET_BIT( ucSubColBuf, uiSubColBitPos);
|
||||
uiSubColBitPos += 2;
|
||||
|
||||
// store_aligned_word: This label is not referenced.
|
||||
// Go to the next byte boundary to write the character.
|
||||
|
||||
uiSubColBitPos = (uiSubColBitPos + 7) & (~7);
|
||||
uiTemp = BYTES_IN_BITS( uiSubColBitPos);
|
||||
|
||||
// Need to big-endian - so it will sort correctly.
|
||||
|
||||
ucSubColBuf [uiTemp] = (FLMBYTE)(ui16WpChr >> 8);
|
||||
ucSubColBuf [uiTemp + 1] = (FLMBYTE)(ui16WpChr);
|
||||
uiSubColBitPos += 16;
|
||||
ucSubColBuf [uiSubColBitPos >> 3] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Had a collation value
|
||||
|
||||
// Add the lower/uppercase bit if a mixed case output.
|
||||
|
||||
// If not lower ASCII set - check diacritic value for sub-collation
|
||||
|
||||
if (!(ui16WpChr & 0xFF00))
|
||||
{
|
||||
|
||||
// ASCII character set - set a single 0 bit - just need to
|
||||
// increment to do this.
|
||||
|
||||
uiSubColBitPos++;
|
||||
}
|
||||
else
|
||||
{
|
||||
FLMBYTE ucTmpChar = (FLMBYTE)ui16WpChr;
|
||||
FLMBYTE ucCharSet = (FLMBYTE)(ui16WpChr >> 8);
|
||||
|
||||
// Convert char to uppercase because case information
|
||||
// is stored above. This will help
|
||||
// ensure that the "ETA" doesn't sort before "eta"
|
||||
|
||||
if (!charIsUpper(ui16WpChr))
|
||||
{
|
||||
ui16WpChr &= ~1;
|
||||
}
|
||||
|
||||
switch (ucCharSet)
|
||||
{
|
||||
case CHSMUL1: // Multinational 1
|
||||
|
||||
// If we cannot break down a char into base and
|
||||
// diacritic we cannot combine the charaacter
|
||||
// later when converting back the key. In that case,
|
||||
// write the entire WP char in the sub-collation area.
|
||||
|
||||
if (fwpCh6Brkcar( ui16WpChr, &ui16Base, &ui16SubColVal))
|
||||
{
|
||||
goto store_extended_char;
|
||||
}
|
||||
|
||||
// Write the FLAIM diacritic sub-collation value.
|
||||
// Prefix is 2 bits "10". Remember to leave
|
||||
// "111" alone for the future.
|
||||
// NOTE: The "unlaut" character must sort after the "ring"
|
||||
// character.
|
||||
|
||||
ui16SubColVal = ((ui16SubColVal & 0xFF) == umlaut &&
|
||||
(uiLanguage == SU_LANG ||
|
||||
uiLanguage == SV_LANG ||
|
||||
uiLanguage == CZ_LANG ||
|
||||
uiLanguage == SL_LANG))
|
||||
? (FLMUINT16)(fwp_dia60Tbl[ ring] + 1)
|
||||
: (FLMUINT16)(fwp_dia60Tbl[ ui16SubColVal & 0xFF]);
|
||||
|
||||
store_sub_col:
|
||||
// Set the next byte that follows in the sub collation buffer.
|
||||
|
||||
ucSubColBuf[ (uiSubColBitPos + 8) >> 3] = 0;
|
||||
uiFlags |= HAD_SUB_COLLATION;
|
||||
|
||||
// Set the 10 bits - no need to explicitly set the zero, but
|
||||
// must increment for it.
|
||||
|
||||
SET_BIT( ucSubColBuf, uiSubColBitPos);
|
||||
uiSubColBitPos += 2;
|
||||
|
||||
// Set sub-collation bits.
|
||||
|
||||
SETnBITS( 5, ucSubColBuf, uiSubColBitPos, ui16SubColVal);
|
||||
uiSubColBitPos += 5;
|
||||
break;
|
||||
|
||||
case CHSGREK: // Greek
|
||||
|
||||
if (ucTmpChar >= 52 || // Keep case bit for 52-69 else ignore
|
||||
ui16WpChr == 0x804 || // [ 8,4] BETA Medial | Terminal
|
||||
ui16WpChr == 0x826) // [ 8,38] SIGMA terminal
|
||||
{
|
||||
goto store_extended_char;
|
||||
}
|
||||
|
||||
// No subcollation to worry about - set a zero bit by
|
||||
// incrementing the bit position.
|
||||
|
||||
uiSubColBitPos++;
|
||||
break;
|
||||
|
||||
case CHSCYR:
|
||||
if (ucTmpChar >= 144)
|
||||
{
|
||||
goto store_extended_char;
|
||||
}
|
||||
|
||||
// No subcollation to worry about - set a zero bit by
|
||||
// incrementing the bit position.
|
||||
|
||||
uiSubColBitPos++;
|
||||
|
||||
// VISIT: Georgian covers 208-249 - no collation defined yet
|
||||
|
||||
break;
|
||||
|
||||
case CHSHEB: // Hebrew
|
||||
|
||||
// Three sections in Hebrew:
|
||||
// 0..26 - main characters
|
||||
// 27..83 - accents that apear over previous character
|
||||
// 84..118- dagesh (ancient) hebrew with accents
|
||||
|
||||
// Because the ancient is only used for sayings & scriptures
|
||||
// we will support a collation value and in the sub-collation
|
||||
// store the actual character because sub-collation is in
|
||||
// character order.
|
||||
|
||||
if (ucTmpChar >= 84) // Save ancient - value 84 and above
|
||||
{
|
||||
goto store_extended_char;
|
||||
}
|
||||
|
||||
// No subcollation to worry about - set a zero bit by
|
||||
// incrementing the bit position.
|
||||
|
||||
uiSubColBitPos++;
|
||||
break;
|
||||
|
||||
case CHSARB1: // Arabic 1
|
||||
|
||||
// Three sections in Arabic:
|
||||
// 00..37 - accents that display OVER a previous character
|
||||
// 38..46 - symbols
|
||||
// 47..57 - numbers
|
||||
// 58..163 - characters
|
||||
// 164 - hamzah accent
|
||||
// 165..180- common characters with accents
|
||||
// 181..193- ligatures - common character combinations
|
||||
// 194..195- extensions - throw away when sorting
|
||||
|
||||
if (ucTmpChar <= 46)
|
||||
{
|
||||
goto store_extended_char; // save original character
|
||||
}
|
||||
|
||||
if (pucCollatedStr[ uiColLen-1] == COLS10a+1) // Alef?
|
||||
{
|
||||
ui16SubColVal = (ucTmpChar >= 165)
|
||||
? (FLMUINT16)(fwp_alefSubColTbl[ ucTmpChar - 165 ])
|
||||
: (FLMUINT16)7; // Alef subcol value
|
||||
goto store_sub_col;
|
||||
}
|
||||
if (ucTmpChar >= 181) // Ligatures - char combination
|
||||
{
|
||||
goto store_extended_char; // save original character
|
||||
}
|
||||
|
||||
if (ucTmpChar == 64) // taa exception
|
||||
{
|
||||
ui16SubColVal = 8;
|
||||
goto store_sub_col;
|
||||
}
|
||||
|
||||
// No subcollation to worry about - set a zero bit by
|
||||
// incrementing the bit position.
|
||||
|
||||
uiSubColBitPos++;
|
||||
break;
|
||||
|
||||
case CHSARB2: // Arabic 2
|
||||
|
||||
// There are some characters that share the same slot
|
||||
// Check the bit table if above character 64
|
||||
|
||||
if (ucTmpChar >= 64 &&
|
||||
fwp_ar2BitTbl[(ucTmpChar - 64) >> 3] &
|
||||
(0x80 >> (ucTmpChar & 0x07)))
|
||||
{
|
||||
goto store_extended_char; // Will save original
|
||||
}
|
||||
|
||||
// No subcollation to worry about - set a zero bit by
|
||||
// incrementing the bit position.
|
||||
|
||||
uiSubColBitPos++;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
||||
// Increment bit position to set a zero bit.
|
||||
|
||||
uiSubColBitPos++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Now let's worry about double character sorting
|
||||
|
||||
if (ui16WpChr2)
|
||||
{
|
||||
if (pbOriginalCharsLost)
|
||||
{
|
||||
*pbOriginalCharsLost = TRUE;
|
||||
}
|
||||
|
||||
// Set the next byte that follows in the sub collation buffer.
|
||||
|
||||
ucSubColBuf [(uiSubColBitPos + 7) >> 3] = 0;
|
||||
|
||||
if (bTwoIntoOne)
|
||||
{
|
||||
|
||||
// Sorts after character in ui16WpChr after call to
|
||||
// fwpCheckDoubleCollation
|
||||
// Write the char 2 times so lower/upper bits are correct.
|
||||
// Could write infinite times because of collation rules.
|
||||
|
||||
pucCollatedStr[ uiColLen] = ++pucCollatedStr[ uiColLen-1];
|
||||
uiColLen++;
|
||||
|
||||
// If original was upper case, set one more upper case bit
|
||||
|
||||
if (!uiUppercaseFlag)
|
||||
{
|
||||
ucCaseBits[ (uiCaseBitPos + 7) >> 3] = 0;
|
||||
if (!charIsUpper( (FLMUINT16) *(pucStr - 1)))
|
||||
{
|
||||
uiFlags |= HAD_LOWER_CASE;
|
||||
}
|
||||
else
|
||||
{
|
||||
SET_BIT( ucCaseBits, uiCaseBitPos);
|
||||
}
|
||||
uiCaseBitPos++;
|
||||
}
|
||||
|
||||
// Take into account the diacritical space
|
||||
|
||||
uiSubColBitPos++;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// We have a digraph, get second collation value
|
||||
|
||||
pucCollatedStr[ uiColLen++] =
|
||||
(FLMBYTE)(fwpGetCollation( ui16WpChr2, uiLanguage));
|
||||
|
||||
// Normal case, assume no diacritics set
|
||||
|
||||
uiSubColBitPos++;
|
||||
|
||||
// If first was upper, set one more upper bit.
|
||||
|
||||
if (!uiUppercaseFlag)
|
||||
{
|
||||
ucCaseBits [(uiCaseBitPos + 7) >> 3] = 0;
|
||||
if (charIsUpper( ui16WpChr))
|
||||
{
|
||||
SET_BIT( ucCaseBits, uiCaseBitPos);
|
||||
}
|
||||
uiCaseBitPos++;
|
||||
|
||||
// no need to reset the uiFlags
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check to see if uiColLen is at some overflow limit.
|
||||
|
||||
if (uiColLen >= uiCharLimit ||
|
||||
uiColLen + BYTES_IN_BITS( uiSubColBitPos) +
|
||||
BYTES_IN_BITS( uiCaseBitPos) >= uiTargetColLen)
|
||||
{
|
||||
|
||||
// We hit the maximum number of characters.
|
||||
|
||||
if (pucStr < pucStrEnd)
|
||||
{
|
||||
bDataTruncated = TRUE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// END OF WHILE LOOP
|
||||
|
||||
if (puiCollationLen)
|
||||
{
|
||||
*puiCollationLen = uiColLen;
|
||||
}
|
||||
|
||||
// Add the first substring marker - also serves as making the string non-null.
|
||||
|
||||
if (bFirstSubstring)
|
||||
{
|
||||
pucCollatedStr [uiColLen++] = COLL_FIRST_SUBSTRING;
|
||||
}
|
||||
|
||||
if (bDataTruncated)
|
||||
{
|
||||
pucCollatedStr[ uiColLen++ ] = COLL_TRUNCATED;
|
||||
}
|
||||
|
||||
// 10/20/98 - Add code to return NOTHING if no values found.
|
||||
|
||||
if (!uiColLen && !uiSubColBitPos)
|
||||
{
|
||||
if (puiCaseLen)
|
||||
{
|
||||
*puiCaseLen = 0;
|
||||
}
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
// Store extra zero bit in the sub-collation area for Hebrew/Arabic
|
||||
|
||||
if (bHebrewArabic)
|
||||
{
|
||||
uiSubColBitPos++;
|
||||
}
|
||||
|
||||
// Done putting the string into 4 sections - build the COLLATED KEY
|
||||
// Don't set uiUppercaseFlag earlier than here because SC_LOWER may be zero
|
||||
|
||||
uiUppercaseFlag = (uiLanguage == GR_LANG) ? SC_LOWER : SC_UPPER;
|
||||
|
||||
// The default terminating characters is (COLL_MARKER|SC_UPPER)
|
||||
// Did we write anything to the subcollation area?
|
||||
|
||||
if (uiFlags & HAD_SUB_COLLATION)
|
||||
{
|
||||
// Writes out a 0x7
|
||||
|
||||
pucCollatedStr [uiColLen++] = COLL_MARKER | SC_SUB_COL;
|
||||
|
||||
// Move the sub-collation into the collating string
|
||||
|
||||
uiLength = BYTES_IN_BITS( uiSubColBitPos);
|
||||
f_memcpy( &pucCollatedStr[uiColLen], ucSubColBuf, uiLength);
|
||||
uiColLen += uiLength;
|
||||
}
|
||||
|
||||
// Move the upper/lower case stuff - force bits for Greek ONLY
|
||||
// This is such a small size that a memcpy is not worth it
|
||||
|
||||
if (uiFlags & HAD_LOWER_CASE)
|
||||
{
|
||||
FLMUINT uiNumBytes = BYTES_IN_BITS( uiCaseBitPos);
|
||||
FLMBYTE * pucCasePtr = ucCaseBits;
|
||||
|
||||
// Output the 0x5
|
||||
|
||||
pucCollatedStr [uiColLen++] = (FLMBYTE)(COLL_MARKER | SC_MIXED);
|
||||
if (puiCaseLen)
|
||||
{
|
||||
*puiCaseLen = uiNumBytes + 1;
|
||||
}
|
||||
|
||||
if (uiUppercaseFlag == SC_LOWER)
|
||||
{
|
||||
|
||||
// Negate case bits for languages (like GREEK) that sort
|
||||
// upper case before lower case.
|
||||
|
||||
while (uiNumBytes--)
|
||||
{
|
||||
pucCollatedStr [uiColLen++] = ~(*pucCasePtr++);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while (uiNumBytes--)
|
||||
{
|
||||
pucCollatedStr [uiColLen++] = *pucCasePtr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
// All characters are either upper or lower case, as determined
|
||||
// by uiUppercaseFlag.
|
||||
|
||||
pucCollatedStr [uiColLen++] = (FLMBYTE)(COLL_MARKER | uiUppercaseFlag);
|
||||
if( puiCaseLen)
|
||||
{
|
||||
*puiCaseLen = 1;
|
||||
}
|
||||
}
|
||||
Exit:
|
||||
|
||||
// Set length return value.
|
||||
|
||||
if( pbDataTruncated)
|
||||
{
|
||||
*pbDataTruncated = bDataTruncated;
|
||||
}
|
||||
|
||||
*puiCollatedStrLen = uiColLen;
|
||||
return( rc);
|
||||
}
|
||||
Reference in New Issue
Block a user