Renamed version4 to flaim and version5 to xflaim
git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@7 0109f412-320b-0410-ab79-c3e0c5ffbbe6
This commit is contained in:
551
flaim/src/funicode.cpp
Normal file
551
flaim/src/funicode.cpp
Normal file
@@ -0,0 +1,551 @@
|
||||
//-------------------------------------------------------------------------
|
||||
// Desc: Unicode functions.
|
||||
// Tabs: 3
|
||||
//
|
||||
// Copyright (c) 1999-2001,2003-2006 Novell, Inc. All Rights Reserved.
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or
|
||||
// modify it under the terms of version 2 of the GNU General Public
|
||||
// License as published by the Free Software Foundation.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program; if not, contact Novell, Inc.
|
||||
//
|
||||
// To contact Novell about this file by physical or electronic mail,
|
||||
// you may find current contact information at www.novell.com
|
||||
//
|
||||
// $Id: funicode.cpp 12334 2006-01-23 12:45:35 -0700 (Mon, 23 Jan 2006) dsanders $
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
#include "flaimsys.h"
|
||||
|
||||
// Forward declaration: maps the Unicode character at puzUniStr to a single
// WP60 character written to *pWPChr.  Returns the number of Unicode
// characters consumed; 0 means the character could not be converted.
FSTATIC FLMUINT flmUnicodeToWP(
	const FLMUNICODE *	puzUniStr,
	FLMUINT16 *				pWPChr);
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Returns the size of buffer needed to hold the unicode string in
|
||||
FLAIM's storage format.
|
||||
****************************************************************************/
|
||||
FLMUINT FlmGetUnicodeStorageLength(
|
||||
const FLMUNICODE * puzStr)
|
||||
{
|
||||
FLMBYTE chrSet;
|
||||
FLMUINT uiStorageLength = 0;
|
||||
FLMUINT uniLength;
|
||||
FLMUINT16 wp60Buf[12];
|
||||
|
||||
flmAssert( puzStr != NULL);
|
||||
|
||||
// Two passes are needed to store a UNICODE string:
|
||||
// 1st pass determines the storage length (via FlmGetUnicodeStorageLength)
|
||||
// 2nd pass stores the string into FLAIMs internal text format
|
||||
// (via FlmUnicode2Storage).
|
||||
|
||||
do
|
||||
{
|
||||
// Cannot check for A..Z because flmUnicodeToWP may convert
|
||||
// multiple Unicode characters into 1 WP char - (D-slash)
|
||||
// This 'complex' convert code is defined out.
|
||||
//
|
||||
// Personally, I don't think this should ever be done, but the
|
||||
// conversions must be looked at. The hard part of all of this
|
||||
// is deciding if we should have perfect UNI-->WP60-->UNI where
|
||||
// the 2nd UNI is exactly the same as the first.
|
||||
//
|
||||
// For the NDS project, this code MUST have exact conversions.
|
||||
|
||||
if( *puzStr < 0x20)
|
||||
{
|
||||
uniLength = 1;
|
||||
uiStorageLength += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
// This is a speed good optimization.
|
||||
|
||||
if( *puzStr < 0x7F)
|
||||
{
|
||||
uiStorageLength++;
|
||||
puzStr++;
|
||||
continue;
|
||||
}
|
||||
|
||||
uniLength = flmUnicodeToWP( puzStr, wp60Buf);
|
||||
|
||||
if( !uniLength)
|
||||
{
|
||||
uiStorageLength += 3;
|
||||
uniLength = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( (chrSet = (FLMBYTE) (wp60Buf[0] >> 8)) == 0)
|
||||
{
|
||||
uiStorageLength++;
|
||||
}
|
||||
else
|
||||
{
|
||||
uiStorageLength += (chrSet <= 63) ? 2 : 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
puzStr += uniLength;
|
||||
|
||||
} while( *puzStr != 0 );
|
||||
|
||||
return( uiStorageLength);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Copies and formats a Unicode string into FLAIM's storage format.
|
||||
The Unicode string must be in little endian format.
|
||||
Unicode values that are not represented as WordPerfect 6.x characters
|
||||
are preserved as non-WP characters.
|
||||
****************************************************************************/
|
||||
RCODE FlmUnicode2Storage(
|
||||
const FLMUNICODE * puzStr,
|
||||
FLMUINT * puiBufLength,
|
||||
FLMBYTE * pBuf)
|
||||
{
|
||||
FLMBYTE chrSet;
|
||||
FLMUINT16 wp60Buf[ 12];
|
||||
FLMUINT uiStorageLength = 0;
|
||||
FLMUINT uniLength;
|
||||
|
||||
flmAssert( puzStr != NULL);
|
||||
flmAssert( pBuf != NULL);
|
||||
|
||||
do
|
||||
{
|
||||
if( *puzStr < 0x20 )
|
||||
{
|
||||
// Output the character as an unconvertable unicode character.
|
||||
|
||||
*pBuf++ = UNICODE_CODE;
|
||||
*pBuf++ = *puzStr >> 8;
|
||||
*pBuf++ = (FLMBYTE) *puzStr;
|
||||
uniLength = 1;
|
||||
uiStorageLength += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( *puzStr < 0x7F)
|
||||
{
|
||||
uiStorageLength++;
|
||||
*pBuf++ = (FLMBYTE)*puzStr++;
|
||||
continue;
|
||||
}
|
||||
|
||||
uniLength = flmUnicodeToWP( puzStr, wp60Buf);
|
||||
|
||||
if( !uniLength)
|
||||
{
|
||||
*pBuf++ = UNICODE_CODE;
|
||||
*pBuf++ = *puzStr >> 8;
|
||||
*pBuf++ = (FLMBYTE)*puzStr;
|
||||
uniLength = 1;
|
||||
uiStorageLength += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
chrSet = wp60Buf[0] >> 8;
|
||||
|
||||
if( chrSet == 0)
|
||||
{
|
||||
*pBuf++ = (FLMBYTE) wp60Buf[0];
|
||||
uiStorageLength++;
|
||||
}
|
||||
else if( chrSet <= 63)
|
||||
{
|
||||
*pBuf++ = CHAR_SET_CODE | chrSet;
|
||||
*pBuf++ = (FLMBYTE) wp60Buf[0];
|
||||
uiStorageLength += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pBuf++ = EXT_CHAR_CODE;
|
||||
*pBuf++ = chrSet;
|
||||
*pBuf++ = (FLMBYTE) wp60Buf[0];
|
||||
uiStorageLength += 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
puzStr += uniLength;
|
||||
|
||||
// Make sure input buffer was large enough
|
||||
|
||||
if( *puiBufLength < uiStorageLength)
|
||||
{
|
||||
return( RC_SET( FERR_CONV_DEST_OVERFLOW));
|
||||
}
|
||||
|
||||
} while( *puzStr != 0);
|
||||
|
||||
*puiBufLength = uiStorageLength;
|
||||
return( FERR_OK );
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Convert from Unicode to 1 and only 1 WP60 character
|
||||
Ret: Conversion Count - 0 means Unicode character could not be converted.
|
||||
Notes: See commented out code below this for real neat multiple character
|
||||
conversions. We don't really want this so that the original
|
||||
UNICODE characters are preserved on get/put as much as possible.
|
||||
Code copied from WPTEXT\WPCHU.C in WpChUUniToWPLang() because
|
||||
of the multiple character conversion and that we only do one
|
||||
character at a time for both interfaces.
|
||||
Called from the UNICODE put routine above and QuickFinder
|
||||
UNICODE to WP conversion.
|
||||
****************************************************************************/
|
||||
FSTATIC FLMUINT flmUnicodeToWP(
|
||||
const FLMUNICODE * pUniStr,
|
||||
FLMUINT16 * pWPChr)
|
||||
{
|
||||
FLMUINT uiReturnLen = 1;
|
||||
FLMUNICODE uzUniChar = *pUniStr;
|
||||
FLMINT16 max;
|
||||
FLMINT16 min;
|
||||
FLMINT16 temp;
|
||||
FLMUINT16 * tablePtr;
|
||||
FLMUINT16 tblChr;
|
||||
|
||||
if( uzUniChar < 127)
|
||||
{
|
||||
*pWPChr = uzUniChar;
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
tablePtr = (FLMUINT16 *) WP_UTOWP60;
|
||||
|
||||
// Value we should use ... max = UTOWP60_ENTRIES - 1;
|
||||
// Bug introduced before Nov99 where UTOWP60_ENTRIES is actually 1502
|
||||
// and the value of 2042 was used. Through debugging, all values in the
|
||||
// table from 1021 to 1502 were never converted to WP character. So, in order
|
||||
// to search correctly on these values we must preserve the WRONG conversion
|
||||
// of these characters (Unicode x222E on). The new max table size is 1021 so
|
||||
// max will be set to 1020 to work correctly.
|
||||
|
||||
max = 1020;
|
||||
min = 0;
|
||||
|
||||
do
|
||||
{
|
||||
temp = (min+max) >> 1;
|
||||
tblChr = *(tablePtr+(temp*2));
|
||||
if( tblChr < uzUniChar )
|
||||
{
|
||||
min = temp+1;
|
||||
}
|
||||
else if( tblChr > uzUniChar )
|
||||
{
|
||||
max = temp-1;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pWPChr = *(tablePtr + (temp*2) + 1);
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
} while( min <= max);
|
||||
|
||||
uiReturnLen = 0;
|
||||
|
||||
Exit:
|
||||
|
||||
return( uiReturnLen );
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
Desc: Converts storage formats to UNICODE.
|
||||
****************************************************************************/
|
||||
RCODE FlmStorage2Unicode(
|
||||
FLMUINT uiValueType,
|
||||
FLMUINT uiValueLength,
|
||||
const FLMBYTE * pucValue,
|
||||
FLMUINT * puiStrBufLen,
|
||||
FLMUNICODE * puzStrBuf)
|
||||
{
|
||||
FLMUNICODE * tablePtr;
|
||||
FLMBYTE c;
|
||||
FLMUINT bytesProcessed = 0;
|
||||
FLMUINT bytesOutput = 0;
|
||||
FLMUINT outputData;
|
||||
FLMUINT maxOutLen;
|
||||
FLMBYTE objType;
|
||||
FLMUINT objLength = 0;
|
||||
FLMBYTE tempBuf[ 80];
|
||||
FLMBYTE chrSet, chrVal;
|
||||
FLMUNICODE newChrVal;
|
||||
RCODE rc = FERR_OK;
|
||||
|
||||
// If the value is a number, convert to text first
|
||||
|
||||
if( uiValueType != FLM_TEXT_TYPE)
|
||||
{
|
||||
if( pucValue == NULL)
|
||||
{
|
||||
uiValueLength = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( uiValueType == FLM_NUMBER_TYPE)
|
||||
{
|
||||
uiValueLength = sizeof( tempBuf);
|
||||
rc = GedNumToText( pucValue, tempBuf, &uiValueLength);
|
||||
}
|
||||
else
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_ILLEGAL);
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
if( RC_BAD(rc))
|
||||
{
|
||||
goto Exit;
|
||||
}
|
||||
|
||||
pucValue = &tempBuf[ 0];
|
||||
}
|
||||
}
|
||||
|
||||
maxOutLen = *puiStrBufLen;
|
||||
outputData = ((puzStrBuf != NULL) && (maxOutLen > 1));
|
||||
|
||||
if( outputData)
|
||||
{
|
||||
maxOutLen -= 2;
|
||||
}
|
||||
|
||||
// Parse through the string outputting data to the buffer as we go
|
||||
|
||||
while( bytesProcessed < uiValueLength)
|
||||
{
|
||||
// Determine what we are pointing at
|
||||
|
||||
c = *pucValue;
|
||||
objType = GedTextObjType( c);
|
||||
switch( objType)
|
||||
{
|
||||
case ASCII_CHAR_CODE:
|
||||
objLength = 1;
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
|
||||
*puzStrBuf++ = c;
|
||||
}
|
||||
bytesOutput += 2;
|
||||
break;
|
||||
|
||||
case CHAR_SET_CODE:
|
||||
objLength = 2;
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
|
||||
// Convert WP to UNICODE
|
||||
|
||||
chrSet = c & 0x3F;
|
||||
chrVal = *(pucValue + 1);
|
||||
|
||||
goto ConvertWPToUni;
|
||||
}
|
||||
|
||||
bytesOutput += 2;
|
||||
break;
|
||||
|
||||
case WHITE_SPACE_CODE:
|
||||
objLength = 1;
|
||||
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
|
||||
if( c == (WHITE_SPACE_CODE | NATIVE_TAB))
|
||||
{
|
||||
*puzStrBuf = (FLMUNICODE) 9;
|
||||
}
|
||||
else if( c == (WHITE_SPACE_CODE | NATIVE_LINEFEED))
|
||||
{
|
||||
*puzStrBuf = (FLMUNICODE) 10;
|
||||
}
|
||||
else if( c == (WHITE_SPACE_CODE | HARD_RETURN))
|
||||
{
|
||||
*puzStrBuf = (FLMUNICODE) 13;
|
||||
}
|
||||
else
|
||||
{
|
||||
*puzStrBuf = (FLMUNICODE) 0x20;
|
||||
}
|
||||
|
||||
puzStrBuf++;
|
||||
}
|
||||
|
||||
bytesOutput += 2;
|
||||
break;
|
||||
|
||||
case EXT_CHAR_CODE:
|
||||
objLength = 3;
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
|
||||
// Convert back from WP to UNICODE
|
||||
|
||||
chrSet = *(pucValue + 1);
|
||||
chrVal = *(pucValue + 2);
|
||||
|
||||
ConvertWPToUni:
|
||||
|
||||
// Code taken from _WpChWPToUni() in WPTEXT\WPCHU.C
|
||||
// There should always be a chrSet value.
|
||||
|
||||
if( (chrSet < WP60toUni_MAX) &&
|
||||
((tablePtr = WP60toUni[ chrSet ]) != 0 ))
|
||||
{
|
||||
FLMUNICODE * pCpxUniStr;
|
||||
|
||||
newChrVal = tablePtr[ chrVal];
|
||||
|
||||
if ((newChrVal & WPCH_LOMASK) == 0xF000)
|
||||
{
|
||||
/*
|
||||
** Does character convert to many Unicode chars?
|
||||
** Yes: Use complex character table
|
||||
** Move to the correct location in the table
|
||||
*/
|
||||
|
||||
pCpxUniStr = WP60toCpxUni[chrSet];
|
||||
pCpxUniStr += (newChrVal & WPCH_HIMASK) * WPCH_MAX_COMPLEX;
|
||||
|
||||
while( *pCpxUniStr)
|
||||
{
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
*puzStrBuf++ = *pCpxUniStr++;
|
||||
}
|
||||
bytesOutput += 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
*puzStrBuf++ = newChrVal;
|
||||
}
|
||||
bytesOutput += 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Big extended WP char
|
||||
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
|
||||
*puzStrBuf++ = 0x03;
|
||||
}
|
||||
|
||||
bytesOutput += 2;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case OEM_CODE:
|
||||
|
||||
// We always just skip OEM codes
|
||||
|
||||
objLength = 2;
|
||||
break;
|
||||
|
||||
case UNICODE_CODE:
|
||||
objLength = 3;
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
|
||||
*puzStrBuf++ = (*(pucValue + 1) << 8) + *(pucValue + 2);
|
||||
}
|
||||
bytesOutput += 2;
|
||||
break;
|
||||
|
||||
case UNK_EQ_1_CODE:
|
||||
objLength = 2;
|
||||
if( outputData)
|
||||
{
|
||||
if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2))
|
||||
{
|
||||
rc = RC_SET( FERR_CONV_DEST_OVERFLOW);
|
||||
goto GedGetUNICODE_Output;
|
||||
}
|
||||
*puzStrBuf++ = *(pucValue+1);
|
||||
}
|
||||
bytesOutput += 2;
|
||||
break;
|
||||
|
||||
default:
|
||||
flmAssert(0);
|
||||
bytesProcessed = uiValueLength;
|
||||
break;
|
||||
}
|
||||
pucValue += objLength;
|
||||
bytesProcessed += objLength;
|
||||
}
|
||||
|
||||
// Add TWO terminating NULL characters, but DO NOT increment the
|
||||
// bytesOutput counter!
|
||||
|
||||
GedGetUNICODE_Output:
|
||||
|
||||
if( outputData)
|
||||
{
|
||||
*puzStrBuf = 0;
|
||||
}
|
||||
|
||||
*puiStrBufLen = bytesOutput;
|
||||
|
||||
Exit:
|
||||
|
||||
return( rc);
|
||||
}
|
||||
Reference in New Issue
Block a user