//------------------------------------------------------------------------- // Desc: Unicode functions. // Tabs: 3 // // Copyright (c) 1999-2001,2003-2006 Novell, Inc. All Rights Reserved. // // This program is free software; you can redistribute it and/or // modify it under the terms of version 2 of the GNU General Public // License as published by the Free Software Foundation. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, contact Novell, Inc. // // To contact Novell about this file by physical or electronic mail, // you may find current contact information at www.novell.com // // $Id: funicode.cpp 12334 2006-01-23 12:45:35 -0700 (Mon, 23 Jan 2006) dsanders $ //------------------------------------------------------------------------- #include "flaimsys.h" FSTATIC FLMUINT flmUnicodeToWP( const FLMUNICODE * puzUniStr, FLMUINT16 * pWPChr); /**************************************************************************** Desc: Returns the size of buffer needed to hold the unicode string in FLAIM's storage format. ****************************************************************************/ FLMUINT FlmGetUnicodeStorageLength( const FLMUNICODE * puzStr) { FLMBYTE chrSet; FLMUINT uiStorageLength = 0; FLMUINT uniLength; FLMUINT16 wp60Buf[12]; flmAssert( puzStr != NULL); // Two passes are needed to store a UNICODE string: // 1st pass determines the storage length (via FlmGetUnicodeStorageLength) // 2nd pass stores the string into FLAIMs internal text format // (via FlmUnicode2Storage). do { // Cannot check for A..Z because flmUnicodeToWP may convert // multiple Unicode characters into 1 WP char - (D-slash) // This 'complex' convert code is defined out. // // Personally, I don't think this should ever be done, but the // conversions must be looked at. The hard part of all of this // is deciding if we should have perfect UNI-->WP60-->UNI where // the 2nd UNI is exactly the same as the first. // // For the NDS project, this code MUST have exact conversions. if( *puzStr < 0x20) { uniLength = 1; uiStorageLength += 3; } else { // This is a speed good optimization. if( *puzStr < 0x7F) { uiStorageLength++; puzStr++; continue; } uniLength = flmUnicodeToWP( puzStr, wp60Buf); if( !uniLength) { uiStorageLength += 3; uniLength = 1; } else { if( (chrSet = (FLMBYTE) (wp60Buf[0] >> 8)) == 0) { uiStorageLength++; } else { uiStorageLength += (chrSet <= 63) ? 2 : 3; } } } puzStr += uniLength; } while( *puzStr != 0 ); return( uiStorageLength); } /**************************************************************************** Desc: Copies and formats a Unicode string into FLAIM's storage format. The Unicode string must be in little endian format. Unicode values that are not represented as WordPerfect 6.x characters are preserved as non-WP characters. ****************************************************************************/ RCODE FlmUnicode2Storage( const FLMUNICODE * puzStr, FLMUINT * puiBufLength, FLMBYTE * pBuf) { FLMBYTE chrSet; FLMUINT16 wp60Buf[ 12]; FLMUINT uiStorageLength = 0; FLMUINT uniLength; flmAssert( puzStr != NULL); flmAssert( pBuf != NULL); do { if( *puzStr < 0x20 ) { // Output the character as an unconvertable unicode character. *pBuf++ = UNICODE_CODE; *pBuf++ = *puzStr >> 8; *pBuf++ = (FLMBYTE) *puzStr; uniLength = 1; uiStorageLength += 3; } else { if( *puzStr < 0x7F) { uiStorageLength++; *pBuf++ = (FLMBYTE)*puzStr++; continue; } uniLength = flmUnicodeToWP( puzStr, wp60Buf); if( !uniLength) { *pBuf++ = UNICODE_CODE; *pBuf++ = *puzStr >> 8; *pBuf++ = (FLMBYTE)*puzStr; uniLength = 1; uiStorageLength += 3; } else { chrSet = wp60Buf[0] >> 8; if( chrSet == 0) { *pBuf++ = (FLMBYTE) wp60Buf[0]; uiStorageLength++; } else if( chrSet <= 63) { *pBuf++ = CHAR_SET_CODE | chrSet; *pBuf++ = (FLMBYTE) wp60Buf[0]; uiStorageLength += 2; } else { *pBuf++ = EXT_CHAR_CODE; *pBuf++ = chrSet; *pBuf++ = (FLMBYTE) wp60Buf[0]; uiStorageLength += 3; } } } puzStr += uniLength; // Make sure input buffer was large enough if( *puiBufLength < uiStorageLength) { return( RC_SET( FERR_CONV_DEST_OVERFLOW)); } } while( *puzStr != 0); *puiBufLength = uiStorageLength; return( FERR_OK ); } /**************************************************************************** Desc: Convert from Unicode to 1 and only 1 WP60 character Ret: Conversion Count - 0 means Unicode character could not be converted. Notes: See commented out code below this for real neat multiple character conversions. We don't really want this so that the original UNICODE characters are preserved on get/put as much as possible. Code copied from WPTEXT\WPCHU.C in WpChUUniToWPLang() because of the multiple character conversion and that we only do one character at a time for both interfaces. Called from the UNICODE put routine above and QuickFinder UNICODE to WP conversion. ****************************************************************************/ FSTATIC FLMUINT flmUnicodeToWP( const FLMUNICODE * pUniStr, FLMUINT16 * pWPChr) { FLMUINT uiReturnLen = 1; FLMUNICODE uzUniChar = *pUniStr; FLMINT16 max; FLMINT16 min; FLMINT16 temp; FLMUINT16 * tablePtr; FLMUINT16 tblChr; if( uzUniChar < 127) { *pWPChr = uzUniChar; goto Exit; } tablePtr = (FLMUINT16 *) WP_UTOWP60; // Value we should use ... max = UTOWP60_ENTRIES - 1; // Bug introduced before Nov99 where UTOWP60_ENTRIES is actually 1502 // and the value of 2042 was used. Through debugging, all values in the // table from 1021 to 1502 were never converted to WP character. So, in order // to search correctly on these values we must preserve the WRONG conversion // of these characters (Unicode x222E on). The new max table size is 1021 so // max will be set to 1020 to work correctly. max = 1020; min = 0; do { temp = (min+max) >> 1; tblChr = *(tablePtr+(temp*2)); if( tblChr < uzUniChar ) { min = temp+1; } else if( tblChr > uzUniChar ) { max = temp-1; } else { *pWPChr = *(tablePtr + (temp*2) + 1); goto Exit; } } while( min <= max); uiReturnLen = 0; Exit: return( uiReturnLen ); } /**************************************************************************** Desc: Converts storage formats to UNICODE. ****************************************************************************/ RCODE FlmStorage2Unicode( FLMUINT uiValueType, FLMUINT uiValueLength, const FLMBYTE * pucValue, FLMUINT * puiStrBufLen, FLMUNICODE * puzStrBuf) { FLMUNICODE * tablePtr; FLMBYTE c; FLMUINT bytesProcessed = 0; FLMUINT bytesOutput = 0; FLMUINT outputData; FLMUINT maxOutLen; FLMBYTE objType; FLMUINT objLength = 0; FLMBYTE tempBuf[ 80]; FLMBYTE chrSet, chrVal; FLMUNICODE newChrVal; RCODE rc = FERR_OK; // If the value is a number, convert to text first if( uiValueType != FLM_TEXT_TYPE) { if( pucValue == NULL) { uiValueLength = 0; } else { if( uiValueType == FLM_NUMBER_TYPE) { uiValueLength = sizeof( tempBuf); rc = GedNumToText( pucValue, tempBuf, &uiValueLength); } else { rc = RC_SET( FERR_CONV_ILLEGAL); goto Exit; } if( RC_BAD(rc)) { goto Exit; } pucValue = &tempBuf[ 0]; } } maxOutLen = *puiStrBufLen; outputData = ((puzStrBuf != NULL) && (maxOutLen > 1)); if( outputData) { maxOutLen -= 2; } // Parse through the string outputting data to the buffer as we go while( bytesProcessed < uiValueLength) { // Determine what we are pointing at c = *pucValue; objType = GedTextObjType( c); switch( objType) { case ASCII_CHAR_CODE: objLength = 1; if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } *puzStrBuf++ = c; } bytesOutput += 2; break; case CHAR_SET_CODE: objLength = 2; if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } // Convert WP to UNICODE chrSet = c & 0x3F; chrVal = *(pucValue + 1); goto ConvertWPToUni; } bytesOutput += 2; break; case WHITE_SPACE_CODE: objLength = 1; if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } if( c == (WHITE_SPACE_CODE | NATIVE_TAB)) { *puzStrBuf = (FLMUNICODE) 9; } else if( c == (WHITE_SPACE_CODE | NATIVE_LINEFEED)) { *puzStrBuf = (FLMUNICODE) 10; } else if( c == (WHITE_SPACE_CODE | HARD_RETURN)) { *puzStrBuf = (FLMUNICODE) 13; } else { *puzStrBuf = (FLMUNICODE) 0x20; } puzStrBuf++; } bytesOutput += 2; break; case EXT_CHAR_CODE: objLength = 3; if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } // Convert back from WP to UNICODE chrSet = *(pucValue + 1); chrVal = *(pucValue + 2); ConvertWPToUni: // Code taken from _WpChWPToUni() in WPTEXT\WPCHU.C // There should always be a chrSet value. if( (chrSet < WP60toUni_MAX) && ((tablePtr = WP60toUni[ chrSet ]) != 0 )) { FLMUNICODE * pCpxUniStr; newChrVal = tablePtr[ chrVal]; if ((newChrVal & WPCH_LOMASK) == 0xF000) { /* ** Does character convert to many Unicode chars? ** Yes: Use complex character table ** Move to the correct location in the table */ pCpxUniStr = WP60toCpxUni[chrSet]; pCpxUniStr += (newChrVal & WPCH_HIMASK) * WPCH_MAX_COMPLEX; while( *pCpxUniStr) { if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } *puzStrBuf++ = *pCpxUniStr++; } bytesOutput += 2; } } else { if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } *puzStrBuf++ = newChrVal; } bytesOutput += 2; } } else { // Big extended WP char if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } *puzStrBuf++ = 0x03; } bytesOutput += 2; } } break; case OEM_CODE: // We always just skip OEM codes objLength = 2; break; case UNICODE_CODE: objLength = 3; if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } *puzStrBuf++ = (*(pucValue + 1) << 8) + *(pucValue + 2); } bytesOutput += 2; break; case UNK_EQ_1_CODE: objLength = 2; if( outputData) { if( (maxOutLen < 2) || (bytesOutput > maxOutLen - 2)) { rc = RC_SET( FERR_CONV_DEST_OVERFLOW); goto GedGetUNICODE_Output; } *puzStrBuf++ = *(pucValue+1); } bytesOutput += 2; break; default: flmAssert(0); bytesProcessed = uiValueLength; break; } pucValue += objLength; bytesProcessed += objLength; } // Add TWO terminating NULL characters, but DO NOT increment the // bytesOutput counter! GedGetUNICODE_Output: if( outputData) { *puzStrBuf = 0; } *puiStrBufLen = bytesOutput; Exit: return( rc); }