Files
mars-flaim/flaim/src/fwpasia.cpp
dsandersoremutah c55dab446f Renamed version4 to flaim and version5 to xflaim
git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@7 0109f412-320b-0410-ab79-c3e0c5ffbbe6
2006-01-27 21:06:39 +00:00

1079 lines
27 KiB
C++

//-------------------------------------------------------------------------
// Desc: Collation for Asian languages.
// Tabs: 3
//
// Copyright (c) 1991-1992,1994-2001,2003,2005-2006 Novell, Inc. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of version 2 of the GNU General Public
// License as published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, contact Novell, Inc.
//
// To contact Novell about this file by physical or electronic mail,
// you may find current contact information at www.novell.com
//
// $Id: fwpasia.cpp 12301 2006-01-19 15:02:55 -0700 (Thu, 19 Jan 2006) dsanders $
//-------------------------------------------------------------------------
#include "flaimsys.h"
// Bits returned through pucCaseBits by fwpAsiaGetCollation().
// SET_CASE_BIT and SET_KATAKANA_BIT share bit 0: on the Latin/Greek/Cyrillic
// path the bit means "uppercase", on the kana path it means "katakana"
// (see the two-bit encodings listed in the fwpAsiaGetCollation header).
#define SET_CASE_BIT 0x01
#define SET_KATAKANA_BIT 0x01
// Set when a charset 11 character was converted to Zenkaku, or when a
// Zenkaku character was converted to Hankaku for the Latin/Greek/Cyrillic path.
#define SET_WIDTH_BIT 0x02
// Base collation value for dakuten/handakuten/chuuten in fwp_Ch24ColTbl.
// fwpAsiaGetCollation() also uses it as the exclusive upper bound when
// testing whether the previous collation value belongs to a kana character.
#define COLS_ASIAN_MARKS 0x140
// Defined outside this file; indexed here by the low byte of the diacritic
// returned from fwpCh6Brkcar().
extern FLMBYTE fwp_dia60Tbl[]; /* Diacritic conversions */
/**----------------------------------------------
*** Tables
*** The tables below were taken from the
*** following files:
*** XCH2COL.ASM
*** CMPWS.ASM - k_diac (KanaSubColTbl[])
***---------------------------------------------*/
/**---------------------------------------------
*** Maps special characters in charset 0x24 (Zenkaku symbols) to
*** collation values.
*** ByteValue is the character value inside charset 0x24,
*** WordValue is the collation value.
*** fwpAsiaGetCollation() scans this table linearly. When the
*** collation value found is below 0x100 it stores
*** (table position + 1) as the sub-collation value; entries based
*** on COLS_ASIAN_MARKS (0x140 and up) get no sub-collation value
*** from this table.
***--------------------------------------------*/
BYTE_WORD_TBL fwp_Ch24ColTbl[] = /* Position in the table+1 is subColValue */
{
{1, COLLS+2}, /* comma */
{2, COLLS+1}, /* maru */
{5, COLS_ASIAN_MARKS+2}, /* chuuten */
{10, COLS_ASIAN_MARKS}, /* dakuten */
{11, COLS_ASIAN_MARKS+1}, /* handakuten */
{43, COLS2+2}, /* angled brackets */
{44, COLS2+3}, /* */
{49, COLS2+2}, /* pointy brackets */
{50, COLS2+3},
{51, COLS2+2}, /* double pointy brackets */
{52, COLS2+3},
{53, COLS1}, /* Japanese quotes */
{54, COLS1},
{55, COLS1}, /* hollow Japanese quotes */
{56, COLS1},
{57, COLS2+2}, /* filled rounded brackets */
{58, COLS2+3}
};
/**-------------------------------------
*** Kana subcollation values
*** BIT 0: set if large char
*** BIT 1: set if voiced
*** BIT 2: set if half voiced
*** Voiced entries are stored as 3 and half voiced entries as 5;
*** HanToZenkaku() tests for exactly those two values.
*** Indexing:
*** The table has 86 entries and is indexed by the katakana
*** character value in charset 0x26 (0..0x55). Hiragana in
*** charset 0x25 uses the same index after 0x5E is subtracted.
*** It runs parallel to KanaColTbl[].
*** Note:
*** To save space should be nibbles
*** IMPORTANT:
*** The '1' entries that do not have
*** a matching '0' entry have been
*** changed to zero to save space in
*** the subcollation area.
*** The original table is listed below.
***------------------------------------*/
FLMBYTE KanaSubColTbl[] =
{
0,1,0,1,0,1,0,1,0,1, /* a A i I u U e E o O */
1,3,0,3,0,3,1,3,0,3, /* KA GA KI GI KU GU KE GE KO GO */
0,3,0,3,0,3,0,3,0,3, /* SA ZA SHI JI SU ZU SE ZE SO ZO */
0,3,0,3,0,1,3,0,3,0,3, /* TA DA CHI JI tsu TSU ZU TE DE TO DO*/
0,0,0,0,0, /* NA NI NU NE NO */
0,3,5,0,3,5,0,3,5, /* HA BA PA HI BI PI FU BU PU */
0,3,5,0,3,5, /* HE BE PE HO BO PO */
0,0,0,0,0, /* MA MI MU ME MO */
0,1,0,1,0,1, /* ya YA yu YU yo YO */
0,0,0,0,0, /* RA RI RU RE RO */
0,1,0,0,0, /* wa WA WI WE WO */
0,3,0,0 /* N VU ka ke */
};
/**
*** Map katakana (CharSet x26) to collation values
*** kana collating values are two byte values
*** where the high byte is 0x01.
*** Only the low byte is stored here; fwpAsiaGetCollation() adds 0x0100.
*** The table has 86 entries, indexed by the charset 0x26 character
*** value (0..0x55); hiragana uses the same index after subtracting 0x5E.
*** Characters that differ only in size or voicing share one value and
*** are told apart by KanaSubColTbl[]. The largest value is 47 (N).
**/
FLMBYTE KanaColTbl[] =
{
0, 0, 1, 1, 2, 2, 3, 3, 4, 4,/* a A i I u U e E o O */
5, 5, 6, 6, 7, 7, 8, 8, 9, 9,/* KA GA KI GI KU GU KE GE KO GO */
10,10,11,11,12,12,13,13,14,14,/* SA ZA SHI JI SU ZU SE ZE SO ZO */
15,15,16,16,17,17,17,18,18,19,19,/* TA DA CHI JI tsu TSU ZU TE DE TO DO*/
20,21,22,23,24, /* NA NI NU NE NO */
25,25,25,26,26,26,27,27,27, /* HA BA PA HI BI PI FU BU PU */
28,28,28,29,29,29, /* HE BE PE HO BO PO */
30,31,32,33,34, /* MA MI MU ME MO */
35,35,36,36,37,37, /* ya YA yu YU yo YO */
38,39,40,41,42, /* RA RI RU RE RO */
43,43,44,45,46, /* wa WA WI WE WO */
47, 2, 5, 8 /* N VU ka ke */
};
/**---------------------------------------
*** Map KataKana collated value to vowel
*** value for use for the previous char.
*** Indexed by the low byte of a kana collation value as produced by
*** KanaColTbl[]. The result is the collation low byte of the vowel
*** (0..4 = a i u e o) and is used for the repeat-vowel mark.
*** NOTE: the table has 47 entries (indexes 0..46). KanaColTbl[] gives
*** N the value 47, which has no entry here, so any code indexing this
*** table must bounds-check the index first.
***--------------------------------------*/
FLMBYTE KanaColToVowel[] =
{
0,1,2,3,4, /* a i u e o */
0,1,2,3,4, /* ka ki ku ke ko */
0,1,2,3,4, /* sa shi su se so */
0,1,2,3,4, /* ta chi tsu te to */
0,1,2,3,4, /* na ni nu ne no */
0,1,2,3,4, /* ha hi hu he ho */
0,1,2,3,4, /* ma mi mu me mo */
0,2,4, /* ya yu yo */
0,1,2,3,4, /* ra ri ru re ro */
0,1,3,4, /* wa wi we wo */
};
/**
*** Convert Zenkaku (double wide) to Hankaku (single wide)
*** Character set 0x24 maps to single wide chars in other char sets.
*** This enables collation values to be found on some symbols.
*** This is also used to convert symbols from hankaku to Zen24.
***
*** ByteValue is the character value inside charset 0x24, WordValue is
*** the single wide WP character.
*** The entries are sorted by ascending ByteValue: ZenToHankaku() stops
*** scanning at the first entry whose ByteValue is >= the value sought,
*** so new entries must keep that order.
*** HanToZenkaku() searches the table the other way round, by WordValue.
**/
BYTE_WORD_TBL Zen24ToHankaku[] = {
{ 0 ,0x0020 }, /* space */
{ 1 ,0x0b03 }, /* japanese comma */
{ 2 ,0x0b00 }, /* circle period */
{ 3 , 44 }, /* comma */
{ 4 , 46 }, /* period */
{ 5 ,0x0b04 }, /* center dot */
{ 6 , 58 }, /* colon */
{ 7 , 59 }, /* semicolon */
{ 8 , 63 }, /* question mark */
{ 9 , 33 }, /* exclamation mark */
{ 10 ,0x0b3d }, /* dakuten */
{ 11 ,0x0b3e }, /* handakuten */
{ 12 ,0x0106 }, /* accent mark */
{ 13 , 96 }, /* accent mark */
{ 14 ,0x0107 }, /* umlaut */
{ 15 , 94 }, /* caret */
{ 16 ,0x0108 }, /* macron */
{ 17 , 95 }, /* underscore */
{ 27 ,0x0b0f }, /* extend vowel */
{ 28 ,0x0422 }, /* mdash */
{ 29 , 45 }, /* hyphen */
{ 30 , 47 }, /* slash */
{ 31 ,0x0607 }, /* backslash */
{ 32 , 126 }, /* tilde */
{ 33 ,0x0611 }, /* doubleline */
{ 34 ,0x0609 }, /* line */
{ 37 ,0x041d }, /* left apostrophe */
{ 38 ,0x041c }, /* right apostrophe */
{ 39 ,0x0420 }, /* left quote */
{ 40 ,0x041f }, /* right quote */
{ 41 , 40 }, /* left paren */
{ 42 , 41 }, /* right paren */
{ 45 , 91 }, /* left bracket */
{ 46 , 93 }, /* right bracket */
{ 47 , 123 }, /* left curly bracket */
{ 48 , 125 }, /* right curly bracket */
{ 53 ,0x0b01 }, /* left j quote */
{ 54 ,0x0b02 }, /* right j quote */
{ 59 , 43 }, /* plus */
{ 60 ,0x0600 }, /* minus */
{ 61 ,0x0601 }, /* plus/minus */
{ 62 ,0x0627 }, /* times */
{ 63 ,0x0608 }, /* divide */
{ 64 , 61 }, /* equal */
{ 65 ,0x0663 }, /* unequal */
{ 66 , 60 }, /* less */
{ 67 , 62 }, /* greater */
{ 68 ,0x0602 }, /* less/equal */
{ 69 ,0x0603 }, /* greater/equal */
{ 70 ,0x0613 }, /* infinity */
{ 71 ,0x0666 }, /* triangle dots */
{ 72 ,0x0504 }, /* man */
{ 73 ,0x0505 }, /* woman */
{ 75 ,0x062d }, /* prime */
{ 76 ,0x062e }, /* double prime */
{ 78 ,0x040c }, /* yen */
{ 79 , 36 }, /* $ */
{ 80 ,0x0413 }, /* cent */
{ 81 ,0x040b }, /* pound */
{ 82 , 37 }, /* % */
{ 83 , 35 }, /* # */
{ 84 , 38 }, /* & */
{ 85 , 42 }, /* * */
{ 86 , 64 }, /* @ */
{ 87 ,0x0406 }, /* squiggle */
{ 89 ,0x06b8 }, /* filled star */
{ 90 ,0x0425 }, /* hollow circle */
{ 91 ,0x042c }, /* filled circle */
{ 93 ,0x065f }, /* hollow diamond */
{ 94 ,0x0660 }, /* filled diamond */
{ 95 ,0x0426 }, /* hollow box */
{ 96 ,0x042e }, /* filled box */
{ 97 ,0x0688 }, /* hollow triangle */
{ 99 ,0x0689 }, /* hollow upside down triangle */
{ 103,0x0615 }, /* right arrow */
{ 104,0x0616 }, /* left arrow */
{ 105,0x0617 }, /* up arrow */
{ 106,0x0622 }, /* down arrow */
{ 119,0x060f }, /* */
{ 121,0x0645 }, /* */
{ 122,0x0646 },
{ 123,0x0643 },
{ 124,0x0644 },
{ 125,0x0642 }, /* union */
{ 126,0x0610 }, /* intersection */
{ 135,0x0655 },
{ 136,0x0656 },
{ 138,0x0638 }, /* right arrow */
{ 139,0x063c }, /* left/right arrow */
{ 140,0x067a }, /* */
{ 141,0x0679 },
{ 153,0x064f }, /* angle */
{ 154,0x0659 },
{ 155,0x065a },
{ 156,0x062c },
{ 157,0x062b },
{ 158,0x060e },
{ 159,0x06b0 },
{ 160,0x064d },
{ 161,0x064e },
{ 162,0x050e }, /* square root */
{ 164,0x0604 },
{ 175,0x0623 }, /* angstrom */
{ 176,0x044b }, /* percent */
{ 177,0x051b }, /* sharp */
{ 178,0x051c }, /* flat */
{ 179,0x0509 }, /* musical note */
{ 180,0x0427 }, /* dagger */
{ 181,0x0428 }, /* double dagger */
{ 182,0x0405 }, /* paragraph */
{ 187,0x068f } /* big hollow circle */
};
/**
*** Maps CS26 to CharSet 11
*** Taken from Char.asm
*** Used to uncollate characters for FLAIM - placed here for consistency
*** 0x80 - add dakuten
*** 0xC0 - add handakuten
*** 0xFF - no mapping exists
***
*** Indexed by the katakana character value in charset 0x26 (0..0x55).
*** ZenToHankaku() decodes an entry as follows: 0xFF means no single wide
*** form; otherwise bit 0x80 marks a voiced character, bit 0x40 then
*** selects handakuten instead of dakuten, and the low six bits (0x3F)
*** are the character value in charset 11.
**/
FLMBYTE MapCS26ToCharSet11[ 86 ] = {
0x06, /* 0 a */
0x10, /* 1 A */
0x07, /* 2 i */
0x11, /* 3 I */
0x08, /* 4 u */
0x12, /* 5 U */
0x09, /* 6 e */
0x13, /* 7 E */
0x0a, /* 8 o */
0x14, /* 9 O */
0x15, /* 0x0a KA */
0x95, /* GA - 21 followed by 0x3D dakuten */
0x16, /* 0x0c KI */
0x96, /* GI */
0x17, /* 0x0e KU */
0x97, /* GU */
0x18, /* 0x10 KE */
0x98, /* GE */
0x19, /* 0x12 KO */
0x99, /* GO */
0x1a, /* 0x14 SA */
0x9a, /* ZA */
0x1b, /* 0x16 SHI */
0x9b, /* JI */
0x1c, /* 0x18 SU */
0x9c, /* ZU */
0x1d, /* 0x1a SE */
0x9d, /* ZE */
0x1e, /* 0x1c SO */
0x9e, /* ZO */
0x1f, /* 0x1e TA */
0x9f, /* DA */
0x20, /* 0x20 CHI */
0xa0, /* JI */
0x0e, /* 0x22 small tsu */
0x21, /* 0x23 TSU */
0xa1, /* ZU */
0x22, /* 0x25 TE */
0xa2, /* DE */
0x23, /* 0x27 TO */
0xa3, /* DO */
0x24, /* 0x29 NA */
0x25, /* 0x2a NI */
0x26, /* 0x2b NU */
0x27, /* 0x2c NE */
0x28, /* 0x2d NO */
0x29, /* 0x2e HA */
0xa9, /* 0x2f BA */
0xe9, /* 0x30 PA */
0x2a, /* 0x31 HI */
0xaa, /* 0x32 BI */
0xea, /* 0x33 PI */
0x2b, /* 0x34 FU */
0xab, /* 0x35 BU */
0xeb, /* 0x36 PU */
0x2c, /* 0x37 HE */
0xac, /* 0x38 BE */
0xec, /* 0x39 PE */
0x2d, /* 0x3a HO */
0xad, /* 0x3b BO */
0xed, /* 0x3c PO */
0x2e, /* 0x3d MA */
0x2f, /* 0x3e MI */
0x30, /* 0x3f MU */
0x31, /* 0x40 ME */
0x32, /* 0x41 MO */
0x0b, /* 0x42 small ya */
0x33, /* 0x43 YA */
0x0c, /* 0x44 small yu */
0x34, /* 0x45 YU */
0x0d, /* 0x46 small yo */
0x35, /* 0x47 YO */
0x36, /* 0x48 RA */
0x37, /* 0x49 RI */
0x38, /* 0x4a RU */
0x39, /* 0x4b RE */
0x3a, /* 0x4c RO */
0xff, /* 0x4d small wa */
0x3b, /* 0x4e WA */
0xff, /* 0x4f WI */
0xff, /* 0x50 WE */
0x05, /* 0x51 WO */
0x3c, /* 0x52 N */
0xff, /* 0x53 VU */
0xff, /* 0x54 ka */
0xff /* 0x55 ke */
};
/**
*** Conversion from single (Hankaku) to double (Zenkaku) wide characters
*** Used in HanToZenkaku()
**/
/* Maps charset 0 (ASCII) punctuation 0x20..0x2F to the character value in */
/* CS24 (Zenkaku punctuation). HanToZenkaku() indexes it with */
/* (character value - 0x20) and adds 0x2400 to the entry. */
FLMBYTE From0AToZen[] = { /* ' changed because of windows */
0, 9, 40, 0x53, /* sp ! " # */
0x4f, 0x52, 0x54, 38, /* $ % & ' */
/* Was 187 for ! and 186 for ' */
0x29, 0x2a, 0x55, 0x3b, /* ( ) * + */
3, 0x1d, 4, 0x1e /* , - . / */
};
/* Maps charset 0 punctuation 0x3A..0x40 to the character value in CS24. */
/* HanToZenkaku() indexes it with (character value - 0x3A). */
FLMBYTE From0BToZen[] = {
6, 7, 0x42, 0x40, /* : ; < = */
0x43, 8, 0x56 /* > ? @ */
};
/* Maps charset 0 punctuation 0x5B..0x60 to the character value in CS24. */
/* HanToZenkaku() indexes it with (character value - 0x5B). */
FLMBYTE From0CToZen[] = {
0x2d, 0x1f, 0x2e, 0x0f, 0x11, 0x0d /* [ \ ] ^ _ ` */
};
/* Maps charset 0 punctuation 0x7B..0x7E to the character value in CS24. */
/* HanToZenkaku() indexes it with (character value - 0x7B). */
FLMBYTE From0DToZen[] = {
0x2f, 0x22, 0x30, 0x20 /* { | } ~ */
};
/* Maps a charset 8 (Greek) character value to the character value in */
/* charset 0x26. Entries alternate between the 0x5E..0x75 range and the */
/* 0x7E..0x95 range; 0xFF marks a character with no double wide form. */
/* HanToZenkaku() rejects character values at or beyond sizeof(From8ToZen). */
FLMBYTE From8ToZen[] = { /* Fast way to convert from 8 to zen */
0x5e, 0x7e, 0x5f, 0x7f, 0x5f, 0xFF, 0x60, 0x80,
0x61, 0x81, 0x62, 0x82, 0x63, 0x83, 0x64, 0x84,
0x65, 0x85, 0x66, 0x86, 0x67, 0x87, 0x68, 0x88,
0x69, 0x89, 0x6a, 0x8a, 0x6b, 0x8b, 0x6c, 0x8c,
0x6d, 0x8d, 0x6e, 0x8e, 0x6f, 0x8f, 0x6f, 0xFF,
0x70, 0x90, 0x71, 0x91, 0x72, 0x92, 0x73, 0x93,
0x74, 0x94, 0x75, 0x95
};
/* Maps charset 11 character values 0..4 to the character value in CS24. */
/* HanToZenkaku() indexes it directly and adds 0x2400 to the entry. */
static FLMBYTE From11AToZen[] = { /* 11 to 24 punctuation except dash */
2, /* japanese period */
0x35, /* left bracket */
0x36, /* right bracket */
0x01, /* comma */
0x05 /* chuuten */
};
/* Maps charset 11 character values 5..0x3C to the katakana character value */
/* in charset 0x26. HanToZenkaku() indexes it with (character value - 5). */
/* The single 0xFF entry is the dash, which HanToZenkaku() converts to */
/* 0x241b instead of a charset 0x26 character. */
static FLMBYTE From11BToZen[] = { /* 11 to 26 (katakana) from 11,5 */
0x51, /* wo */
0,2,4,6,8,0x42,0x44,0x46,0x22, /* small a i u e o ya yu yo tsu */
0xFF, 1, 3, 5, 7, 9, /* dash (x241b) a i u e o */
0x0a, 0x0c, 0x0e, 0x10, 0x12, /* ka ki ku ke ko */
0x14, 0x16, 0x18, 0x1a, 0x1c, /* sa shi su se so */
0x1e, 0x20, 0x23, 0x25, 0x27, /* ta chi tsu te to */
0x29, 0x2a, 0x2b, 0x2c, 0x2d, /* na ni nu ne no */
0x2e, 0x31, 0x34, 0x37, 0x3a, /* ha hi fu he ho */
0x3d, 0x3e, 0x3f, 0x40, 0x41, /* ma mi mu me mo */
0x43, 0x45, 0x47, /* ya yu yo */
0x48, 0x49, 0x4a, 0x4b, 0x4c, /* ra ri ru re ro */
0x4e, 0x52 /* WA N */
}; /* does not have wa WI WE VU ka ke */
/****************************************************************************
Desc:	Returns the collation value of the input WP character.
		A character in charset 11 is first converted to Zenkaku (double wide).
In:		ui16WpChar       - Char to collate off of - could be in CS0..14 or
		                   x24..up
		ui16NextWpChar   - next WP char for CS11 voicing marks
		ui16PrevColValue - previous collating value - for repeat/vowel repeat
		pui16ColValue    - returns 2 byte collation value
		pui16SubColVal   - 0, 6 or 16 bit value for the latin sub collation
		                   or the kana size & vowel voicing
		                      001 - set if large (upper) character
		                      010 - set if voiced
		                      100 - set if half voiced
		pucCaseBits      - returns 2 bits
		                   Latin/Greek/Cyrillic
		                      01 - case bit set if character is uppercase
		                      10 - double wide character in CS 0x25xx,
		                           0x26xx and 0x27xx
		                   Japanese
		                      00 - double wide hiragana 0x255e..25b0
		                      01 - double wide katakana 0x2600..2655
		                      10 - double wide symbols that map to charset 11
		                      11 - single wide katakana from charset 11
		uiUppercaseFlag  - when non-zero the case bit is set on the
		                   Latin/Greek/Cyrillic path even for a character
		                   that is not uppercase
Ret:	0 - no valid collation value
		    high values set for pui16ColValue
		    Sub-collation gets original WP character value
		1 - valid collation value
		2 - valid collation value and used the ui16NextWpChar
Notes:	Code taken from XCH2COL.ASM - routine xch2col_f
		also from CMPWS.ASM - routine getcase
		Two table lookups are bounds-checked here. A hiragana-range value
		beyond the kana tables and a repeat-vowel mark that follows a kana
		without a vowel entry (N) both yield "no valid collation value"
		instead of reading past the end of a table.
Terms:
		HANKAKU - single wide characters in charsets 0..14
		ZENKAKU - double wide characters in charsets 0x24..end of kanji
		KANJI   - collation values are 0x2900 less than WPChar value
****************************************************************************/
FLMUINT16 fwpAsiaGetCollation(
	FLMUINT16	ui16WpChar,			// WP char to get collation values
	FLMUINT16	ui16NextWpChar,	// Next WP char - for CS11 voicing marks
	FLMUINT16	ui16PrevColValue,	// Previous collating value
	FLMUINT16 *	pui16ColValue,		// Returns collation value
	FLMUINT16 *	pui16SubColVal,	// Returns sub-collation value
	FLMBYTE *	pucCaseBits,		// Returns case bits value
	FLMUINT16	uiUppercaseFlag	// Set if to convert to uppercase
	)
{
	FLMUINT16	ui16ColValue;
	FLMUINT16	ui16SubColVal;
	FLMBYTE		ucCaseBits = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMBYTE		ucKanaIdx;
	FLMUINT16	ui16Hankaku;
	FLMUINT		uiLoop;
	FLMUINT16	ui16ReturnValue = 1;

	ui16ColValue = ui16SubColVal = 0;

	// Kanji or above

	if (ucCharSet >= 0x2B)
	{
		// Puts 2 or above into high byte.

		ui16ColValue = ui16WpChar - 0x2900;

		// No subcollation or case bits need to be set

		goto Exit;
	}

	// Single wide character? (HANKAKU)

	if (ucCharSet < 11)
	{
		// Get the values from a non-asian character
		// LATIN, GREEK or CYRILLIC
		// The width bit may have been set on a jump to
		// label from below.

Latin_Greek_Cyrillic:

		// YES: Pass US_LANG because this is what we want -
		// Prevents double character sorting.

		ui16ColValue = fwpGetCollation( ui16WpChar, US_LANG);

		if (uiUppercaseFlag || fwpIsUpper( ui16WpChar))
		{
			// Uppercase - set case bit

			ucCaseBits |= SET_CASE_BIT;
		}

		// Character for which there is no collation value?

		if (ui16ColValue == COLS0)
		{
			ui16ReturnValue = 0;
			if (!fwpIsUpper( ui16WpChar))
			{
				// Convert to uppercase

				ui16WpChar--;
			}
			ui16ColValue = 0xFFFF;
			ui16SubColVal = ui16WpChar;
		}
		else if (ucCharSet)				// Don't bother with ascii
		{
			if (!fwpIsUpper( ui16WpChar))
			{
				// Convert to uppercase

				ui16WpChar--;
			}

			if (ucCharSet == CHSMUL1)
			{
				FLMUINT16	ui16Base;
				FLMUINT16	ui16Diacritic;

				// fwpCh6Brkcar() returning zero is treated as a successful
				// split; only then is ui16Diacritic read.

				ui16SubColVal = !fwpCh6Brkcar( ui16WpChar, &ui16Base,
															&ui16Diacritic)
										? fwp_dia60Tbl[ ui16Diacritic & 0xFF]
										: ui16WpChar;
			}
			else if (ucCharSet == CHSGREK)	// GREEK
			{
				if (ui16WpChar >= 0x834 ||		// [8,52] or above
					 ui16WpChar == 0x804 ||		// [8,4] BETA Medial | Terminal
					 ui16WpChar == 0x826)		// [8,38] SIGMA terminal
				{
					ui16SubColVal = ui16WpChar;
				}
			}
			else if (ucCharSet == CHSCYR)		// CYRILLIC
			{
				if (ui16WpChar >= 0xA90)		// [10, 144] or above
				{
					ui16SubColVal = ui16WpChar;	// Dup collation values
				}
			}
			// else don't need a sub collation value
		}
		goto Exit;
	}

	// Single wide Japanese character?

	if (ucCharSet == 11)
	{
		FLMUINT16	ui16KanaChar;

		// Convert charset 11 to Zenkaku (double wide) CS24 or CS26 hex.
		// All characters in charset 11 will convert to CS24 or CS26.
		// when combining the collation and the sub-collation values.
		//
		// NOTE(review): HanToZenkaku() returns 0 and a zero character for
		// charset 11 values above 0x3E. That case is not handled here; the
		// zero character then takes the Latin path below and the original
		// character is not preserved. Confirm whether such values can occur.

		if (HanToZenkaku( ui16WpChar, ui16NextWpChar, &ui16KanaChar ) == 2)
		{
			// Return 2

			ui16ReturnValue++;
		}

		ucCaseBits |= SET_WIDTH_BIT;		// Set so will allow to go back
		ui16WpChar = ui16KanaChar;			// If in CS24 will fall through to ZenKaku
		ucCharSet = ui16KanaChar >> 8;
		ucCharVal = ui16KanaChar & 0xFF;
	}

	if (ui16WpChar < 0x2400)
	{
		// In some other character set

		goto Latin_Greek_Cyrillic;
	}
	else if (ui16WpChar >= 0x255e &&		// Hiragana?
				ui16WpChar <= 0x2655)		// Katakana?
	{
		// HIRAGANA & KATAKANA
		// Kana contains both hiragana and katakana.
		// The tables contain the same characters in same order

		ucKanaIdx = ucCharVal;
		if (ucCharSet == 0x25)
		{
			// Change value to be in character set 26

			ucKanaIdx -= 0x5E;
		}

		// The range test above also admits 0x25B4..0x25FF, which would
		// index past the end of the kana tables. Report those values as
		// having no collation value instead of reading out of bounds.

		if (ucKanaIdx >= sizeof( KanaColTbl) ||
			 ucKanaIdx >= sizeof( KanaSubColTbl))
		{
			ui16ReturnValue = 0;
			ui16ColValue = 0xFFFF;
			ui16SubColVal = ui16WpChar;
			goto Exit;
		}

		if (ui16WpChar >= 0x2600)
		{
			ucCaseBits |= SET_KATAKANA_BIT;
		}

		ui16ColValue = 0x0100 + KanaColTbl[ ucKanaIdx ];
		ui16SubColVal = KanaSubColTbl[ ucKanaIdx ];
		goto Exit;
	}

	// ZenKaku - means any double wide character
	// Hankaku - single wide character
	// Inputs:	0x2400..2559 symbols..latin - Zenkaku
	//				0x265B..2750 greek..cyrillic - Zenkaku
	// SET_WIDTH_BIT may have been set if original char
	// was in 11 and got converted to CS24. [1,2,5,27(extendedVowel),53,54]
	// Original chars from CS11 will have some collation value that when
	// combined with the sub-collation value will format a character in
	// CS24. The width bit will then convert back to CS11.

	if ((ui16Hankaku = ZenToHankaku( ui16WpChar, (FLMUINT16 *) 0 )) != 0)
	{
		if ((ui16Hankaku >> 8) != 11)		// if CharSet11 was a CS24 symbol
		{
			ui16WpChar = ui16Hankaku;		// May be CS24 symbol/latin/gk/cy
			ucCharSet = ui16WpChar >> 8;
			ucCharVal = ui16WpChar & 0xFF;
			ucCaseBits |= SET_WIDTH_BIT;	// Latin symbols double wide
			goto Latin_Greek_Cyrillic;
		}
	}

	// 0x2400..0x24bc Japanese symbols that cannot be converted to Hankaku.
	// All 6 original symbol chars from 11 will also be here.
	// First try to find a collation value of the symbol.
	// The sub-collation value will be the position in the CS24 table + 1.

	for (uiLoop = 0;
		  uiLoop < (sizeof(fwp_Ch24ColTbl) / sizeof(BYTE_WORD_TBL));
		  uiLoop++ )
	{
		if (ucCharVal == fwp_Ch24ColTbl[ uiLoop].ByteValue)
		{
			if ((ui16ColValue = fwp_Ch24ColTbl[ uiLoop].WordValue) < 0x100)
			{
				// Don't save for chuuten, dakuten, handakuten

				ui16SubColVal = (FLMUINT16)(uiLoop + 1);
			}
			break;
		}
	}

	if (!ui16ColValue)
	{
		// Now see if it's a repeat or repeat-vowel character

		if( (((ucCharVal >= 0x12) && (ucCharVal <= 0x15)) ||
				(ucCharVal == 0x17) ||
				(ucCharVal == 0x18)) &&
			 ((ui16PrevColValue >> 8) == 1))
		{
			ui16ColValue = ui16PrevColValue;

			// Store original WP character

			ui16SubColVal = ui16WpChar;
		}
		else if( (ucCharVal == 0x1B) &&						// repeat vowel?
					(ui16PrevColValue >= 0x100) &&
					(ui16PrevColValue < COLS_ASIAN_MARKS) &&	// Previous kana char?
					((ui16PrevColValue & 0xFF) < sizeof( KanaColToVowel)))
		{
			// The last test keeps the index inside KanaColToVowel[]; a
			// previous kana without a vowel entry (for example N) falls
			// through to the "no collation value" case below.

			ui16ColValue = 0x0100 + KanaColToVowel[ ui16PrevColValue & 0xFF ];

			// Store original WP character

			ui16SubColVal = ui16WpChar;
		}
		else
		{
			ui16ReturnValue = 0;
			ui16ColValue = 0xFFFF;			// No collation value
			ui16SubColVal = ui16WpChar;	// Never have changed if gets here
		}
	}

Exit:

	// Set return values

	*pui16ColValue = ui16ColValue;
	*pui16SubColVal = ui16SubColVal;
	*pucCaseBits = ucCaseBits;

	return( ui16ReturnValue);
}
/****************************************************************************
Desc:	Convert a zenkaku (double wide) char to a hankaku (single wide) char
In:		ui16WpChar            - double wide WP character to convert
		DakutenOrHandakutenRV - optional (may be NULL). Written ONLY when a
		                        katakana converts to a base character plus
		                        a voicing mark: it then receives 0xB3D
		                        (dakuten) or 0xB3E (handakuten). It is left
		                        untouched in every other case, so the caller
		                        must initialize it.
Ret:	Hankaku char or 0 if a conversion doesn't exist
Notes:	Taken from CHAR.ASM - zen2han_f routine
		Greek is limited to the two ranges that From8ToZen[] can produce,
		0x265E..0x2675 and 0x267E..0x2695. Values in 0x2656..0x265D or
		0x2676..0x267D return 0.
****************************************************************************/
FLMUINT16 ZenToHankaku(
	FLMUINT16	ui16WpChar,
	FLMUINT16 *	DakutenOrHandakutenRV )
{
	FLMUINT16	ui16Hankaku = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMUINT		uiLoop;

	switch (ucCharSet)
	{
		// SYMBOLS

		case 0x24:
			for (uiLoop = 0;
				  uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL));
				  uiLoop++)
			{
				// The list is sorted, so once a table entry is greater than
				// or equal to the value sought the search is over.

				if (Zen24ToHankaku [uiLoop].ByteValue >= ucCharVal)
				{
					if (Zen24ToHankaku [uiLoop].ByteValue == ucCharVal)
					{
						ui16Hankaku = Zen24ToHankaku [uiLoop].WordValue;
					}
					break;
				}
			}
			break;

		// ROMAN - 0x250F..2559
		// Hiragana - 0x255E..2580

		case 0x25:
			if (ucCharVal >= 0x0F && ucCharVal < 0x5E)
			{
				ui16Hankaku = ucCharVal + 0x21;
			}
			break;

		// Katakana - 0x2600..2655
		// Greek - 0x265E..2695

		case 0x26:
			if (ucCharVal <= 0x55)			// Katakana range
			{
				FLMBYTE		ucCS11CharVal;
				FLMUINT16	ui16NextWpChar = 0;

				if ((ucCS11CharVal = MapCS26ToCharSet11[ ucCharVal ]) != 0xFF)
				{
					if (ucCS11CharVal & 0x80)
					{
						if( ucCS11CharVal & 0x40)
						{
							// Handakuten voicing

							ui16NextWpChar = 0xB3E;
						}
						else
						{
							// Dakuten voicing

							ui16NextWpChar = 0xB3D;
						}

						// Strip the voicing flags, leaving the CS11 value

						ucCS11CharVal &= 0x3F;
					}
					ui16Hankaku = 0x0b00 + ucCS11CharVal;
					if( ui16NextWpChar && DakutenOrHandakutenRV )
					{
						*DakutenOrHandakutenRV = ui16NextWpChar;
					}
				}
			}
			else if (ucCharVal >= 0x5E && ucCharVal <= 0x95)	// Greek
			{
				// The lower bound matters: without it 0x56..0x5D would wrap
				// around in the byte subtraction below.

				FLMBYTE	ucGreekChar = ucCharVal;
				FLMBYTE	bLowerCase = 0;

				// Make a zero based number.

				ucGreekChar -= 0x5E;

				// Check for lowercase

				if( ucGreekChar >= 0x20)
				{
					// Convert to upper case for now

					ucGreekChar -= 0x20;
					bLowerCase = 1;
				}

				// Only 24 letters exist in each case; anything else lies in
				// the gap between the uppercase and lowercase ranges and has
				// no single wide form.

				if (ucGreekChar < 24)
				{
					// Step over the two charset 8 letters that have no
					// double wide form of their own ([8,4] and [8,38]).

					if (ucGreekChar >= 2)
					{
						ucGreekChar++;
					}
					if (ucGreekChar >= 19)
					{
						ucGreekChar++;
					}

					// Convert to character set 8

					ui16Hankaku = (ucGreekChar << 1) + 0x800;
					if (bLowerCase)
					{
						// Adjust to lower case character

						ui16Hankaku++;
					}
				}
			}
			break;

		// Cyrillic

		case 0x27:

			// Uppercase?

			if (ucCharVal <= 0x20)
			{
				ui16Hankaku = (ucCharVal << 1) + 0xa00;
			}
			else if (ucCharVal >= 0x30 && ucCharVal <= 0x50)
			{
				// Lower case

				ui16Hankaku = ((ucCharVal - 0x30) << 1) + 0xa01;
			}
			break;
	}

	return( ui16Hankaku);
}
/****************************************************************************
Desc:	Convert a WPChar from hankaku (single wide) to zenkaku (double wide).
		1) Used to see if a char in CS11 can map to a double wide character
		2) Used to convert keys into original data.
In:		ui16WpChar     - single wide WP character to convert
		ui16NextWpChar - character that follows; only examined for a
		                 charset 11 katakana, where 0xB3D (dakuten) or
		                 0xB3E (handakuten) may be combined into the result
		pui16Zenkaku   - receives the double wide character, or 0 when no
		                 conversion exists. Must not be NULL.
Ret:	0 = no conversion
		1 = converted character to zenkaku
		2 = ui16NextWpChar dakuten or handakuten voicing got combined
Notes:	Taken from char.asm - han2zen()
		From8ToZen could be taken out and placed in code.
		Cyrillic is limited to [10,0]..[10,0x41], the range ZenToHankaku()
		converts back. Higher charset 10 values return 0.
****************************************************************************/
FLMUINT16 HanToZenkaku(
	FLMUINT16	ui16WpChar,
	FLMUINT16	ui16NextWpChar,
	FLMUINT16 *	pui16Zenkaku)
{
	FLMUINT16	ui16Zenkaku = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMUINT		uiLoop;
	FLMUINT16	ui16CharsUsed = 1;

	switch( ucCharSet)
	{
		// Character set 0 - symbols

		case 0:

			// Invalid? - all others are used.

			if (ucCharVal < 0x20)
			{
				;
			}
			else if (ucCharVal <= 0x2F)
			{
				// Symbols A

				ui16Zenkaku = 0x2400 + From0AToZen[ ucCharVal - 0x20 ];
			}
			else if (ucCharVal <= 0x39)
			{
				// 0..9

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x40)
			{
				// Symbols B

				ui16Zenkaku = 0x2400 + From0BToZen[ ucCharVal - 0x3A ];
			}
			else if (ucCharVal <= 0x5A)
			{
				// A..Z

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x60)
			{
				// Symbols C

				ui16Zenkaku = 0x2400 + From0CToZen[ ucCharVal - 0x5B ];
			}
			else if (ucCharVal <= 0x7A)
			{
				// a..z

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x7E)
			{
				// Symbols D

				ui16Zenkaku = 0x2400 + From0DToZen[ ucCharVal - 0x7B ];
			}
			break;

		// GREEK

		case 8:
			if ((ucCharVal >= sizeof( From8ToZen)) ||
				 ((ui16Zenkaku = 0x2600 + From8ToZen[ ucCharVal ]) == 0x26FF))
			{
				ui16Zenkaku = 0;
			}
			break;

		// CYRILLIC

		case 10:

			// Check range. Only [10,0]..[10,0x41] have a double wide form
			// (uppercase 0x2700..0x2720, lowercase 0x2730..0x2750). Without
			// this test a higher value would land on the code of a different
			// letter, e.g. [10,0x60] on 0x2730.

			if (ucCharVal <= 0x41)
			{
				ui16Zenkaku = 0x2700 + (ucCharVal >> 1);	// Uppercase value

				// Convert to lower case?

				if( ucCharVal & 0x01)
				{
					ui16Zenkaku += 0x30;
				}
			}
			break;

		// JAPANESE

		case 11:
			if (ucCharVal < 5)
			{
				ui16Zenkaku = 0x2400 + From11AToZen[ ucCharVal ];
			}
			else if (ucCharVal < 0x3D)		// katakana?
			{
				if ((ui16Zenkaku = 0x2600 +
							From11BToZen[ ucCharVal - 5 ]) == 0x26FF)
				{
					// Dash - convert to this

					ui16Zenkaku = 0x241b;
				}
				else
				{
					if (ui16NextWpChar == 0xB3D)		// dakuten? - voicing
					{
						// First check exception(s) then
						// check if voicing exists! - will NOT access out of table

						if ((ui16Zenkaku != 0x2652) &&	// is not 'N'?
							 (KanaSubColTbl[ ui16Zenkaku - 0x2600 + 1 ] == 3))
						{
							ui16Zenkaku++;

							// Return 2

							ui16CharsUsed++;
						}
					}
					else if (ui16NextWpChar == 0xB3E)	// handakuten? - voicing
					{
						// Check if voicing exists! - will NOT access out of table

						if (KanaSubColTbl [ui16Zenkaku - 0x2600 + 2 ] == 5)
						{
							ui16Zenkaku += 2;

							// Return 2

							ui16CharsUsed++;
						}
					}
				}
			}
			else if (ucCharVal == 0x3D)	// dakuten?
			{
				// Convert to voicing symbol

				ui16Zenkaku = 0x240A;
			}
			else if (ucCharVal == 0x3E)	// handakuten?
			{
				// Convert to voicing symbol

				ui16Zenkaku = 0x240B;
			}
			// else cannot convert
			break;

		// Other character sets
		// CS 1,4,5,6 - symbols

		default:

			// Instead of including more tables from char.asm, look down the
			// Zen24ToHankaku[] table for a matching value - not much slower.

			for (uiLoop = 0;
				  uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL));
				  uiLoop++)
			{
				if (Zen24ToHankaku[ uiLoop].WordValue == ui16WpChar)
				{
					ui16Zenkaku = 0x2400 + Zen24ToHankaku[ uiLoop].ByteValue;
					break;
				}
			}
			break;
	}

	if (!ui16Zenkaku)
	{
		// Change return value

		ui16CharsUsed = 0;
	}

	*pui16Zenkaku = ui16Zenkaku;
	return( ui16CharsUsed);
}