mars-flaim/flaim/src/fwpasia.cpp

//-------------------------------------------------------------------------
// Desc:	Collation for Asian languages.
// Tabs:	3
//
//		Copyright (c) 1991-1992,1994-2001,2003,2005-2006 Novell, Inc. All Rights Reserved.
//
//		This program is free software; you can redistribute it and/or
//		modify it under the terms of version 2 of the GNU General Public
//		License as published by the Free Software Foundation.
//
//		This program is distributed in the hope that it will be useful,
//		but WITHOUT ANY WARRANTY; without even the implied warranty of
//		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//		GNU General Public License for more details.
//
//		You should have received a copy of the GNU General Public License
//		along with this program; if not, contact Novell, Inc.
//
//		To contact Novell about this file by physical or electronic mail,
//		you may find current contact information at www.novell.com
//
// $Id: fwpasia.cpp 12301 2006-01-19 15:02:55 -0700 (Thu, 19 Jan 2006) dsanders $
//-------------------------------------------------------------------------

#include "flaimsys.h"

// Bits returned through pucCaseBits by fwpAsiaGetCollation().
// SET_CASE_BIT and SET_KATAKANA_BIT share the value 0x01: the case bit is
// set on the Latin/Greek/Cyrillic path, the katakana bit on the kana path.
// SET_WIDTH_BIT is set when the character was converted between its
// single wide and double wide forms while computing the collation value.

#define	SET_CASE_BIT			0x01
#define	SET_KATAKANA_BIT		0x01
#define	SET_WIDTH_BIT			0x02

// Collation value given to the dakuten in fwp_Ch24ColTbl (handakuten and
// chuuten use +1 and +2).  fwpAsiaGetCollation() also uses it as the
// exclusive upper bound when testing whether the previous collation value
// belongs to a kana character.

#define	COLS_ASIAN_MARKS		0x140


// Defined in another file.  Indexed here with the low byte of the diacritic
// returned by fwpCh6Brkcar() to obtain a sub-collation value.

extern FLMBYTE		fwp_dia60Tbl[];		/* Diacritic conversions */

/**----------------------------------------------
***  Tables
***  The tables below were taken from the
***  following files:
***    XCH2COL.ASM
***	 CMPWS.ASM   - k_diac (KanaSubColTbl[])
***---------------------------------------------*/

/**---------------------------------------------
***  Map special chars in CharSet (x24) to
***  collation values.
***
***  ByteValue is the character value within
***  charset 0x24, WordValue is the collation
***  value.  fwpAsiaGetCollation() scans the
***  table from the top and stops at the first
***  matching ByteValue.  When the collation
***  value found is below 0x100 the
***  sub-collation value is the entry's
***  position + 1, so the order of the entries
***  must not change.
***--------------------------------------------*/

BYTE_WORD_TBL fwp_Ch24ColTbl[] =	/* Position in the table+1 is subColValue */
{
	{1,	COLLS+2},					/* comma */
	{2,	COLLS+1},					/* maru	 */
	{5,	COLS_ASIAN_MARKS+2},		/* chuuten */
	{10,	COLS_ASIAN_MARKS},		/* dakuten */
	{11,	COLS_ASIAN_MARKS+1},		/* handakuten */
	{43,	COLS2+2},					/* angled brackets */
	{44,	COLS2+3},					/* */
	{49,	COLS2+2},					/* pointy brackets */
	{50,	COLS2+3},
	{51,	COLS2+2},					/* double pointy brackets */
	{52,	COLS2+3},
	{53,	COLS1},						/* Japanese quotes */
	{54,	COLS1},
	{55,	COLS1},						/* hollow Japanese quotes */
	{56,	COLS1},
	{57,	COLS2+2},					/* filled rounded brackets */
	{58,	COLS2+3}
};

/**-------------------------------------
***  Kana subcollation values
***  	 BIT 0: set if large char
***		 BIT 1: set if voiced
***		 BIT 2: set if half voiced
***  (so 3 = large + voiced and
***   5 = large + half voiced)
***
***  Indexed by the character value within
***  charset 0x26 (katakana, 0x00..0x55).
***  Hiragana (charset 0x25) uses the same
***  index after 0x5E is subtracted from
***  its character value.  The table has
***  the same number of entries and the
***  same order as KanaColTbl[].
***  HanToZenkaku() also reads it to test
***  whether a voiced or half voiced form
***  follows a given katakana.
***  Note:
***    To save space should be nibbles
***  IMPORTANT:
***    The '1' entries that do not have
***    a matching '0' entry have been
***    changed to zero to save space in
***    the subcollation area.
***    The original (unmodified) table is
***    not reproduced in this file.
***------------------------------------*/

FLMBYTE 	KanaSubColTbl[] =
{
	0,1,0,1,0,1,0,1,0,1,				/* a    A   i   I   u   U   e   E   o   O */
	1,3,0,3,0,3,1,3,0,3,				/* KA  GA  KI  GI  KU  GU  KE  GE  KO  GO */
	0,3,0,3,0,3,0,3,0,3,				/* SA  ZA SHI  JI  SU  ZU  SE  ZE  SO  ZO */
	0,3,0,3,0,1,3,0,3,0,3,			/* TA  DA CHI  JI tsu TSU  ZU  TE DE TO DO*/
	0,0,0,0,0,							/* NA NI NU NE NO									*/
	0,3,5,0,3,5,0,3,5,				/* HA BA PA HI BI PI FU BU PU					*/
	0,3,5,0,3,5,						/* HE BE PE HO BO PO		*/
	0,0,0,0,0,							/* MA MI MU ME MO			*/
	0,1,0,1,0,1,						/* ya YA yu YU yo YO		*/
	0,0,0,0,0,							/* RA RI RU RE RO			*/
	0,1,0,0,0,							/* wa WA WI WE WO			*/
	0,3,0,0								/*  N VU ka ke				*/
};

/**
***  Map katakana (CharSet x26) to collation values
***  kana collating values are two byte values
***  where the high byte is 0x01.
***
***  Only the low byte is stored here; fwpAsiaGetCollation() adds 0x0100.
***  The index is the character value within charset 0x26 (0x00..0x55);
***  hiragana (charset 0x25) uses the same index after 0x5E is subtracted.
***  Small/large and voiced/unvoiced forms of a kana share one collation
***  value - KanaSubColTbl[], which has the same layout, tells them apart.
***  The largest value in the table is 47 (N).
**/

FLMBYTE 	KanaColTbl[] =
{
	 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,/* a    A   i   I   u   U   e   E   o   O */
 	 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,/* KA  GA  KI  GI  KU  GU  KE  GE  KO  GO */
	10,10,11,11,12,12,13,13,14,14,/* SA  ZA SHI  JI  SU  ZU  SE  ZE  SO  ZO */
	15,15,16,16,17,17,17,18,18,19,19,/* TA DA CHI JI tsu TSU  ZU  TE DE TO DO*/
	20,21,22,23,24,					/* NA NI NU NE NO									*/
	25,25,25,26,26,26,27,27,27,	/* HA BA PA HI BI PI FU BU PU					*/
	28,28,28,29,29,29,				/* HE BE PE HO BO PO		*/
	30,31,32,33,34,					/* MA MI MU ME MO			*/
	35,35,36,36,37,37,				/* ya YA yu YU yo YO		*/
	38,39,40,41,42,					/* RA RI RU RE RO			*/
	43,43,44,45,46,					/* wa WA WI WE WO			*/
	47, 2, 5, 8							/*  N VU ka ke				*/
};


/**---------------------------------------
***  Map KataKana collated value to vowel
***  value for use for the previous char.
***
***  Indexed by the low byte of a kana
***  collation value (the values stored in
***  KanaColTbl[]).  Each entry is the low
***  byte of the collation value of that
***  kana's vowel (a=0, i=1, u=2, e=3, o=4).
***  The table has 47 entries (0..46).
***  There is no entry for N (collation
***  value 47), so an index of 47 or more
***  is outside the table.
***--------------------------------------*/
FLMBYTE 	KanaColToVowel[] =
{
	0,1,2,3,4,		/*  a   i   u  e  o */
	0,1,2,3,4,		/* ka  ki  ku ke ko */
	0,1,2,3,4,		/* sa shi  su se so */
	0,1,2,3,4,		/* ta chi tsu te to */
	0,1,2,3,4,		/* na  ni  nu ne no */
	0,1,2,3,4,		/* ha  hi  hu he ho */
	0,1,2,3,4,		/* ma  mi  mu me mo */
	0,2,4,			/* ya  yu  yo		  */
	0,1,2,3,4,		/* ra  ri  ru re ro */
	0,1,3,4,			/* wa  wi  we wo	  */
};

/**
***  Convert Zenkaku (double wide) to Hankaku (single wide)
***  Character set 0x24 maps to single wide chars in other char sets.
***  This enables collation values to be found on some symbols.
***  This is also used to convert symbols from hankaku to Zen24.
***
***  ByteValue is the character value within charset 0x24, WordValue is the
***  complete single wide WP character (charset in the high byte).
***  The entries MUST stay sorted by ascending ByteValue: ZenToHankaku()
***  stops searching at the first entry whose ByteValue is greater than or
***  equal to the value it is looking for.  HanToZenkaku() searches the
***  table in the other direction, by WordValue.
**/

BYTE_WORD_TBL	Zen24ToHankaku[] = {
	{	0  ,0x0020 },		/* space */
	{	1  ,0x0b03 },		/* japanese comma */
	{	2  ,0x0b00 },		/* circle period */
	{	3  ,  44	 },		/* comma */
	{	4  ,  46	 },		/* period */
	{	5  ,0x0b04 },		/* center dot	 */
	{	6  ,  58	 },		/* colon */
	{	7  ,  59	 },		/* semicolon */
	{	8  ,  63	 },		/* question mark */
	{	9  ,  33	 },		/* exclamation mark */
	{	10 ,0x0b3d },		/* dakuten */
	{	11 ,0x0b3e },		/* handakuten */
	{	12 ,0x0106 },		/* accent mark */
	{	13 ,  96	 },		/* accent mark */
	{	14 ,0x0107 },		/* umlaut */
	{	15 ,  94	 },		/* caret */
	{	16 ,0x0108 },		/* macron */
	{	17 ,  95	 },		/* underscore */
	{	27 ,0x0b0f },		/* extend vowel */
	{	28 ,0x0422 },		/* mdash */
	{	29 ,  45	 },		/* hyphen */
	{	30 ,  47  },     	/* slash */
	{	31 ,0x0607 },		/* backslash */
	{	32 , 126	 },		/* tilde */
	{	33 ,0x0611 },		/* doubleline */
	{	34 ,0x0609 },		/* line */
	{	37 ,0x041d },		/* left apostrophe */
	{	38 ,0x041c },		/* right apostrophe */
	{	39 ,0x0420 },		/* left quote */
	{	40 ,0x041f },		/* right quote */
	{	41 ,  40	 },		/* left paren */
	{	42 ,  41	 },		/* right paren */
	{	45 ,  91	 },		/* left bracket */
	{	46 ,  93	 },		/* right bracket */
	{	47 , 123	 },		/* left curly bracket */
	{	48 , 125	 },		/* right curly bracket */
	{	53 ,0x0b01 },		/* left j quote */
	{	54 ,0x0b02 },		/* right j quote */
	{	59 ,  43	 },		/* plus */
	{	60 ,0x0600 },		/* minus */
	{	61 ,0x0601 },		/* plus/minus */
	{	62 ,0x0627 },		/* times */
	{	63 ,0x0608 },		/* divide */
	{	64 ,  61	 },		/* equal */
	{	65 ,0x0663 },		/* unequal */
	{	66 ,  60	 },		/* less */
	{	67 ,  62	 },		/* greater */
	{	68 ,0x0602 },		/* less/equal */
	{	69 ,0x0603 },		/* greater/equal */
	{	70 ,0x0613 },		/* infinity */
	{	71 ,0x0666 },		/* triangle dots */
	{	72 ,0x0504 },		/* man */
	{	73 ,0x0505 },		/* woman */
	{	75 ,0x062d },		/* prime */
	{	76 ,0x062e },		/* double prime */
	{	78 ,0x040c },		/* yen */
	{	79 ,  36	 },		/* $ */
	{	80 ,0x0413 },		/* cent */
	{	81 ,0x040b },		/* pound */
	{	82 ,  37	 },		/* % */
	{	83 ,  35	 },		/* # */
	{	84 ,  38	 },		/* & */
	{	85 ,  42	 },		/* * */
	{	86 ,  64	 },		/* @ */
	{	87 ,0x0406 },		/* squiggle */
	{	89 ,0x06b8 },		/* filled star */
	{	90 ,0x0425 },		/* hollow circle */
	{	91 ,0x042c },		/* filled circle */
	{	93 ,0x065f },		/* hollow diamond */
	{	94 ,0x0660 },		/* filled diamond */
	{	95 ,0x0426 },		/* hollow box */
	{	96 ,0x042e },		/* filled box */
	{	97 ,0x0688 },		/* hollow triangle */
	{	99 ,0x0689 },		/* hollow upside down triangle */
	{	103,0x0615 },		/* right arrow */
	{	104,0x0616 },		/* left arrow */
	{	105,0x0617 },		/* up arrow */
	{	106,0x0622 },		/* down arrow */
	{	119,0x060f },		/*  */
	{	121,0x0645 },		/*  */
	{	122,0x0646 },
	{	123,0x0643 },
	{	124,0x0644 },
	{	125,0x0642 },		/* union */
	{	126,0x0610 },		/* intersection */
	{	135,0x0655 },
	{	136,0x0656 },
	{	138,0x0638 },		/* right arrow */
	{	139,0x063c },		/* left/right arrow */
	{	140,0x067a },		/*  */
	{	141,0x0679 },
	{	153,0x064f },		/* angle */
	{	154,0x0659 },
	{	155,0x065a },
	{	156,0x062c },
	{	157,0x062b },
	{	158,0x060e },
	{	159,0x06b0 },
	{	160,0x064d },
	{	161,0x064e },
	{	162,0x050e },		/* square root */
	{	164,0x0604 },
	{	175,0x0623 },		/* angstrom */
	{	176,0x044b },		/* percent */
	{	177,0x051b },		/* sharp */
	{	178,0x051c },		/* flat */
	{	179,0x0509 },		/* musical note	 */
	{	180,0x0427 },		/* dagger */
	{	181,0x0428 },		/* double dagger */
	{	182,0x0405 },		/* paragraph */
	{	187,0x068f }		/* big hollow circle */
};

/**
***	Maps CS26 to CharSet 11
***	Taken from Char.asm
***   Used to uncollate characters for FLAIM - placed here for consistency
***	0x80 - add dakuten
***   0xC0 - add handakuten
***   0xFF - no mapping exists
***
***   Indexed by the character value within charset 0x26 (0x00..0x55).
***   ZenToHankaku() tests for 0xFF first, then tests bit 0x80 (a voicing
***   mark follows) and bit 0x40 (the mark is a handakuten rather than a
***   dakuten), and uses the low six bits as the charset 11 character value.
**/
FLMBYTE 	MapCS26ToCharSet11[ 86 ] = {
	0x06,	/* 0     a  */
	0x10,	/*	1     A  */
	0x07,	/*	2     i  */
	0x11,	/*	3     I  */
	0x08,	/*	4     u  */
	0x12,	/*	5     U  */
	0x09,	/*	6     e  */
	0x13,	/*	7     E  */
	0x0a,	/*	8     o  */
	0x14,	/*	9     O  */

	0x15,	/*	0x0a  KA */
	0x95,	/*       GA - 21 followed by 0x3D dakuten */

	0x16,	/* 0x0c  KI */
	0x96,	/*       GI */
	0x17,	/*	0x0e  KU */
	0x97,	/*       GU */
	0x18,	/* 0x10  KE */
	0x98,	/*       GE */
	0x19,	/* 0x12  KO */
	0x99,	/*       GO */

	0x1a,	/*	0x14  SA */
	0x9a,	/*       ZA */
	0x1b,	/*	0x16  SHI */
	0x9b,	/*       JI */
	0x1c,	/*	0x18  SU */
	0x9c,	/*       ZU */
	0x1d,	/*	0x1a  SE */
	0x9d,	/*       ZE */
	0x1e,	/*	0x1c  SO */
	0x9e,	/*       ZO */

	0x1f,	/*	0x1e  TA */
	0x9f,	/*       DA */
	0x20,	/*	0x20  CHI */
	0xa0,	/*       JI */
	0x0e,	/*	0x22  small tsu */
	0x21,	/*	0x23  TSU */
	0xa1,	/*       ZU */
	0x22,	/*	0x25  TE */
	0xa2,	/*       DE */
	0x23,	/*	0x27  TO */
	0xa3,	/*       DO */

	0x24,	/*	0x29  NA */
	0x25,	/*	0x2a  NI */
	0x26,	/* 0x2b  NU */
	0x27,	/*	0x2c  NE */
	0x28,	/*	0x2d  NO */

	0x29,	/*	0x2e  HA */
	0xa9,	/* 0x2f  BA */
	0xe9,	/* 0x30  PA */
	0x2a,	/*	0x31  HI */
	0xaa,	/* 0x32  BI */
	0xea,	/* 0x33  PI */
	0x2b,	/*	0x34  FU */
	0xab,	/* 0x35  BU */
	0xeb,	/* 0x36  PU */
	0x2c,	/*	0x37  HE */
	0xac,	/* 0x38  BE */
	0xec,	/* 0x39  PE */
	0x2d,	/*	0x3a  HO */
	0xad,	/* 0x3b  BO */
	0xed,	/* 0x3c  PO */

	0x2e,	/*	0x3d  MA */
	0x2f,	/*	0x3e  MI */
	0x30,	/*	0x3f  MU */
	0x31,	/*	0x40  ME */
	0x32,	/*	0x41  MO */

	0x0b,	/*	0x42  small ya */
	0x33,	/*	0x43  YA */
	0x0c,	/*	0x44  small yu */
	0x34,	/*	0x45  YU */
	0x0d,	/*	0x46  small yo */
	0x35,	/*	0x47  YO */

	0x36,	/*	0x48  RA */
	0x37,	/*	0x49  RI */
	0x38,	/*	0x4a  RU */
	0x39,	/*	0x4b  RE */
	0x3a,	/*	0x4c  RO */

	0xff,	/* 0x4d  small wa */
	0x3b,	/*	0x4e  WA */
	0xff,	/* 0x4f  WI */
	0xff,	/* 0x50  WE */
	0x05,	/*	0x51	WO */

	0x3c,	/*	0x52	N  */
	0xff,	/* 0x53  VU */
	0xff, /* 0x54  ka */
	0xff 	/* 0x55  ke */
};


/**
***  Conversion from single (Hankaku) to double (Zenkaku) wide characters
***  Used in HanToZenkaku()
**/

/* Maps the charset 0 (ASCII) symbols 0x20..0x2F to character values within
   CS24 (double wide punctuation).  HanToZenkaku() indexes the table with
   (character value - 0x20).
   The entry for ' was changed because of windows.
   Historical note kept from the original table: "Was 187 for ! and 186
   for '". */

FLMBYTE  From0AToZen[] = {
	0x00,		/* space */
	0x09,		/* ! */
	0x28,		/* " */
	0x53,		/* # */
	0x4f,		/* $ */
	0x52,		/* % */
	0x54,		/* & */
	0x26,		/* ' */
	0x29,		/* ( */
	0x2a,		/* ) */
	0x55,		/* * */
	0x3b,		/* + */
	0x03,		/* , */
	0x1d,		/* - */
	0x04,		/* . */
	0x1e		/* / */
};

/* Maps the charset 0 (ASCII) symbols 0x3A..0x40 to character values within
   CS24.  HanToZenkaku() indexes the table with (character value - 0x3A). */

FLMBYTE  From0BToZen[] = {
	0x06,		/* : */
	0x07,		/* ; */
	0x42,		/* < */
	0x40,		/* = */
	0x43,		/* > */
	0x08,		/* ? */
	0x56		/* @ */
};

/* Maps the charset 0 (ASCII) symbols 0x5B..0x60 to character values within
   CS24.  HanToZenkaku() indexes the table with (character value - 0x5B). */

FLMBYTE  From0CToZen[] = {
	0x2d,		/* left bracket */
	0x1f,		/* backslash */
	0x2e,		/* right bracket */
	0x0f,		/* caret */
	0x11,		/* underscore */
	0x0d		/* grave accent */
};

/* Maps the charset 0 (ASCII) symbols 0x7B..0x7E to character values within
   CS24.  HanToZenkaku() indexes the table with (character value - 0x7B). */

FLMBYTE  From0DToZen[] = {
	0x2f,		/* left curly bracket */
	0x22,		/* vertical bar */
	0x30,		/* right curly bracket */
	0x20		/* tilde */
};

/* Indexed by a charset 8 (Greek) character value, 0..51.  Each entry is the
   character value within charset 0x26 of the double wide letter; even
   indexes (upper case) give 0x5e..0x75 and odd indexes (lower case) give
   0x7e..0x95.  0xFF means there is no double wide equivalent -
   HanToZenkaku() returns "no conversion" for it and for any index past
   the end of the table. */

FLMBYTE  From8ToZen[] = {		/* Fast way to convert from 8 to zen */
	0x5e, 0x7e, 0x5f, 0x7f, 0x5f, 0xFF, 0x60, 0x80,
	0x61, 0x81, 0x62, 0x82, 0x63, 0x83, 0x64, 0x84,
	0x65, 0x85, 0x66, 0x86, 0x67, 0x87, 0x68, 0x88,
	0x69, 0x89, 0x6a, 0x8a, 0x6b, 0x8b, 0x6c, 0x8c,
	0x6d, 0x8d, 0x6e, 0x8e, 0x6f, 0x8f, 0x6f, 0xFF,
	0x70, 0x90, 0x71, 0x91, 0x72, 0x92, 0x73, 0x93,
	0x74, 0x94, 0x75, 0x95
  };

/* Charset 11 punctuation (character values 0..4) to the matching character
   value within CS24.  The dash is not in this table - HanToZenkaku()
   handles it separately. */

static FLMBYTE  From11AToZen[] = {
	0x02,		/* [11,0] japanese period */
	0x35,		/* [11,1] left bracket    */
	0x36,		/* [11,2] right bracket   */
	0x01,		/* [11,3] comma           */
	0x05		/* [11,4] chuuten         */
};

/* Indexed by (charset 11 character value - 5), for the values 0x05..0x3C.
   Each entry is the character value within charset 0x26 of the unvoiced
   double wide katakana.  0xFF marks the dash, which HanToZenkaku()
   converts to 0x241b instead.  The table has 56 entries, one for every
   index HanToZenkaku() can produce. */

static FLMBYTE	From11BToZen[] = {		/* 11 to 26 (katakana) from 11,5 */
		0x51,										/* wo 									*/
		0,2,4,6,8,0x42,0x44,0x46,0x22,	/* small a i u e o ya yu yo tsu	*/
		0xFF, 1, 3, 5, 7, 9,					/* dash (x241b) a i u e o			*/
		0x0a, 0x0c, 0x0e, 0x10, 0x12,		/* ka ki ku ke ko						*/
		0x14, 0x16, 0x18, 0x1a, 0x1c,		/* sa shi su se so				*/
		0x1e, 0x20, 0x23, 0x25, 0x27,		/* ta chi tsu te to				*/
		0x29, 0x2a, 0x2b, 0x2c, 0x2d,		/* na ni nu ne no					*/
		0x2e, 0x31, 0x34, 0x37, 0x3a,		/* ha hi fu he ho					*/
		0x3d, 0x3e, 0x3f, 0x40, 0x41,		/* ma mi mu me mo					*/
		0x43, 0x45, 0x47,						/* ya yu yo							*/
		0x48, 0x49, 0x4a, 0x4b, 0x4c,		/* ra ri ru re ro					*/
		0x4e, 0x52								/* WA N								*/
  };												/* does not have wa WI WE VU ka ke */

/****************************************************************************
Desc:	Returns the collation value of the input Wp character.
		If in charset 11 will convert the character to Zenkaku (double wide).
In:	ui16WpChar - Char to collate off of - could be in CS0..14 or x24..up
		ui16NextWpChar - next WP char for CS11 voicing marks
		ui16PrevColValue - previous collating value - for repeat/vowel repeat
		pui16ColValue - returns 2 byte collation value
		pui16SubColVal - 0, 6 or 16 bit value for the latin sub collation
								or the kana size & vowel voicing
								001 - set if large (upper) character
								010 - set if voiced
								100 - set if half voiced

		pucCaseBits - returns 2 bits
				Latin/Greek/Cyrillic
					01 - case bit set if character is uppercase
					10 - double wide character in CS 0x25xx, 0x26xx and 0x27xx
				Japanese
					00 - double wide hiragana 0x255e..25b0
					01 - double wide katakana 0x2600..2655
					10 - double wide symbols that map to charset 11
					11 - single wide katakana from charset 11
		uiUppercaseFlag - when non-zero the case bit is always set
Ret:	0 - no valid collation value
				high values set for pui16ColValue
				Sub-collation gets original WP character value
		1 - valid collation value
		2 - valid collation value and used the ui16NextWpChar

Notes:	Code taken from XCH2COL.ASM - routine xch2col_f
			also from CMPWS.ASM - routine getcase

			Two table lookups are range checked so that they can never read
			past the end of their table:
			- a charset 0x25 character above the last hiragana that has an
			  entry in KanaColTbl[] is not treated as kana; it ends up with
			  "no valid collation value" (return 0).
			- a repeat-vowel mark is only resolved through KanaColToVowel[]
			  when the previous collation value has an entry in that table.
			  There is no entry for N, so a repeat-vowel mark that follows
			  N returns 0 as well.
Terms:
	HANKAKU - single wide characters in charsets 0..14
	ZENKAKU - double wide characters in charsets 0x24..end of kanji
	KANJI   - collation values are 0x2900 less than WPChar value

****************************************************************************/
FLMUINT16 fwpAsiaGetCollation(
	FLMUINT16	ui16WpChar,				// WP char to get collation values
	FLMUINT16	ui16NextWpChar,		// Next WP char - for CS11 voicing marks
	FLMUINT16   ui16PrevColValue,		// Previous collating value
	FLMUINT16 *	pui16ColValue,			// Returns collation value
	FLMUINT16 * pui16SubColVal,		// Returns sub-collation value
	FLMBYTE *	pucCaseBits,		 	// Returns case bits value
	FLMUINT16	uiUppercaseFlag		// Set if to convert to uppercase
	)
{
	FLMUINT16	ui16ColValue;
	FLMUINT16	ui16SubColVal;
	FLMBYTE		ucCaseBits = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMUINT16	ui16Hankaku;
	FLMUINT		uiLoop;
	FLMUINT16	ui16ReturnValue = 1;

	ui16ColValue = ui16SubColVal = 0;

	// Kanji or above

	if (ucCharSet >= 0x2B)
	{

		// Puts 2 or above into high byte.

		ui16ColValue = ui16WpChar - 0x2900;

		// No subcollation or case bits need to be set

		goto	Exit;
	}

	// Single wide character? (HANKAKU)

	if (ucCharSet < 11)
	{
		// Get the values from a non-asian character
		// LATIN, GREEK or CYRILLIC
		// The width bit may have been set on a jump to
		// label from below.

Latin_Greek_Cyrillic:

		// YES: Pass US_LANG because this is what we want -
		// Prevents double character sorting.

		ui16ColValue = fwpGetCollation( ui16WpChar, US_LANG);

		if (uiUppercaseFlag || fwpIsUpper( ui16WpChar))
		{
			// Uppercase - set case bit

			ucCaseBits |= SET_CASE_BIT;
		}

		// Character for which there is no collation value?

		if (ui16ColValue == COLS0)
		{
			ui16ReturnValue = 0;
			if (!fwpIsUpper( ui16WpChar))
			{

				// Convert to uppercase

				ui16WpChar--;
			}
			ui16ColValue = 0xFFFF;
			ui16SubColVal = ui16WpChar;
		}
		else if (ucCharSet) 				// Don't bother with ascii
		{
			if (!fwpIsUpper( ui16WpChar))
			{

				// Convert to uppercase

				ui16WpChar--;
			}

			if (ucCharSet == CHSMUL1)
			{
				FLMUINT16	ui16Base;
				FLMUINT16	ui16Diacritic;

				// When the character can be broken into a base and a
				// diacritic the sub-collation value comes from the
				// diacritic table, otherwise it is the character itself.

				ui16SubColVal = !fwpCh6Brkcar( ui16WpChar, &ui16Base,
															&ui16Diacritic)
									  ? fwp_dia60Tbl[ ui16Diacritic & 0xFF]
									  : ui16WpChar;
			}
			else if (ucCharSet == CHSGREK) // GREEK
			{
				if (ui16WpChar >= 0x834 ||		// [8,52] or above
					 ui16WpChar == 0x804 ||		// [8,4] BETA Medial | Terminal
					 ui16WpChar == 0x826)		// [8,38] SIGMA terminal
				{
					ui16SubColVal = ui16WpChar;
				}
			}
			else if (ucCharSet == CHSCYR)	// CYRILLIC
			{
				if (ui16WpChar >= 0xA90)		// [10, 144] or above
				{
					ui16SubColVal = ui16WpChar;	// Dup collation values
				}
			}
			// else don't need a sub collation value
		}
		goto	Exit;
	}

	// Single wide Japanese character?

	if (ucCharSet == 11)
	{
		FLMUINT16	ui16KanaChar;

		// Convert charset 11 to Zenkaku (double wide) CS24 or CS26 hex.
		// All characters in charset 11 will convert to CS24 or CS26.
		// when combining the collation and the sub-collation values.

		if (HanToZenkaku( ui16WpChar, ui16NextWpChar, &ui16KanaChar ) == 2)
		{

			// Return 2

			ui16ReturnValue++;
		}

		ucCaseBits |= SET_WIDTH_BIT;	// Set so will allow to go back
		ui16WpChar = ui16KanaChar;		// If in CS24 will fall through to ZenKaku
		ucCharSet = ui16KanaChar >> 8;
		ucCharVal = ui16KanaChar & 0xFF;
	}

	if (ui16WpChar < 0x2400)
	{

		// In some other character set

		goto Latin_Greek_Cyrillic;
	}

	if (ui16WpChar >= 0x255e &&	// Hiragana?
		 ui16WpChar <= 0x2655)		// Katakana?
	{
		// HIRAGANA & KATAKANA
		//		Kana contains both hiragana and katakana.
		//		The tables contain the same characters in same order

		FLMBYTE	ucKanaIndex = ucCharVal;

		if (ucCharSet == 0x25)
		{

			// Change value to be in character set 26

			ucKanaIndex -= 0x5E;
		}

		// The range test above lets 0x25b4..0x25ff through, but those
		// values have no entry in the kana tables.  Only use the tables
		// when the index is inside them; otherwise fall through and let
		// the code below report that there is no collation value.

		if (ucKanaIndex < (sizeof( KanaColTbl) / sizeof( KanaColTbl[ 0])))
		{
			if (ui16WpChar >= 0x2600)
			{
				ucCaseBits |= SET_KATAKANA_BIT;
			}

			ui16ColValue = 0x0100 + KanaColTbl[ ucKanaIndex ];
			ui16SubColVal = KanaSubColTbl[ ucKanaIndex ];
			goto Exit;
		}
	}

	// ZenKaku - means any double wide character
	// Hankaku - single wide character

	//		Inputs:	0x2400..2559	symbols..latin  - Zenkaku
	//					0x265B..2750	greek..cyrillic - Zenkaku

	//	SET_WIDTH_BIT may have been set if original char
	// was in 11 and got converted to CS24. [1,2,5,27(extendedVowel),53,54]
	// Original chars from CS11 will have some collation value that when
	// combined with the sub-collation value will format a character in
	// CS24.  The width bit will then convert back to CS11.

	if ((ui16Hankaku = ZenToHankaku( ui16WpChar, (FLMUINT16 *) 0 )) != 0)
	{
		if ((ui16Hankaku >> 8) != 11)			// if CharSet11 was a CS24 symbol
		{
			ui16WpChar = ui16Hankaku;			// May be CS24 symbol/latin/gk/cy
			ucCharSet = ui16WpChar >> 8;
			ucCharVal = ui16WpChar & 0xFF;
			ucCaseBits |= SET_WIDTH_BIT;		// Latin symbols double wide
			goto Latin_Greek_Cyrillic;
		}
	}

	// 0x2400..0x24bc Japanese symbols that cannot be converted to Hankaku.
	// All 6 original symbol chars from 11 will also be here.
	// First try to find a collation value of the symbol.
	// The sub-collation value will be the position in the CS24 table + 1.

	for (uiLoop = 0;
		  uiLoop < (sizeof(fwp_Ch24ColTbl) / sizeof(BYTE_WORD_TBL));
		  uiLoop++ )
	{
		if (ucCharVal == fwp_Ch24ColTbl[ uiLoop].ByteValue)
		{
			if ((ui16ColValue = fwp_Ch24ColTbl[ uiLoop].WordValue) < 0x100)
			{

				// Don't save for chuuten, dakuten, handakuten

				ui16SubColVal = (FLMUINT16)(uiLoop + 1);
			}
			break;
		}
	}
	if (!ui16ColValue)
	{

		// Now see if it's a repeat or repeat-vowel character

		if( (((ucCharVal >= 0x12) && (ucCharVal <= 0x15)) ||
			   (ucCharVal == 0x17) ||
			   (ucCharVal == 0x18)) &&
				((ui16PrevColValue >> 8) == 1))
		{
			ui16ColValue = ui16PrevColValue;

			// Store original WP character

			ui16SubColVal = ui16WpChar;
		}
		else if( (ucCharVal == 0x1B) &&						// repeat vowel?
					(ui16PrevColValue >= 0x100) &&
					(ui16PrevColValue < COLS_ASIAN_MARKS) &&	// Previous kana char?
					((FLMUINT)(ui16PrevColValue & 0xFF) <
						(sizeof( KanaColToVowel) / sizeof( KanaColToVowel[ 0]))))
		{
			// The last test keeps the lookup inside KanaColToVowel[],
			// which has no entry for N or for any larger value.

			ui16ColValue = 0x0100 + KanaColToVowel[ ui16PrevColValue & 0xFF ];

			// Store original WP character

			ui16SubColVal = ui16WpChar;
		}
		else
		{
			ui16ReturnValue = 0;
			ui16ColValue = 0xFFFF;			// No collation value
			ui16SubColVal = ui16WpChar;	// Never have changed if gets here
		}
	}

Exit:

	// Set return values

	*pui16ColValue = ui16ColValue;
	*pui16SubColVal = ui16SubColVal;
	*pucCaseBits = ucCaseBits;

	return( ui16ReturnValue);
}

/****************************************************************************
Desc:	Convert a zenkaku (double wide) char to a hankaku (single wide) char
In:	ui16WpChar - double wide WP character (charset 0x24..0x27)
		DakutenOrHandakutenRV - may be NULL.  When not NULL and the katakana
				converts to a single wide katakana followed by a voicing
				mark, receives that mark (0xB3D dakuten or 0xB3E handakuten).
				It is NOT written in any other case, so the caller has to
				initialize it.
Ret:	Hankaku char or 0 if a conversion doesn't exist
Notes:	Taken from CHAR.ASM -  zen2han_f routine
			Greek letters occupy 0x265E..0x2675 (upper case) and
			0x267E..0x2695 (lower case) - the values From8ToZen[] maps to.
			Values in charset 0x26 between the katakana and the Greek
			letters (0x56..0x5D) and between the two Greek cases
			(0x76..0x7D) have no conversion and return 0.
****************************************************************************/
FLMUINT16 ZenToHankaku(
	FLMUINT16	ui16WpChar,
	FLMUINT16 * DakutenOrHandakutenRV )
{
	FLMUINT16	ui16Hankaku = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMUINT		uiLoop;

	switch (ucCharSet)
	{
		// SYMBOLS

		case 0x24:
			for (uiLoop = 0;
				  uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL));
				  uiLoop++)
			{
				// The table is sorted by ByteValue, so the search is over as
				// soon as an entry is greater than or equal to the value.

				if (Zen24ToHankaku [uiLoop].ByteValue >= ucCharVal)
				{
					if (Zen24ToHankaku [uiLoop].ByteValue == ucCharVal)
					{
						ui16Hankaku = Zen24ToHankaku [uiLoop].WordValue;
					}
					break;
				}
			}
			break;

		// ROMAN - 0x250F..2559
		// Hiragana - 0x255E..2580

		case 0x25:
			if (ucCharVal >= 0x0F && ucCharVal < 0x5E)
			{
				ui16Hankaku = ucCharVal + 0x21;
			}
			break;

		// Katakana - 0x2600..2655
		// Greek - 0x265E..2695

		case 0x26:
			if (ucCharVal <= 0x55)		// Katakana range
			{
				FLMBYTE		ucCS11CharVal;
				FLMUINT16	ui16NextWpChar = 0;

				if ((ucCS11CharVal = MapCS26ToCharSet11[ ucCharVal ]) != 0xFF)
				{
					if (ucCS11CharVal & 0x80)
					{
						if( ucCS11CharVal & 0x40)
						{

							// Handakuten voicing

							ui16NextWpChar = 0xB3E;
						}
						else
						{

							// Dakuten voicing

							ui16NextWpChar = 0xB3D;
						}

						// Strip the two flag bits to get the base character

						ucCS11CharVal &= 0x3F;
					}
					ui16Hankaku = 0x0b00 + ucCS11CharVal;
					if( ui16NextWpChar && DakutenOrHandakutenRV )
					{
						*DakutenOrHandakutenRV = ui16NextWpChar;
					}
				}
			}
			else if (ucCharVal >= 0x5E && ucCharVal <= 0x95)	// Greek
			{
				// The lower bound matters: without it 0x56..0x5D would
				// wrap around in the subtraction below and produce a
				// character in an unrelated character set.

				FLMBYTE	ucGreekChar = ucCharVal;

				// Make a zero based number.

				ucGreekChar -= 0x5E;

				// Check for lowercase
				if( ucGreekChar >= 0x20)
				{

					// Convert to upper case for now

					ucGreekChar -= 0x20;
				}

				// Each case has 24 letters (0..23).  Anything larger is in
				// the unused gap between the upper and lower case letters.

				if (ucGreekChar < 24)
				{
					// Charset 8 has two positions without a double wide
					// equivalent ([8,4] and [8,38]) - step over them.

					if (ucGreekChar >= 2)
					{
						ucGreekChar++;
					}
					if (ucGreekChar >= 19)
					{
						ucGreekChar++;
					}

					// Convert to character set 8

					ui16Hankaku = (ucGreekChar << 1) + 0x800;
					if (ucCharVal >= (0x5E + 0x20))
					{

						// Adjust to lower case character

						ui16Hankaku++;
					}
				}
			}
			break;

		// Cyrillic

		case 0x27:

			// Uppercase?

			if (ucCharVal <= 0x20)
			{
				ui16Hankaku = (ucCharVal << 1) + 0xa00;
			}
			else if (ucCharVal >= 0x30 && ucCharVal <= 0x50)
			{

				// Lower case

				ui16Hankaku = ((ucCharVal - 0x30) << 1) + 0xa01;
			}
			break;
	}

	return( ui16Hankaku);
}

/****************************************************************************
Desc:		Convert a WPChar from hankaku (single wide) to zenkaku (double wide).
			1) Used to see if a char in CS11 can map to a double wide character
			2) Used to convert keys into original data.
In:		ui16WpChar - single wide WP character to convert
			ui16NextWpChar - character that follows ui16WpChar.  Only looked
					at for charset 11 katakana, where a following dakuten
					(0xB3D) or handakuten (0xB3E) is folded into the result
					when a voiced / half voiced katakana exists.
			pui16Zenkaku - always written: the double wide character, or 0
					when there is no conversion
Ret:		0 = no conversion
			1 = converted character to zenkaku
			2 = ui16NextWpChar dakuten or handakuten voicing got combined
Notes:	Taken from char.asm - han2zen()
			From8ToZen could be taken out and placed in code.
			Cyrillic: only [10,0]..[10,0x41] convert.  These are the
			characters ZenToHankaku() can produce from 0x2700..0x2720 and
			0x2730..0x2750.  Larger values return 0; they would otherwise
			land between the two cases or on top of another letter.
****************************************************************************/
FLMUINT16 HanToZenkaku(
	FLMUINT16	ui16WpChar,
	FLMUINT16	ui16NextWpChar,
	FLMUINT16 *	pui16Zenkaku)
{
	FLMUINT16	ui16Zenkaku = 0;
	FLMBYTE		ucCharSet = ui16WpChar >> 8;
	FLMBYTE		ucCharVal = ui16WpChar & 0xFF;
	FLMUINT		uiLoop;
	FLMUINT16	ui16CharsUsed = 1;

	switch( ucCharSet)
	{
		// Character set 0 - symbols

		case 0:

			// Invalid? - all others are used.

			if (ucCharVal < 0x20)
			{
				;
			}
			else if (ucCharVal <= 0x2F)
			{

				// Symbols A

				ui16Zenkaku = 0x2400 + From0AToZen[ ucCharVal - 0x20 ];
			}
			else if (ucCharVal <= 0x39)
			{

				// 0..9

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x40)
			{

				// Symbols B

				ui16Zenkaku = 0x2400 + From0BToZen[ ucCharVal - 0x3A ];
			}
			else if (ucCharVal <= 0x5A)
			{

				// A..Z

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x60)
			{

				// Symbols C

				ui16Zenkaku = 0x2400 + From0CToZen[ ucCharVal - 0x5B ];
			}
			else if (ucCharVal <= 0x7A)
			{

				// a..z

				ui16Zenkaku = 0x2500 + (ucCharVal - 0x21);
			}
			else if (ucCharVal <= 0x7E)
			{

				// Symbols D

				ui16Zenkaku = 0x2400 + From0DToZen[ ucCharVal - 0x7B ];
			}
			break;

		// GREEK

		case 8:

			// No conversion when past the end of the table or when the
			// table entry is 0xFF.

			if ((ucCharVal >= sizeof( From8ToZen)) ||
				 ((ui16Zenkaku = 0x2600 + From8ToZen[ ucCharVal ]) == 0x26FF))
			{
				ui16Zenkaku = 0;
			}
			break;

		// CYRILLIC

		case 10:

			// Check range - upper case letters convert to 0x2700..0x2720
			// and lower case letters to 0x2730..0x2750.  A value above
			// 0x41 has no double wide equivalent.

			if (ucCharVal <= 0x41)
			{
				ui16Zenkaku = 0x2700 + (ucCharVal >> 1);	// Uppercase value

				// Convert to lower case?

				if( ucCharVal & 0x01)
				{
					ui16Zenkaku += 0x30;
				}
			}
			break;

		// JAPANESE

		case 11:
			if (ucCharVal < 5)
			{
				ui16Zenkaku = 0x2400 + From11AToZen[ ucCharVal ];
			}
			else if (ucCharVal < 0x3D)		// katakana?
			{
				if ((ui16Zenkaku = 0x2600 +
							From11BToZen[ ucCharVal - 5 ]) == 0x26FF)
				{

					// Dash - convert to this

					ui16Zenkaku = 0x241b;
				}
				else
				{
					if (ui16NextWpChar == 0xB3D)		// dakuten? - voicing
					{

						// First check exception(s) then
						// check if voicing exists! - will NOT access out of table

						if ((ui16Zenkaku != 0x2652) &&	// is not 'N'?
							 (KanaSubColTbl[ ui16Zenkaku - 0x2600 + 1 ] == 3))
						{
							// The voiced form is the next character

							ui16Zenkaku++;

							// Return 2

							ui16CharsUsed++;
						}
					}
					else if (ui16NextWpChar == 0xB3E)	// handakuten? - voicing
					{

						// Check if voicing exists! - will NOT access out of table

						if (KanaSubColTbl [ui16Zenkaku - 0x2600 + 2 ] == 5)
						{
							// The half voiced form is two characters further

							ui16Zenkaku += 2;

							// Return 2

							ui16CharsUsed++;
						}
					}
				}
			}
			else if (ucCharVal == 0x3D)		// dakuten?
			{

				// Convert to voicing symbol

				ui16Zenkaku = 0x240A;
			}
			else if (ucCharVal == 0x3E)		// handakuten?
			{

				// Convert to voicing symbol

				ui16Zenkaku = 0x240B;
			}
			// else cannot convert
			break;

		// Other character sets
		// CS 1,4,5,6 - symbols

		default:

			// Instead of including more tables from char.asm - look down the
			// Zen24ToHankaku[] table for a matching value - not much slower.

			for (uiLoop = 0;
				  uiLoop < (sizeof(Zen24ToHankaku) / sizeof(BYTE_WORD_TBL));
				  uiLoop++)
			{
				if (Zen24ToHankaku[ uiLoop].WordValue == ui16WpChar)
				{
					ui16Zenkaku = 0x2400 + Zen24ToHankaku[ uiLoop].ByteValue;
					break;
				}
			}
			break;
	}
	if (!ui16Zenkaku)
	{

		// Change return value

		ui16CharsUsed = 0;
	}

	*pui16Zenkaku = ui16Zenkaku;
	return( ui16CharsUsed);
}