Added a few more routines required to port FLAIM to FTK.

git-svn-id: https://svn.code.sf.net/p/flaim/code/trunk@497 0109f412-320b-0410-ab79-c3e0c5ffbbe6
2006-06-01 19:42:56 +00:00
parent 488ad60c0d
commit 73aa65fabd
10 changed files with 1660 additions and 104 deletions
--- a/ftk/src/ftkcoll.cpp
+++ b/ftk/src/ftkcoll.cpp
@@ -345,7 +345,7 @@ static FLMUINT			gv_uiMaxWPChar = 0;
 /****************************************************************************
 Desc:		Table of # of characters in each character set
 ****************************************************************************/
-FLMBYTE fwp_c60_max[] =
+static FLMBYTE fwp_c60_max[] =
 {
 	ASC_N,	// ascii
 	ML1_N,	// multinational 1
@@ -375,7 +375,7 @@ Notes:		In the following table, the bits are numbered from left
 						EX. 00000000b   ;0-7
 						bit#   01234567
 ****************************************************************************/
-FLMBYTE fwp_ml1_cb60[] =
+static FLMBYTE fwp_ml1_cb60[] =
 {
 	0x00,    // 0-7
 	0x00,    // 8-15
@@ -419,7 +419,7 @@ Desc:		Format of index:
 Notes:	Diacritical char is always in same set as composed char
 			base is in same set if other table indicates, else in ASCII
 ****************************************************************************/
-BASE_DIACRIT_TABLE fwp_ml1c_table[] =
+static BASE_DIACRIT_TABLE fwp_ml1c_table[] =
 {
 	{'A',acute},
 	{'a',acute},
@@ -642,7 +642,7 @@ BASE_DIACRIT_TABLE fwp_ml1c_table[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-BASE_DIACRIT fwp_ml1c =
+static BASE_DIACRIT fwp_ml1c =
 {
 	216,    	// # of characters in table
 	26,      // start char
@@ -844,7 +844,7 @@ Desc:		Format of index:
 Notes:	Diacritical char is always in same set as composed char
 			base is in same set
 ****************************************************************************/
-static BASE_DIACRIT_TABLE  fwp_rus_c_table[] =
+static BASE_DIACRIT_TABLE fwp_rus_c_table[] =
 {
 	{ 14, 204 },					// ZHE with right descender
 	{ 15, 204 },					// zhe with right descender
@@ -981,7 +981,7 @@ static BASE_DIACRIT fwp_rus_c =
 /****************************************************************************
 Desc:		Table of pointers to character component tables.
 ****************************************************************************/
-BASE_DIACRIT * fwp_car60_c[ NCHSETS] =
+static BASE_DIACRIT * fwp_car60_c[ NCHSETS] =
 {
 	(BASE_DIACRIT*)0,    // no composed characters for ascii.
 	&fwp_ml1c,
@@ -1003,7 +1003,7 @@ BASE_DIACRIT * fwp_car60_c[ NCHSETS] =
 /****************************************************************************
 Desc:		Map special chars in CharSet (x24) to collation values
 ****************************************************************************/
-BYTE_WORD_TBL fwp_Ch24ColTbl[] =	// Position in the table+1 is subColValue
+static BYTE_WORD_TBL fwp_Ch24ColTbl[] =
 {
 	{1,	COLLS+2},					// comma
 	{2,	COLLS+1},					// maru
@@ -1038,7 +1038,7 @@ Notes:
 				the subcollation area.
 				The original table is listed below.
 ****************************************************************************/
-FLMBYTE KanaSubColTbl[] =
+static FLMBYTE KanaSubColTbl[] =
 {
 	0,1,0,1,0,1,0,1,0,1,				// a    A   i   I   u   U   e   E   o   O
 	1,3,0,3,0,3,1,3,0,3,				// KA  GA  KI  GI  KU  GU  KE  GE  KO  GO
@@ -1059,7 +1059,7 @@ Desc:		Map katakana (CharSet x26) to collation values
 			kana collating values are two byte values
 			where the high byte is 0x01.
 ****************************************************************************/
-FLMBYTE KanaColTbl[] =
+static FLMBYTE KanaColTbl[] =
 {
 	 0, 0, 1, 1, 2, 2, 3, 3, 4, 4,		// a    A   i   I   u   U   e   E   o   O
 	 5, 5, 6, 6, 7, 7, 8, 8, 9, 9,		// KA  GA  KI  GI  KU  GU  KE  GE  KO  GO
@@ -1079,7 +1079,7 @@ FLMBYTE KanaColTbl[] =
 Desc:		Map KataKana collated value to vowel value for
 			use for the previous char.
 ****************************************************************************/
-FLMBYTE KanaColToVowel[] =
+static FLMBYTE KanaColToVowel[] =
 {
 	0,1,2,3,4,		//  a   i   u  e  o
 	0,1,2,3,4,		// ka  ki  ku ke ko
@@ -1099,7 +1099,7 @@ Desc:		Convert Zenkaku (double wide) to Hankaku (single wide)
 			This enables collation values to be found on some symbols.
 			This is also used to convert symbols from hankaku to Zen24.
 ****************************************************************************/
-BYTE_WORD_TBL Zen24ToHankaku[] =
+static BYTE_WORD_TBL Zen24ToHankaku[] =
 {
 	{	0  ,0x0020 },		// space
 	{	1  ,0x0b03 },		// japanese comma
@@ -1221,7 +1221,7 @@ Desc:		Maps CS26 to CharSet 11
 				0xC0 - add handakuten
 				0xFF - no mapping exists
 ****************************************************************************/
-FLMBYTE MapCS26ToCharSet11[ 86] =
+static FLMBYTE MapCS26ToCharSet11[ 86] =
 {
 	0x06,	// 0     a
 	0x10,	// 1     A
@@ -1327,7 +1327,7 @@ Desc:		Conversion from single (Hankaku) to double (Zenkaku) wide characters
 			Used in flmWPHanToZenkaku()
 			Maps from charset 11 to CS24 (punctuation) (starting from 11,0)
 ****************************************************************************/
-FLMBYTE From0AToZen[] =				// ' changed because of windows
+static FLMBYTE From0AToZen[] =
 {
 	0, 	9,		40,	0x53, 		// sp ! " #
 	0x4f, 0x52, 0x54,	38, 			// $ % & '
@@ -1339,7 +1339,7 @@ FLMBYTE From0AToZen[] =				// ' changed because of windows
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE From0BToZen[] =
+static FLMBYTE From0BToZen[] =
 {
 	6,		7,		0x42,	0x40,			// : ; < =
 	0x43,	8,		0x56					// > ? @
@@ -1348,7 +1348,7 @@ FLMBYTE From0BToZen[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE From0CToZen[] =
+static FLMBYTE From0CToZen[] =
 {
 	0x2d,	0x1f,	0x2e,	0x0f,	0x11,	0x0d	// [ BACKSLASH ] ^ _ `
 };
@@ -1356,7 +1356,7 @@ FLMBYTE From0CToZen[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE From0DToZen[] =
+static FLMBYTE From0DToZen[] =
 {
 	0x2f,	0x22,	0x30,	0x20 			// { | } ~
 };
@@ -1364,7 +1364,7 @@ FLMBYTE From0DToZen[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE  From8ToZen[] =
+static FLMBYTE  From8ToZen[] =
 {
 	0x5e, 0x7e, 0x5f, 0x7f, 0x5f, 0xFF, 0x60, 0x80,
 	0x61, 0x81, 0x62, 0x82, 0x63, 0x83, 0x64, 0x84,
@@ -1378,7 +1378,7 @@ FLMBYTE  From8ToZen[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE From11AToZen[] =	// 11 to 24 punctuation except dash
+static FLMBYTE From11AToZen[] =
 {
 	2,								// japanese period
 	0x35,							// left bracket
@@ -1390,7 +1390,7 @@ FLMBYTE From11AToZen[] =	// 11 to 24 punctuation except dash
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE From11BToZen[] =				// 11 to 26 (katakana) from 11,5
+static FLMBYTE From11BToZen[] =
 {
 	0x51,										// wo
 	0,2,4,6,8,0x42,0x44,0x46,0x22,	// small a i u e o ya yu yo tsu
@@ -1409,7 +1409,7 @@ FLMBYTE From11BToZen[] =				// 11 to 26 (katakana) from 11,5
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMUINT16 fwp_indexi[] =
+static FLMUINT16 fwp_indexi[] =
 {
 	0,11,14,15,17,18,19,21,22,23,24,25,26,35,59
 };
@@ -1417,7 +1417,7 @@ FLMUINT16 fwp_indexi[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMUINT16 fwp_indexj[] =	// DOUBLE CHAR AREA - LANGUAGES
+static FLMUINT16 fwp_indexj[] =
 {
 	FLM_CA_LANG,	// Catalan (0)
 	FLM_CF_LANG,	// Canadian French
@@ -1486,7 +1486,7 @@ FLMUINT16 fwp_indexj[] =	// DOUBLE CHAR AREA - LANGUAGES
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMUINT16 fwp_valuea[] =
+static FLMUINT16 fwp_valuea[] =
 {
 //	DOUBLE CHAR STATE VALUES
 	STATE1,		// 00
@@ -1589,7 +1589,7 @@ FLMUINT16 fwp_valuea[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE fwp_asc60Tbl[ ASCTBLLEN + 2] =
+static FLMBYTE fwp_asc60Tbl[ ASCTBLLEN + 2] =
 {
 	0x20,			// initial character offset!!
 	ASCTBLLEN,	// len of this table
@@ -1693,7 +1693,7 @@ FLMBYTE fwp_asc60Tbl[ ASCTBLLEN + 2] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE fwp_mn60Tbl[ MNTBLLEN + 2] =		// multinational table
+static FLMBYTE fwp_mn60Tbl[ MNTBLLEN + 2] =
 {
 	23,			// initial character offset!!
 	MNTBLLEN,	// len of this table
@@ -1933,7 +1933,7 @@ FLMBYTE fwp_mn60Tbl[ MNTBLLEN + 2] =		// multinational table
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE fwp_sym60Tbl[ SYMTBLLEN + 2] =
+static FLMBYTE fwp_sym60Tbl[ SYMTBLLEN + 2] =
 {
 	11,			// initial character offset!!
 	SYMTBLLEN,	// len of this table
@@ -1951,7 +1951,7 @@ FLMBYTE fwp_sym60Tbl[ SYMTBLLEN + 2] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE fwp_grk60Tbl[ GRKTBLLEN + 2] =
+static FLMBYTE fwp_grk60Tbl[ GRKTBLLEN + 2] =
 {
 	0,					// starting offset
 	GRKTBLLEN,		// length
@@ -2200,7 +2200,7 @@ FLMBYTE fwp_grk60Tbl[ GRKTBLLEN + 2] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE fwp_cyrl60Tbl[ CYRLTBLLEN + 2] =
+static FLMBYTE fwp_cyrl60Tbl[ CYRLTBLLEN + 2] =
 {
 	0,					// starting offset
 	CYRLTBLLEN,		// len of table
@@ -2438,7 +2438,7 @@ FLMBYTE fwp_cyrl60Tbl[ CYRLTBLLEN + 2] =
 Desc:		The Hebrew characters are collated over the Russian characters
 			Therefore sorting both Hebrew and Russian is impossible to do.
 ****************************************************************************/
-FLMBYTE fwp_heb60TblA[ HEBTBL1LEN + 2] =
+static FLMBYTE fwp_heb60TblA[ HEBTBL1LEN + 2] =
 {
 	0,					// starting offset
 	HEBTBL1LEN,		// len of table
@@ -2476,7 +2476,7 @@ Desc:		This is the ANCIENT HEBREW SCRIPT piece.
 			The actual value will be stored in the subcollation.
 			This way we don't play diacritic/subcollation games.
 ****************************************************************************/
-FLMBYTE fwp_heb60TblB[ HEBTBL2LEN + 2] =
+static FLMBYTE fwp_heb60TblB[ HEBTBL2LEN + 2] =
 {
 	84,
 	HEBTBL2LEN,
@@ -2544,7 +2544,7 @@ Desc:		The Arabic characters are collated OVER the Russian characters
 			to add more collation values.  Some chars in CS14 are combined when
 			urdu, pashto and sindhi characters overlap.
 ****************************************************************************/
-FLMBYTE fwp_ar160Tbl[ AR1TBLLEN + 2] =
+static FLMBYTE fwp_ar160Tbl[ AR1TBLLEN + 2] =
 {
 	38,				// starting offset
 	AR1TBLLEN,		// len of table
@@ -2757,7 +2757,7 @@ Desc:		Alef needs a subcollation table.
 				[13,152]..[13,153] - taa marbuuTah - nosubcoll
 				[13,64] ..[13,67]  - taa - subcoll of 1
 ****************************************************************************/
-FLMBYTE fwp_alefSubColTbl[] =
+static FLMBYTE fwp_alefSubColTbl[] =
 {
 // [13,165]
 	1,		// ?? alif hamzah
@@ -2783,7 +2783,7 @@ FLMBYTE fwp_alefSubColTbl[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMBYTE fwp_ar260Tbl[ AR2TBLLEN + 2] =
+static FLMBYTE fwp_ar260Tbl[ AR2TBLLEN + 2] =
 {
 	41,				// starting offset
 	AR2TBLLEN,		// len of table
@@ -2991,7 +2991,7 @@ Desc:		If the bit position is set then save the character in the sub-col
 			FLAIM COLTBL1 to see which characters are combined with other
 			Arabic characters.
 ****************************************************************************/
-FLMBYTE fwp_ar2BitTbl[] =
+static FLMBYTE fwp_ar2BitTbl[] =
 {
 	// Start at character 64
 	// The only 'clean' areas uncollate to the correct place, they are...
@@ -3030,7 +3030,7 @@ FLMBYTE fwp_ar2BitTbl[] =
 Desc:		This table describes and gives addresses for collating 5.0
 			character sets.  Each line corresponds with a character set.
 ***************************************************************************/
-TBL_B_TO_BP fwp_col60Tbl[] =
+static TBL_B_TO_BP fwp_col60Tbl[] =
 {
 	{CHSASCI, fwp_asc60Tbl},	// ascii - " " - "~"
 	{CHSMUL1, fwp_mn60Tbl},		// multinational
@@ -3044,7 +3044,7 @@ TBL_B_TO_BP fwp_col60Tbl[] =
 Desc:		This table is for sorting the hebrew/arabic languages.
 			These values overlap the end of ASC/european and cyrillic tables.
 ****************************************************************************/
-TBL_B_TO_BP fwp_HebArabicCol60Tbl[] =
+static TBL_B_TO_BP fwp_HebArabicCol60Tbl[] =
 {
 	{CHSASCI,	fwp_asc60Tbl},		// ascii - " " - "~"
 	{CHSMUL1,	fwp_mn60Tbl},		// multinational
@@ -3072,7 +3072,7 @@ Desc:		The diacritical to collated table translates the first 26

 			This table is index by the diacritical value.
 ****************************************************************************/
-FLMBYTE	fwp_dia60Tbl[] =
+static FLMBYTE fwp_dia60Tbl[] =
 {
 	2,			// grave		offset = 0
 	16,		//	centerd	offset = 1
@@ -3129,7 +3129,7 @@ static FLMBYTE fwp_caseConvertableRange[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMUINT16 colToWPChr[ COLS11 - COLLS] =
+static FLMUINT16 colToWPChr[ COLS11 - COLLS] =
 {
 	0x20,			// colls	-	<Spc>
 	0x2e,			// colls+1	-	.
@@ -3354,7 +3354,7 @@ FLMUINT16 colToWPChr[ COLS11 - COLLS] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMUINT16 HebArabColToWPChr[] =
+static FLMUINT16 HebArabColToWPChr[] =
 {
 	// Start at COLS10a+0
 // [0]
@@ -3465,7 +3465,7 @@ FLMUINT16 HebArabColToWPChr[] =
 /****************************************************************************
 Desc:
 ****************************************************************************/
-FLMUINT16 ArabSubColToWPChr[] =
+static FLMUINT16 ArabSubColToWPChr[] =
 {
 	0x0D00 +177,	// Alef maddah - default value - here for documentation
 	0x0D00 +165,	// Alef Hamzah
@@ -3481,7 +3481,7 @@ FLMUINT16 ArabSubColToWPChr[] =
 /****************************************************************************
 Desc:		Turns a collated diacritic value into the original diacritic value
 ****************************************************************************/
-FLMBYTE ml1_COLtoD[27] =
+static FLMBYTE ml1_COLtoD[27] =
 {
 	23,		// dbls	sort value = 0  sorts as 'ss'
 	6,			// acute	sort value = 1
@@ -3516,7 +3516,7 @@ FLMBYTE ml1_COLtoD[27] =
 Desc:
 Notes:		Only 48 values + 0x40, 0x41, 0x42 (169..171)
 ****************************************************************************/
-FLMBYTE ColToKanaTbl[ 48] =
+static FLMBYTE ColToKanaTbl[ 48] =
 {
 	 0,	// a=0, A=1
 	 2,	// i=2, I=3
@@ -3626,7 +3626,7 @@ Notes:	This table is used to convert a subset of Unicode characters to
 			contractions.
 ****************************************************************************/
 #define UTOWP60_ENTRIES			1502
-FLMUINT16 WP_UTOWP60[ UTOWP60_ENTRIES][2] =
+static FLMUINT16 WP_UTOWP60[ UTOWP60_ENTRIES][2] =
 {
 	{ 0x00A1, 0x0407 },		//   7 ,  4
 	{ 0x00A2, 0x0413 },		//  19 ,  4
@@ -9953,6 +9953,9 @@ Exit:
 	return( rc);
 }

+/**************************************************************************
+Desc:
+***************************************************************************/
 void FLMAPI F_CollIStream::getCurrPosition(
 	F_CollStreamPos *		pPos)
 {