Files
mars-flaim/xflaim/src/regexp.cpp

4165 lines
87 KiB
C++

//------------------------------------------------------------------------------
// Desc: This is the regular expression class.
//
// Tabs: 3
//
// Copyright (c) 2004-2006 Novell, Inc. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of version 2 of the GNU General Public
// License as published by the Free Software Foundation.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, contact Novell, Inc.
//
// To contact Novell about this file by physical or electronic mail,
// you may find current contact information at www.novell.com
//
// $Id: regexp.cpp 3116 2006-01-19 13:31:53 -0700 (Thu, 19 Jan 2006) dsanders $
//------------------------------------------------------------------------------
#include "flaimsys.h"
// Expression kinds.  Judging by the enumerator names these distinguish a
// literal, a character class, and a set of alternatives.  The numeric values
// are spelled out so they stay stable if enumerators are ever reordered.
typedef enum
{
	EXP_LITERAL			= 0,
	EXP_CHAR_CLASS		= 1,
	EXP_ALTERNATIVES	= 2
} eExpType;
// One row of a named character-block table: a low/high pair of characters
// plus the block's name.  The rows of FlmBlockCharRanges are written so that
// both end points belong to the block (e.g. 0x0000..0x007F, then 0x0080..).
typedef struct BlockCharRangeTag
{
	FLMUNICODE		uzLowChar;		// First character of the block
	FLMUNICODE		uzHighChar;		// Last character of the block
	const char *	pszBlockName;	// Block name; NULL in the terminating row
} BLOCK_CHAR_RANGE;
// Table of named character blocks, in ascending code-point order.
// The final {0, 0, NULL} row acts as the end-of-table marker, so it must
// remain the last entry.
BLOCK_CHAR_RANGE FlmBlockCharRanges[] =
{
	{0x0000, 0x007F, "BasicLatin"},
	{0x0080, 0x00FF, "Latin-1Supplement"},
	{0x0100, 0x017F, "LatinExtended-A"},
	{0x0180, 0x024F, "LatinExtended-B"},
	{0x0250, 0x02AF, "IPAExtensions"},
	{0x02B0, 0x02FF, "SpacingModifierLetters"},
	{0x0300, 0x036F, "CombiningDiacriticalMarks"},
	{0x0370, 0x03FF, "Greek"},
	{0x0400, 0x04FF, "Cyrillic"},
	{0x0530, 0x058F, "Armenian"},
	{0x0590, 0x05FF, "Hebrew"},
	{0x0600, 0x06FF, "Arabic"},
	{0x0700, 0x074F, "Syriac"},
	{0x0780, 0x07BF, "Thaana"},
	{0x0900, 0x097F, "Devanagari"},
	{0x0980, 0x09FF, "Bengali"},
	{0x0A00, 0x0A7F, "Gurmukhi"},
	{0x0A80, 0x0AFF, "Gujarati"},
	{0x0B00, 0x0B7F, "Oriya"},
	{0x0B80, 0x0BFF, "Tamil"},
	{0x0C00, 0x0C7F, "Telugu"},
	{0x0C80, 0x0CFF, "Kannada"},
	{0x0D00, 0x0D7F, "Malayalam"},
	{0x0D80, 0x0DFF, "Sinhala"},
	{0x0E00, 0x0E7F, "Thai"},
	{0x0E80, 0x0EFF, "Lao"},
	{0x0F00, 0x0FFF, "Tibetan"},
	{0x1000, 0x109F, "Myanmar"},
	{0x10A0, 0x10FF, "Georgian"},
	{0x1100, 0x11FF, "HangulJamo"},
	{0x1200, 0x137F, "Ethiopic"},
	{0x13A0, 0x13FF, "Cherokee"},
	{0x1400, 0x167F, "UnifiedCanadianAboriginalSyllabics"},
	{0x1680, 0x169F, "Ogham"},
	{0x16A0, 0x16FF, "Runic"},
	{0x1780, 0x17FF, "Khmer"},
	{0x1800, 0x18AF, "Mongolian"},
	{0x1E00, 0x1EFF, "LatinExtendedAdditional"},
	{0x1F00, 0x1FFF, "GreekExtended"},
	{0x2000, 0x206F, "GeneralPunctuation"},
	{0x2070, 0x209F, "SuperscriptsandSubscripts"},
	{0x20A0, 0x20CF, "CurrencySymbols"},
	{0x20D0, 0x20FF, "CombiningMarksforSymbols"},
	{0x2100, 0x214F, "LetterlikeSymbols"},
	{0x2150, 0x218F, "NumberForms"},
	{0x2190, 0x21FF, "Arrows"},
	{0x2200, 0x22FF, "MathematicalOperators"},
	{0x2300, 0x23FF, "MiscellaneousTechnical"},
	{0x2400, 0x243F, "ControlPictures"},
	{0x2440, 0x245F, "OpticalCharacterRecognition"},
	{0x2460, 0x24FF, "EnclosedAlphanumerics"},
	{0x2500, 0x257F, "BoxDrawing"},
	{0x2580, 0x259F, "BlockElements"},
	{0x25A0, 0x25FF, "GeometricShapes"},
	{0x2600, 0x26FF, "MiscellaneousSymbols"},
	{0x2700, 0x27BF, "Dingbats"},
	{0x2800, 0x28FF, "BraillePatterns"},
	{0x2E80, 0x2EFF, "CJKRadicalsSupplement"},
	{0x2F00, 0x2FDF, "KangxiRadicals"},
	{0x2FF0, 0x2FFF, "IdeographicDescriptionCharacters"},
	{0x3000, 0x303F, "CJKSymbolsandPunctuation"},
	{0x3040, 0x309F, "Hiragana"},
	{0x30A0, 0x30FF, "Katakana"},
	{0x3100, 0x312F, "Bopomofo"},
	{0x3130, 0x318F, "HangulCompatibilityJamo"},
	{0x3190, 0x319F, "Kanbun"},
	{0x31A0, 0x31BF, "BopomofoExtended"},
	{0x3200, 0x32FF, "EnclosedCJKLettersandMonths"},
	{0x3300, 0x33FF, "CJKCompatibility"},
	{0x3400, 0x4DB5, "CJKUnifiedIdeographsExtensionA"},
	{0x4E00, 0x9FFF, "CJKUnifiedIdeographs"},
	{0xA000, 0xA48F, "YiSyllables"},
	{0xA490, 0xA4CF, "YiRadicals"},
	{0xAC00, 0xD7A3, "HangulSyllables"},
	{0xD800, 0xDB7F, "HighSurrogates"},
	{0xDB80, 0xDBFF, "HighPrivateUseSurrogates"},
	{0xDC00, 0xDFFF, "LowSurrogates"},
	{0xE000, 0xF8FF, "PrivateUse"},
	{0xF900, 0xFAFF, "CJKCompatibilityIdeographs"},
	{0xFB00, 0xFB4F, "AlphabeticPresentationForms"},
	{0xFB50, 0xFDFF, "ArabicPresentationForms-A"},
	{0xFE20, 0xFE2F, "CombiningHalfMarks"},
	{0xFE30, 0xFE4F, "CJKCompatibilityForms"},
	{0xFE50, 0xFE6F, "SmallFormVariants"},
	{0xFE70, 0xFEFE, "ArabicPresentationForms-B"},
	{0xFEFF, 0xFEFF, "Specials"},
	{0xFF00, 0xFFEF, "HalfwidthandFullwidthForms"},
	{0xFFF0, 0xFFFD, "Specials"},

	// End-of-table marker
	{0, 0, NULL}
};
// One row of a character-category table.  In the tables in this file a row
// is either a low/high pair (e.g. {0x0041, 0x005A}) or a single character
// written with a high value of 0x0000 (e.g. {0x0100, 0x0000}).
// NOTE(review): the 0x0000 "single character" reading is taken from the
// table data - confirm against the code that searches these tables.
typedef struct CategoryCharRangeTag
{
	FLMUNICODE	uzLowChar;		// First (or only) character of the row
	FLMUNICODE	uzHighChar;		// Last character of the row, or 0x0000
} CATEGORY_CHAR_RANGE;
// Character ranges for the Lu table (presumably Unicode general category
// "Letter, uppercase"), in ascending order.  A high value of 0x0000 marks a
// single-character row.
//
// Corrections relative to the previous table:
//   - {0x00C0, 0x00DE} included U+00D7 (multiplication sign), which is also
//     listed in SmRanges; the range is now split around it.
//   - {0x01A9, 0x01AC} included U+01AA and U+01AB, which are listed in
//     LlRanges; only U+01A9 and U+01AC are kept here.
//   - {0x1F49, 0x1F4D} started one character late; U+1F48 (Greek capital
//     omicron with psili) is now included.
CATEGORY_CHAR_RANGE LuRanges[] =
{
	// Basic Latin and Latin-1 (U+00D7 deliberately excluded)
	{0x0041, 0x005A}, {0x00C0, 0x00D6}, {0x00D8, 0x00DE},

	// Latin Extended-A
	{0x0100, 0x0000}, {0x0102, 0x0000}, {0x0104, 0x0000}, {0x0106, 0x0000},
	{0x0108, 0x0000}, {0x010A, 0x0000}, {0x010C, 0x0000}, {0x010E, 0x0000},
	{0x0110, 0x0000}, {0x0112, 0x0000}, {0x0114, 0x0000}, {0x0116, 0x0000},
	{0x0118, 0x0000}, {0x011A, 0x0000}, {0x011C, 0x0000}, {0x011E, 0x0000},
	{0x0120, 0x0000}, {0x0122, 0x0000}, {0x0124, 0x0000}, {0x0126, 0x0000},
	{0x0128, 0x0000}, {0x012A, 0x0000}, {0x012C, 0x0000}, {0x012E, 0x0000},
	{0x0130, 0x0000}, {0x0132, 0x0000}, {0x0134, 0x0000}, {0x0136, 0x0000},
	{0x0139, 0x0000}, {0x013B, 0x0000}, {0x013D, 0x0000}, {0x013F, 0x0000},
	{0x0141, 0x0000}, {0x0143, 0x0000}, {0x0145, 0x0000}, {0x0147, 0x0000},
	{0x014A, 0x0000}, {0x014C, 0x0000}, {0x014E, 0x0000}, {0x0150, 0x0000},
	{0x0152, 0x0000}, {0x0154, 0x0000}, {0x0156, 0x0000}, {0x0158, 0x0000},
	{0x015A, 0x0000}, {0x015C, 0x0000}, {0x015E, 0x0000}, {0x0160, 0x0000},
	{0x0162, 0x0000}, {0x0164, 0x0000}, {0x0166, 0x0000}, {0x0168, 0x0000},
	{0x016A, 0x0000}, {0x016C, 0x0000}, {0x016E, 0x0000}, {0x0170, 0x0000},
	{0x0172, 0x0000}, {0x0174, 0x0000}, {0x0176, 0x0000}, {0x0178, 0x0179},
	{0x017B, 0x0000}, {0x017D, 0x0000},

	// Latin Extended-B
	{0x0181, 0x0182}, {0x0184, 0x0000}, {0x0186, 0x0187}, {0x0189, 0x018B},
	{0x018E, 0x0191}, {0x0193, 0x0194}, {0x0196, 0x0198}, {0x019C, 0x019D},
	{0x019F, 0x01A0}, {0x01A2, 0x0000}, {0x01A4, 0x0000}, {0x01A6, 0x01A7},
	// U+01AA and U+01AB are lowercase and must not be covered here
	{0x01A9, 0x0000}, {0x01AC, 0x0000},
	{0x01AE, 0x01AF}, {0x01B1, 0x01B3}, {0x01B5, 0x0000}, {0x01B7, 0x01B8},
	{0x01BC, 0x0000}, {0x01C4, 0x0000}, {0x01C7, 0x0000}, {0x01CA, 0x0000},
	{0x01CD, 0x0000}, {0x01CF, 0x0000}, {0x01D1, 0x0000}, {0x01D3, 0x0000},
	{0x01D5, 0x0000}, {0x01D7, 0x0000}, {0x01D9, 0x0000}, {0x01DB, 0x0000},
	{0x01DE, 0x0000}, {0x01E0, 0x0000}, {0x01E2, 0x0000}, {0x01E4, 0x0000},
	{0x01E6, 0x0000}, {0x01E8, 0x0000}, {0x01EA, 0x0000}, {0x01EC, 0x0000},
	{0x01EE, 0x0000}, {0x01F1, 0x0000}, {0x01F4, 0x0000}, {0x01F6, 0x01F8},
	{0x01FA, 0x0000}, {0x01FC, 0x0000}, {0x01FE, 0x0000},
	{0x0200, 0x0000}, {0x0202, 0x0000}, {0x0204, 0x0000}, {0x0206, 0x0000},
	{0x0208, 0x0000}, {0x020A, 0x0000}, {0x020C, 0x0000}, {0x020E, 0x0000},
	{0x0210, 0x0000}, {0x0212, 0x0000}, {0x0214, 0x0000}, {0x0216, 0x0000},
	{0x0218, 0x0000}, {0x021A, 0x0000}, {0x021C, 0x0000}, {0x021E, 0x0000},
	{0x0220, 0x0000}, {0x0222, 0x0000}, {0x0224, 0x0000}, {0x0226, 0x0000},
	{0x0228, 0x0000}, {0x022A, 0x0000}, {0x022C, 0x0000}, {0x022E, 0x0000},
	{0x0230, 0x0000}, {0x0232, 0x0000},

	// Greek
	{0x0386, 0x0000}, {0x0388, 0x038A}, {0x038C, 0x0000}, {0x038E, 0x038F},
	{0x0391, 0x03AB}, {0x03D2, 0x03D4},
	{0x03D8, 0x0000}, {0x03DA, 0x0000}, {0x03DC, 0x0000}, {0x03DE, 0x0000},
	{0x03E0, 0x0000}, {0x03E2, 0x0000}, {0x03E4, 0x0000}, {0x03E6, 0x0000},
	{0x03E8, 0x0000}, {0x03EA, 0x0000}, {0x03EC, 0x0000}, {0x03EE, 0x0000},
	{0x03F4, 0x0000}, {0x03F7, 0x0000}, {0x03F9, 0x0000}, {0x03FA, 0x0000},

	// Cyrillic
	{0x0400, 0x042F},
	{0x0460, 0x0000}, {0x0462, 0x0000}, {0x0464, 0x0000}, {0x0466, 0x0000},
	{0x0468, 0x0000}, {0x046A, 0x0000}, {0x046C, 0x0000}, {0x046E, 0x0000},
	{0x0470, 0x0000}, {0x0472, 0x0000}, {0x0474, 0x0000}, {0x0476, 0x0000},
	{0x0478, 0x0000}, {0x047A, 0x0000}, {0x047C, 0x0000}, {0x047E, 0x0000},
	{0x0480, 0x0000},
	{0x048A, 0x0000}, {0x048C, 0x0000}, {0x048E, 0x0000}, {0x0490, 0x0000},
	{0x0492, 0x0000}, {0x0494, 0x0000}, {0x0496, 0x0000}, {0x0498, 0x0000},
	{0x049A, 0x0000}, {0x049C, 0x0000}, {0x049E, 0x0000}, {0x04A0, 0x0000},
	{0x04A2, 0x0000}, {0x04A4, 0x0000}, {0x04A6, 0x0000}, {0x04A8, 0x0000},
	{0x04AA, 0x0000}, {0x04AC, 0x0000}, {0x04AE, 0x0000}, {0x04B0, 0x0000},
	{0x04B2, 0x0000}, {0x04B4, 0x0000}, {0x04B6, 0x0000}, {0x04B8, 0x0000},
	{0x04BA, 0x0000}, {0x04BC, 0x0000}, {0x04BE, 0x0000}, {0x04C0, 0x04C1},
	{0x04C3, 0x0000}, {0x04C5, 0x0000}, {0x04C7, 0x0000}, {0x04C9, 0x0000},
	{0x04CB, 0x0000}, {0x04CD, 0x0000},
	{0x04D0, 0x0000}, {0x04D2, 0x0000}, {0x04D4, 0x0000}, {0x04D6, 0x0000},
	{0x04D8, 0x0000}, {0x04DA, 0x0000}, {0x04DC, 0x0000}, {0x04DE, 0x0000},
	{0x04E0, 0x0000}, {0x04E2, 0x0000}, {0x04E4, 0x0000}, {0x04E6, 0x0000},
	{0x04E8, 0x0000}, {0x04EA, 0x0000}, {0x04EC, 0x0000}, {0x04EE, 0x0000},
	{0x04F0, 0x0000}, {0x04F2, 0x0000}, {0x04F4, 0x0000}, {0x04F8, 0x0000},
	{0x0500, 0x0000}, {0x0502, 0x0000}, {0x0504, 0x0000}, {0x0506, 0x0000},
	{0x0508, 0x0000}, {0x050A, 0x0000}, {0x050C, 0x0000}, {0x050E, 0x0000},

	// Armenian, Georgian
	{0x0531, 0x0556}, {0x10A0, 0x10C5},

	// Latin Extended Additional
	{0x1E00, 0x0000}, {0x1E02, 0x0000}, {0x1E04, 0x0000}, {0x1E06, 0x0000},
	{0x1E08, 0x0000}, {0x1E0A, 0x0000}, {0x1E0C, 0x0000}, {0x1E0E, 0x0000},
	{0x1E10, 0x0000}, {0x1E12, 0x0000}, {0x1E14, 0x0000}, {0x1E16, 0x0000},
	{0x1E18, 0x0000}, {0x1E1A, 0x0000}, {0x1E1C, 0x0000}, {0x1E1E, 0x0000},
	{0x1E20, 0x0000}, {0x1E22, 0x0000}, {0x1E24, 0x0000}, {0x1E26, 0x0000},
	{0x1E28, 0x0000}, {0x1E2A, 0x0000}, {0x1E2C, 0x0000}, {0x1E2E, 0x0000},
	{0x1E30, 0x0000}, {0x1E32, 0x0000}, {0x1E34, 0x0000}, {0x1E36, 0x0000},
	{0x1E38, 0x0000}, {0x1E3A, 0x0000}, {0x1E3C, 0x0000}, {0x1E3E, 0x0000},
	{0x1E40, 0x0000}, {0x1E42, 0x0000}, {0x1E44, 0x0000}, {0x1E46, 0x0000},
	{0x1E48, 0x0000}, {0x1E4A, 0x0000}, {0x1E4C, 0x0000}, {0x1E4E, 0x0000},
	{0x1E50, 0x0000}, {0x1E52, 0x0000}, {0x1E54, 0x0000}, {0x1E56, 0x0000},
	{0x1E58, 0x0000}, {0x1E5A, 0x0000}, {0x1E5C, 0x0000}, {0x1E5E, 0x0000},
	{0x1E60, 0x0000}, {0x1E62, 0x0000}, {0x1E64, 0x0000}, {0x1E66, 0x0000},
	{0x1E68, 0x0000}, {0x1E6A, 0x0000}, {0x1E6C, 0x0000}, {0x1E6E, 0x0000},
	{0x1E70, 0x0000}, {0x1E72, 0x0000}, {0x1E74, 0x0000}, {0x1E76, 0x0000},
	{0x1E78, 0x0000}, {0x1E7A, 0x0000}, {0x1E7C, 0x0000}, {0x1E7E, 0x0000},
	{0x1E80, 0x0000}, {0x1E82, 0x0000}, {0x1E84, 0x0000}, {0x1E86, 0x0000},
	{0x1E88, 0x0000}, {0x1E8A, 0x0000}, {0x1E8C, 0x0000}, {0x1E8E, 0x0000},
	{0x1E90, 0x0000}, {0x1E92, 0x0000}, {0x1E94, 0x0000},
	{0x1EA0, 0x0000}, {0x1EA2, 0x0000}, {0x1EA4, 0x0000}, {0x1EA6, 0x0000},
	{0x1EA8, 0x0000}, {0x1EAA, 0x0000}, {0x1EAC, 0x0000}, {0x1EAE, 0x0000},
	{0x1EB0, 0x0000}, {0x1EB2, 0x0000}, {0x1EB4, 0x0000}, {0x1EB6, 0x0000},
	{0x1EB8, 0x0000}, {0x1EBA, 0x0000}, {0x1EBC, 0x0000}, {0x1EBE, 0x0000},
	{0x1EC0, 0x0000}, {0x1EC2, 0x0000}, {0x1EC4, 0x0000}, {0x1EC6, 0x0000},
	{0x1EC8, 0x0000}, {0x1ECA, 0x0000}, {0x1ECC, 0x0000}, {0x1ECE, 0x0000},
	{0x1ED0, 0x0000}, {0x1ED2, 0x0000}, {0x1ED4, 0x0000}, {0x1ED6, 0x0000},
	{0x1ED8, 0x0000}, {0x1EDA, 0x0000}, {0x1EDC, 0x0000}, {0x1EDE, 0x0000},
	{0x1EE0, 0x0000}, {0x1EE2, 0x0000}, {0x1EE4, 0x0000}, {0x1EE6, 0x0000},
	{0x1EE8, 0x0000}, {0x1EEA, 0x0000}, {0x1EEC, 0x0000}, {0x1EEE, 0x0000},
	{0x1EF0, 0x0000}, {0x1EF2, 0x0000}, {0x1EF4, 0x0000}, {0x1EF6, 0x0000},
	{0x1EF8, 0x0000},

	// Greek Extended (0x1F48 was previously omitted)
	{0x1F08, 0x1F0F}, {0x1F18, 0x1F1D}, {0x1F28, 0x1F2F}, {0x1F38, 0x1F3F},
	{0x1F48, 0x1F4D},
	{0x1F59, 0x0000}, {0x1F5B, 0x0000}, {0x1F5D, 0x0000}, {0x1F5F, 0x0000},
	{0x1F68, 0x1F6F}, {0x1FB8, 0x1FBB}, {0x1FC8, 0x1FCB}, {0x1FD8, 0x1FDB},
	{0x1FE8, 0x1FEC}, {0x1FF8, 0x1FFB},

	// Letterlike symbols
	{0x2102, 0x0000}, {0x2107, 0x0000}, {0x210B, 0x210D}, {0x2110, 0x2112},
	{0x2115, 0x0000}, {0x2119, 0x211D}, {0x2124, 0x0000}, {0x2126, 0x0000},
	{0x2128, 0x0000}, {0x212A, 0x212D}, {0x2130, 0x2131}, {0x2133, 0x0000},
	{0x213E, 0x213F}, {0x2145, 0x0000},

	// Fullwidth forms
	{0xFF21, 0xFF3A}
};
// Character ranges for the Ll table (presumably Unicode general category
// "Letter, lowercase"), in ascending order.  A high value of 0x0000 marks a
// single-character row.
//
// Corrections relative to the previous table:
//   - {0x00E0, 0x00FF} included U+00F7 (division sign), which is also listed
//     in SmRanges; the range is now split around it.
//   - U+01D0 (small i with caron) was missing between U+01CE and U+01D2.
//   - After {0x0233, 0x0236} the table held a garbled, out-of-order run
//     ({0x0234}, {0x0250}, {0x0209}, {0x020B}, {0x020D}, {0x020F, 0x02AF}).
//     Its last row covered 0x0210..0x0232, which contains the uppercase
//     characters listed in LuRanges.  It is replaced by the single range
//     {0x0250, 0x02AF}.
//   - Three stray duplicate rows ({0x1E0B}, {0x1E0D}, {0x1E0F}) after the
//     final fullwidth range were removed; they already appear in order.
CATEGORY_CHAR_RANGE LlRanges[] =
{
	// Basic Latin and Latin-1 (U+00F7 deliberately excluded)
	{0x0061, 0x007A}, {0x00AA, 0x0000}, {0x00B5, 0x0000}, {0x00BA, 0x0000},
	{0x00DF, 0x0000}, {0x00E0, 0x00F6}, {0x00F8, 0x00FF},

	// Latin Extended-A
	{0x0101, 0x0000}, {0x0103, 0x0000}, {0x0105, 0x0000}, {0x0107, 0x0000},
	{0x0109, 0x0000}, {0x010B, 0x0000}, {0x010D, 0x0000}, {0x010F, 0x0000},
	{0x0111, 0x0000}, {0x0113, 0x0000}, {0x0115, 0x0000}, {0x0117, 0x0000},
	{0x0119, 0x0000}, {0x011B, 0x0000}, {0x011D, 0x0000}, {0x011F, 0x0000},
	{0x0121, 0x0000}, {0x0123, 0x0000}, {0x0125, 0x0000}, {0x0127, 0x0000},
	{0x0129, 0x0000}, {0x012B, 0x0000}, {0x012D, 0x0000}, {0x012F, 0x0000},
	{0x0131, 0x0000}, {0x0133, 0x0000}, {0x0135, 0x0000}, {0x0137, 0x0138},
	{0x013A, 0x0000}, {0x013C, 0x0000}, {0x013E, 0x0000}, {0x0140, 0x0000},
	{0x0142, 0x0000}, {0x0144, 0x0000}, {0x0146, 0x0000}, {0x0148, 0x0149},
	{0x014B, 0x0000}, {0x014D, 0x0000}, {0x014F, 0x0000}, {0x0151, 0x0000},
	{0x0153, 0x0000}, {0x0155, 0x0000}, {0x0157, 0x0000}, {0x0159, 0x0000},
	{0x015B, 0x0000}, {0x015D, 0x0000}, {0x015F, 0x0000}, {0x0161, 0x0000},
	{0x0163, 0x0000}, {0x0165, 0x0000}, {0x0167, 0x0000}, {0x0169, 0x0000},
	{0x016B, 0x0000}, {0x016D, 0x0000}, {0x016F, 0x0000}, {0x0171, 0x0000},
	{0x0173, 0x0000}, {0x0175, 0x0000}, {0x0177, 0x0000}, {0x017A, 0x0000},
	{0x017C, 0x0000}, {0x017E, 0x017F},

	// Latin Extended-B
	{0x0180, 0x0000}, {0x0183, 0x0000}, {0x0185, 0x0000}, {0x0188, 0x0000},
	{0x018C, 0x018D}, {0x0192, 0x0000}, {0x0195, 0x0000}, {0x0199, 0x019B},
	{0x019E, 0x0000}, {0x01A1, 0x0000}, {0x01A3, 0x0000}, {0x01A5, 0x0000},
	{0x01A8, 0x0000}, {0x01AA, 0x01AB}, {0x01AD, 0x0000}, {0x01B0, 0x0000},
	{0x01B4, 0x0000}, {0x01B6, 0x0000}, {0x01B9, 0x01BA}, {0x01BD, 0x01BF},
	{0x01C6, 0x0000}, {0x01C9, 0x0000}, {0x01CC, 0x0000}, {0x01CE, 0x0000},
	// U+01D0 was previously omitted
	{0x01D0, 0x0000},
	{0x01D2, 0x0000}, {0x01D4, 0x0000}, {0x01D6, 0x0000}, {0x01D8, 0x0000},
	{0x01DA, 0x0000}, {0x01DC, 0x01DD}, {0x01DF, 0x0000}, {0x01E1, 0x0000},
	{0x01E3, 0x0000}, {0x01E5, 0x0000}, {0x01E7, 0x0000}, {0x01E9, 0x0000},
	{0x01EB, 0x0000}, {0x01ED, 0x0000}, {0x01EF, 0x0000}, {0x01F0, 0x0000},
	{0x01F3, 0x0000}, {0x01F5, 0x0000}, {0x01F9, 0x0000}, {0x01FB, 0x0000},
	{0x01FD, 0x0000}, {0x01FF, 0x0000},
	{0x0201, 0x0000}, {0x0203, 0x0000}, {0x0205, 0x0000}, {0x0207, 0x0000},
	{0x0209, 0x0000}, {0x020B, 0x0000}, {0x020D, 0x0000}, {0x020F, 0x0000},
	{0x0211, 0x0000}, {0x0213, 0x0000}, {0x0215, 0x0000}, {0x0217, 0x0000},
	{0x0219, 0x0000}, {0x021B, 0x0000}, {0x021D, 0x0000}, {0x021F, 0x0000},
	{0x0221, 0x0000}, {0x0223, 0x0000}, {0x0225, 0x0000}, {0x0227, 0x0000},
	{0x0229, 0x0000}, {0x022B, 0x0000}, {0x022D, 0x0000}, {0x022F, 0x0000},
	{0x0231, 0x0000}, {0x0233, 0x0236},

	// IPA Extensions (replaces the garbled rows that used to follow 0x0236)
	{0x0250, 0x02AF},

	// Greek
	{0x0390, 0x0000}, {0x03AC, 0x03CE}, {0x03D0, 0x03D1}, {0x03D5, 0x03D7},
	{0x03D9, 0x0000}, {0x03DB, 0x0000}, {0x03DD, 0x0000}, {0x03DF, 0x0000},
	{0x03E1, 0x0000}, {0x03E3, 0x0000}, {0x03E5, 0x0000}, {0x03E7, 0x0000},
	{0x03E9, 0x0000}, {0x03EB, 0x0000}, {0x03ED, 0x0000}, {0x03EF, 0x03F3},
	{0x03F5, 0x0000}, {0x03F8, 0x0000}, {0x03FB, 0x0000},

	// Cyrillic
	{0x0430, 0x045F},
	{0x0461, 0x0000}, {0x0463, 0x0000}, {0x0465, 0x0000}, {0x0467, 0x0000},
	{0x0469, 0x0000}, {0x046B, 0x0000}, {0x046D, 0x0000}, {0x046F, 0x0000},
	{0x0471, 0x0000}, {0x0473, 0x0000}, {0x0475, 0x0000}, {0x0477, 0x0000},
	{0x0479, 0x0000}, {0x047B, 0x0000}, {0x047D, 0x0000}, {0x047F, 0x0000},
	{0x0481, 0x0000},
	{0x048B, 0x0000}, {0x048D, 0x0000}, {0x048F, 0x0000}, {0x0491, 0x0000},
	{0x0493, 0x0000}, {0x0495, 0x0000}, {0x0497, 0x0000}, {0x0499, 0x0000},
	{0x049B, 0x0000}, {0x049D, 0x0000}, {0x049F, 0x0000}, {0x04A1, 0x0000},
	{0x04A3, 0x0000}, {0x04A5, 0x0000}, {0x04A7, 0x0000}, {0x04A9, 0x0000},
	{0x04AB, 0x0000}, {0x04AD, 0x0000}, {0x04AF, 0x0000}, {0x04B1, 0x0000},
	{0x04B3, 0x0000}, {0x04B5, 0x0000}, {0x04B7, 0x0000}, {0x04B9, 0x0000},
	{0x04BB, 0x0000}, {0x04BD, 0x0000}, {0x04BF, 0x0000},
	{0x04C2, 0x0000}, {0x04C4, 0x0000}, {0x04C6, 0x0000}, {0x04C8, 0x0000},
	{0x04CA, 0x0000}, {0x04CC, 0x0000}, {0x04CE, 0x0000},
	{0x04D1, 0x0000}, {0x04D3, 0x0000}, {0x04D5, 0x0000}, {0x04D7, 0x0000},
	{0x04D9, 0x0000}, {0x04DB, 0x0000}, {0x04DD, 0x0000}, {0x04DF, 0x0000},
	{0x04E1, 0x0000}, {0x04E3, 0x0000}, {0x04E5, 0x0000}, {0x04E7, 0x0000},
	{0x04E9, 0x0000}, {0x04EB, 0x0000}, {0x04ED, 0x0000}, {0x04EF, 0x0000},
	{0x04F1, 0x0000}, {0x04F3, 0x0000}, {0x04F5, 0x0000}, {0x04F9, 0x0000},
	{0x0501, 0x0000}, {0x0503, 0x0000}, {0x0505, 0x0000}, {0x0507, 0x0000},
	{0x0509, 0x0000}, {0x050B, 0x0000}, {0x050D, 0x0000}, {0x050F, 0x0000},

	// Armenian, Phonetic Extensions
	{0x0561, 0x0587}, {0x1D00, 0x1D2B}, {0x1D62, 0x1D6B},

	// Latin Extended Additional
	{0x1E01, 0x0000}, {0x1E03, 0x0000}, {0x1E05, 0x0000}, {0x1E07, 0x0000},
	{0x1E09, 0x0000}, {0x1E0B, 0x0000}, {0x1E0D, 0x0000}, {0x1E0F, 0x0000},
	{0x1E11, 0x0000}, {0x1E13, 0x0000}, {0x1E15, 0x0000}, {0x1E17, 0x0000},
	{0x1E19, 0x0000}, {0x1E1B, 0x0000}, {0x1E1D, 0x0000}, {0x1E1F, 0x0000},
	{0x1E21, 0x0000}, {0x1E23, 0x0000}, {0x1E25, 0x0000}, {0x1E27, 0x0000},
	{0x1E29, 0x0000}, {0x1E2B, 0x0000}, {0x1E2D, 0x0000}, {0x1E2F, 0x0000},
	{0x1E31, 0x0000}, {0x1E33, 0x0000}, {0x1E35, 0x0000}, {0x1E37, 0x0000},
	{0x1E39, 0x0000}, {0x1E3B, 0x0000}, {0x1E3D, 0x0000}, {0x1E3F, 0x0000},
	{0x1E41, 0x0000}, {0x1E43, 0x0000}, {0x1E45, 0x0000}, {0x1E47, 0x0000},
	{0x1E49, 0x0000}, {0x1E4B, 0x0000}, {0x1E4D, 0x0000}, {0x1E4F, 0x0000},
	{0x1E51, 0x0000}, {0x1E53, 0x0000}, {0x1E55, 0x0000}, {0x1E57, 0x0000},
	{0x1E59, 0x0000}, {0x1E5B, 0x0000}, {0x1E5D, 0x0000}, {0x1E5F, 0x0000},
	{0x1E61, 0x0000}, {0x1E63, 0x0000}, {0x1E65, 0x0000}, {0x1E67, 0x0000},
	{0x1E69, 0x0000}, {0x1E6B, 0x0000}, {0x1E6D, 0x0000}, {0x1E6F, 0x0000},
	{0x1E71, 0x0000}, {0x1E73, 0x0000}, {0x1E75, 0x0000}, {0x1E77, 0x0000},
	{0x1E79, 0x0000}, {0x1E7B, 0x0000}, {0x1E7D, 0x0000}, {0x1E7F, 0x0000},
	{0x1E81, 0x0000}, {0x1E83, 0x0000}, {0x1E85, 0x0000}, {0x1E87, 0x0000},
	{0x1E89, 0x0000}, {0x1E8B, 0x0000}, {0x1E8D, 0x0000}, {0x1E8F, 0x0000},
	{0x1E91, 0x0000}, {0x1E93, 0x0000}, {0x1E95, 0x1E9B},
	{0x1EA1, 0x0000}, {0x1EA3, 0x0000}, {0x1EA5, 0x0000}, {0x1EA7, 0x0000},
	{0x1EA9, 0x0000}, {0x1EAB, 0x0000}, {0x1EAD, 0x0000}, {0x1EAF, 0x0000},
	{0x1EB1, 0x0000}, {0x1EB3, 0x0000}, {0x1EB5, 0x0000}, {0x1EB7, 0x0000},
	{0x1EB9, 0x0000}, {0x1EBB, 0x0000}, {0x1EBD, 0x0000}, {0x1EBF, 0x0000},
	{0x1EC1, 0x0000}, {0x1EC3, 0x0000}, {0x1EC5, 0x0000}, {0x1EC7, 0x0000},
	{0x1EC9, 0x0000}, {0x1ECB, 0x0000}, {0x1ECD, 0x0000}, {0x1ECF, 0x0000},
	{0x1ED1, 0x0000}, {0x1ED3, 0x0000}, {0x1ED5, 0x0000}, {0x1ED7, 0x0000},
	{0x1ED9, 0x0000}, {0x1EDB, 0x0000}, {0x1EDD, 0x0000}, {0x1EDF, 0x0000},
	{0x1EE1, 0x0000}, {0x1EE3, 0x0000}, {0x1EE5, 0x0000}, {0x1EE7, 0x0000},
	{0x1EE9, 0x0000}, {0x1EEB, 0x0000}, {0x1EED, 0x0000}, {0x1EEF, 0x0000},
	{0x1EF1, 0x0000}, {0x1EF3, 0x0000}, {0x1EF5, 0x0000}, {0x1EF7, 0x0000},
	{0x1EF9, 0x0000},

	// Greek Extended
	{0x1F00, 0x1F07}, {0x1F10, 0x1F15}, {0x1F20, 0x1F27}, {0x1F30, 0x1F37},
	{0x1F40, 0x1F45}, {0x1F50, 0x1F57}, {0x1F60, 0x1F67}, {0x1F70, 0x1F7D},
	{0x1F80, 0x1F87}, {0x1F90, 0x1F97}, {0x1FA0, 0x1FA7}, {0x1FB0, 0x1FB7},
	{0x1FBE, 0x0000}, {0x1FC2, 0x1FC4}, {0x1FC6, 0x1FC7}, {0x1FD0, 0x1FD7},
	{0x1FE0, 0x1FE7}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FF7},

	// Superscripts, letterlike symbols
	{0x2071, 0x0000}, {0x207F, 0x0000}, {0x210A, 0x0000}, {0x210E, 0x210F},
	{0x2113, 0x0000}, {0x212F, 0x0000}, {0x2134, 0x0000}, {0x2139, 0x0000},
	{0x213D, 0x0000}, {0x2146, 0x2149},

	// Presentation forms, fullwidth forms
	{0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFF41, 0xFF5A}
};
// Character ranges for the Lt table (presumably Unicode general category
// "Letter, titlecase").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE LtRanges[] =
{
	{0x01C5, 0x0000}, {0x01C8, 0x0000}, {0x01CB, 0x0000}, {0x01F2, 0x0000},
	{0x1F88, 0x1F8F}, {0x1F98, 0x1F9F}, {0x1FA8, 0x1FAF},
	{0x1FBC, 0x0000}, {0x1FCC, 0x0000}, {0x1FFC, 0x0000}
};
// Character ranges for the Lm table (presumably Unicode general category
// "Letter, modifier").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE LmRanges[] =
{
	{0x02B0, 0x02C1}, {0x02C6, 0x02D1}, {0x02E0, 0x02E4}, {0x02EE, 0x0000},
	{0x037A, 0x0000}, {0x0559, 0x0000}, {0x0640, 0x0000}, {0x06E5, 0x06E6},
	{0x0E46, 0x0000}, {0x0EC6, 0x0000}, {0x17D7, 0x0000}, {0x1843, 0x0000},
	{0x1D2C, 0x1D61}, {0x3005, 0x0000}, {0x3031, 0x3035}, {0x303B, 0x0000},
	{0x309D, 0x309E}, {0x30FC, 0x30FE}, {0xFF70, 0x0000}, {0xFF9E, 0xFF9F}
};
// Character ranges for the Lo table (presumably Unicode general category
// "Letter, other"), in ascending order.  A high value of 0x0000 marks a
// single-character row.
//
// Correction relative to the previous table: the three large blocks
// 0x3400..0x4DB5, 0x4E00..0x9FA5 and 0xAC00..0xD7A3 were entered as six
// isolated single-character rows (only the first and the last character of
// each block), so every character in between was missing from the table.
// They are now proper ranges.
CATEGORY_CHAR_RANGE LoRanges[] =
{
	{0x01BB, 0x0000}, {0x01C0, 0x01C3}, {0x05D0, 0x05EA}, {0x05F0, 0x05F2},
	{0x0621, 0x063A}, {0x0641, 0x064A}, {0x066E, 0x066F}, {0x0671, 0x06D3},
	{0x06D5, 0x0000}, {0x06EE, 0x06EF}, {0x06FA, 0x06FC}, {0x06FF, 0x0000},
	{0x0710, 0x0000}, {0x0712, 0x072F}, {0x074D, 0x074F}, {0x0780, 0x07A5},
	{0x07B1, 0x0000}, {0x0904, 0x0939}, {0x093D, 0x0000}, {0x0950, 0x0000},
	{0x0958, 0x0961}, {0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8},
	{0x09AA, 0x09B0}, {0x09B2, 0x0000}, {0x09B6, 0x09B9}, {0x09BD, 0x0000},
	{0x09DC, 0x09DD}, {0x09DF, 0x09E1}, {0x09F0, 0x09F1}, {0x0A05, 0x0A0A},
	{0x0A0F, 0x0A10}, {0x0A13, 0x0A28}, {0x0A2A, 0x0A30}, {0x0A32, 0x0A33},
	{0x0A35, 0x0A36}, {0x0A38, 0x0A39}, {0x0A59, 0x0A5C}, {0x0A5E, 0x0000},
	{0x0A72, 0x0A74}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AB0},
	{0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0000}, {0x0AD0, 0x0000},
	{0x0AE0, 0x0AE1}, {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B30},
	{0x0B32, 0x0B33}, {0x0B35, 0x0B39}, {0x0B3D, 0x0000}, {0x0B5C, 0x0B5D},
	{0x0B5F, 0x0B61}, {0x0B71, 0x0000}, {0x0B83, 0x0000}, {0x0B85, 0x0B8A},
	{0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A}, {0x0B9C, 0x0000},
	{0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4}, {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BAF},
	{0x0BB0, 0x0BB9}, {0x0C05, 0x0C0C}, {0x0C0E, 0x0C10}, {0x0C12, 0x0C28},
	{0x0C2A, 0x0C33}, {0x0C35, 0x0C39}, {0x0C60, 0x0C61}, {0x0C85, 0x0C8C},
	{0x0C8E, 0x0C90}, {0x0C92, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CBD, 0x0000},
	{0x0CDE, 0x0000}, {0x0CE0, 0x0CE1}, {0x0D05, 0x0D0C}, {0x0D0E, 0x0D10},
	{0x0D12, 0x0D28}, {0x0D2A, 0x0D39}, {0x0D60, 0x0D61}, {0x0D85, 0x0D96},
	{0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0000}, {0x0DC0, 0x0DC6},
	{0x0E01, 0x0E30}, {0x0E32, 0x0E33}, {0x0E40, 0x0E45}, {0x0E81, 0x0E82},
	{0x0E84, 0x0000}, {0x0E87, 0x0E88}, {0x0E8A, 0x0000}, {0x0E8D, 0x0000},
	{0x0E94, 0x0E97}, {0x0E99, 0x0E9F}, {0x0EA1, 0x0EA3}, {0x0EA5, 0x0000},
	{0x0EA7, 0x0000}, {0x0EAA, 0x0EAB}, {0x0EAD, 0x0EB0}, {0x0EB2, 0x0EB3},
	{0x0EBD, 0x0000}, {0x0EC0, 0x0EC4}, {0x0EDC, 0x0EDD}, {0x0F00, 0x0000},
	{0x0F40, 0x0F47}, {0x0F49, 0x0F6A}, {0x0F88, 0x0F8B}, {0x1000, 0x1021},
	{0x1023, 0x1027}, {0x1029, 0x102A}, {0x1050, 0x1055}, {0x10D0, 0x10F8},
	{0x1100, 0x11A2}, {0x11A8, 0x11F9}, {0x1200, 0x1248}, {0x124A, 0x124D},
	{0x1250, 0x1256}, {0x1258, 0x0000}, {0x125A, 0x125D}, {0x1260, 0x1286},
	{0x1288, 0x0000}, {0x128A, 0x128D}, {0x1290, 0x12AE}, {0x12B0, 0x0000},
	{0x12B2, 0x12B5}, {0x12B8, 0x12BE}, {0x12C0, 0x0000}, {0x12C2, 0x12C5},
	{0x12C8, 0x12CE}, {0x12D0, 0x12D6}, {0x12D8, 0x12EE}, {0x12F0, 0x130E},
	{0x1310, 0x0000}, {0x1312, 0x1315}, {0x1318, 0x131E}, {0x1320, 0x1346},
	{0x1348, 0x135A}, {0x13A0, 0x13F4}, {0x1401, 0x166C}, {0x166F, 0x1676},
	{0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x1700, 0x170C}, {0x170E, 0x1711},
	{0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770},
	{0x1780, 0x17B3}, {0x17DC, 0x0000}, {0x1820, 0x1842}, {0x1844, 0x1877},
	{0x1880, 0x18A8}, {0x1900, 0x191C}, {0x1950, 0x196D}, {0x1970, 0x1974},
	{0x2135, 0x2138}, {0x3006, 0x0000}, {0x303C, 0x0000}, {0x3041, 0x3096},
	{0x309F, 0x0000}, {0x30A1, 0x30FA}, {0x30FF, 0x0000}, {0x3105, 0x312C},
	{0x3131, 0x318E}, {0x31A0, 0x31B7}, {0x31F0, 0x31FF},

	// Large contiguous blocks - full ranges, not just their end points
	{0x3400, 0x4DB5}, {0x4E00, 0x9FA5}, {0xA000, 0xA48C}, {0xAC00, 0xD7A3},

	{0xF900, 0xFA2D}, {0xFA30, 0xFA6A}, {0xFB1D, 0x0000}, {0xFB1F, 0xFB28},
	{0xFB2A, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0x0000}, {0xFB40, 0xFB41},
	{0xFB43, 0xFB44}, {0xFB46, 0xFBB1}, {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F},
	{0xFD92, 0xFDC7}, {0xFDF0, 0xFDFB}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC},
	{0xFF66, 0xFF6F}, {0xFF71, 0xFF9D}, {0xFFA0, 0xFFBE}, {0xFFC2, 0xFFC7},
	{0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}
};
// Character ranges for the Mn table (presumably Unicode general category
// "Mark, nonspacing").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE MnRanges[] =
{
	{0x0300, 0x0357}, {0x035D, 0x036F}, {0x0483, 0x0486}, {0x0591, 0x05A1},
	{0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x0000}, {0x05C1, 0x05C2},
	{0x05C4, 0x0000}, {0x0610, 0x0615}, {0x064B, 0x0658}, {0x0670, 0x0000},
	{0x06D6, 0x06DC}, {0x06DF, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
	{0x0711, 0x0000}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0902},
	{0x093C, 0x0000}, {0x0941, 0x0948}, {0x094D, 0x0000}, {0x0951, 0x0954},
	{0x0962, 0x0963}, {0x0981, 0x0000}, {0x09BC, 0x0000}, {0x09C1, 0x09C4},
	{0x09CD, 0x0000}, {0x09E2, 0x09E3}, {0x0A01, 0x0A02}, {0x0A3C, 0x0000},
	{0x0A41, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71},
	{0x0A81, 0x0A82}, {0x0ABC, 0x0000}, {0x0AC1, 0x0AC5}, {0x0AC7, 0x0AC8},
	{0x0ACD, 0x0000}, {0x0AE2, 0x0AE3}, {0x0B01, 0x0000}, {0x0B3C, 0x0000},
	{0x0B3F, 0x0000}, {0x0B41, 0x0B43}, {0x0B4D, 0x0000}, {0x0B56, 0x0000},
	{0x0B82, 0x0000}, {0x0BC0, 0x0000}, {0x0BCD, 0x0000}, {0x0C3E, 0x0C40},
	{0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0CBC, 0x0000},
	{0x0CBF, 0x0000}, {0x0CC6, 0x0000}, {0x0CCC, 0x0CCD}, {0x0D41, 0x0D43},
	{0x0D4D, 0x0000}, {0x0DCA, 0x0000}, {0x0DD2, 0x0DD4}, {0x0DD6, 0x0000},
	{0x0E31, 0x0000}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0000},
	{0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19},
	{0x0F35, 0x0000}, {0x0F37, 0x0000}, {0x0F39, 0x0000}, {0x0F71, 0x0F7E},
	{0x0F80, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC},
	{0x0FC6, 0x0000}, {0x102D, 0x1030}, {0x1032, 0x0000}, {0x1036, 0x1037},
	{0x1039, 0x0000}, {0x1058, 0x1059}, {0x1712, 0x1714}, {0x1732, 0x1734},
	{0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B7, 0x17BD}, {0x17C6, 0x0000},
	{0x17C9, 0x17D3}, {0x17DD, 0x0000}, {0x180B, 0x180D}, {0x18A9, 0x0000},
	{0x1920, 0x1922}, {0x1927, 0x1928}, {0x1932, 0x0000}, {0x1939, 0x193B},
	{0x20D0, 0x20DC}, {0x20E1, 0x0000}, {0x20E5, 0x20EA}, {0x302A, 0x302F},
	{0x3099, 0x309A}, {0xFB1E, 0x0000}, {0xFE00, 0xFE0F}, {0xFE20, 0xFE23}
};
// Character ranges for the Mc table (presumably Unicode general category
// "Mark, spacing combining"), in ascending order.  A high value of 0x0000
// marks a single-character row.
//
// Correction relative to the previous table: the row between {0x0B02, 0x0B03}
// and {0x0B40} read {0x0BE3}, which is out of order and is not an assigned
// character.  It is a digit transposition of 0x0B3E (Oriya vowel sign AA),
// which is what belongs at that position.
CATEGORY_CHAR_RANGE McRanges[] =
{
	{0x0903, 0x0000}, {0x093E, 0x0940}, {0x0949, 0x094C}, {0x0982, 0x0983},
	{0x09BE, 0x09C0}, {0x09C7, 0x09C8}, {0x09CB, 0x09CC}, {0x09D7, 0x0000},
	{0x0A03, 0x0000}, {0x0A3E, 0x0A40}, {0x0A83, 0x0000}, {0x0ABE, 0x0AC0},
	{0x0AC9, 0x0000}, {0x0ACB, 0x0ACC}, {0x0B02, 0x0B03},
	{0x0B3E, 0x0000},		// was mistyped as 0x0BE3
	{0x0B40, 0x0000}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4C}, {0x0B57, 0x0000},
	{0x0BBE, 0x0BBF}, {0x0BC1, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCC},
	{0x0BD7, 0x0000}, {0x0C01, 0x0C03}, {0x0C41, 0x0C44}, {0x0C82, 0x0C83},
	{0x0CBE, 0x0000}, {0x0CC0, 0x0CC4}, {0x0CC7, 0x0CC8}, {0x0CCA, 0x0CCB},
	{0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D40}, {0x0D46, 0x0D48},
	{0x0D4A, 0x0D4C}, {0x0D57, 0x0000}, {0x0D82, 0x0D83}, {0x0DCF, 0x0DD1},
	{0x0DD8, 0x0DDF}, {0x0DF2, 0x0DF3}, {0x0F3E, 0x0F3F}, {0x0F7F, 0x0000},
	{0x102C, 0x0000}, {0x1031, 0x0000}, {0x1038, 0x0000}, {0x1056, 0x1057},
	{0x17B6, 0x0000}, {0x17BE, 0x17C5}, {0x17C7, 0x17C8}, {0x1923, 0x1926},
	{0x1929, 0x192B}, {0x1930, 0x1931}, {0x1933, 0x1938}
};
// Character ranges for the Me table (presumably Unicode general category
// "Mark, enclosing").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE MeRanges[] =
{
	{0x0488, 0x0489}, {0x06DE, 0x0000},
	{0x20DD, 0x20DF}, {0x20E0, 0x0000}, {0x20E2, 0x20E4}
};
// Part 1 of Nd category.  Nd2Ranges holds part 2.
CATEGORY_CHAR_RANGE DigitRanges[] =
{
	{0x0030, 0x0039}, {0x0660, 0x0669}, {0x06F0, 0x06F9}, {0x0966, 0x096F},
	{0x09E6, 0x09EF}, {0x0A66, 0x0A6F}, {0x0AE6, 0x0AEF}, {0x0B66, 0x0B6F},
	{0x0BE7, 0x0BEF}, {0x0C66, 0x0C6F}, {0x0CE6, 0x0CEF}, {0x0D66, 0x0D6F},
	{0x0E50, 0x0E59}, {0x0ED0, 0x0ED9}, {0x0F20, 0x0F29}
};
// Part 2 of Nd category.  DigitRanges holds part 1.
CATEGORY_CHAR_RANGE Nd2Ranges[] =
{
	{0x1040, 0x1049}, {0x1369, 0x1371}, {0x17E0, 0x17E9},
	{0x1810, 0x1819}, {0x1946, 0x194F}, {0xFF10, 0xFF19}
};
// Character ranges for the Nl table (presumably Unicode general category
// "Number, letter").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE NlRanges[] =
{
	{0x16EE, 0x16F0}, {0x2160, 0x2183}, {0x3007, 0x0000},
	{0x3021, 0x3029}, {0x3038, 0x303A}
};
// Character ranges for the No table (presumably Unicode general category
// "Number, other").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE NoRanges[] =
{
	{0x00B2, 0x00B3}, {0x00B9, 0x0000}, {0x00BC, 0x00BE}, {0x09F4, 0x09F9},
	{0x0BF0, 0x0BF2}, {0x0F2A, 0x0F33}, {0x1372, 0x137C}, {0x17F0, 0x17F9},
	{0x2070, 0x0000}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2153, 0x215F},
	{0x2460, 0x249B}, {0x24EA, 0x24FF}, {0x2776, 0x2793}, {0x3192, 0x3195},
	{0x3220, 0x3229}, {0x3251, 0x325F}, {0x3280, 0x3289}, {0x32B1, 0x32BF}
};
// Character ranges for the Pc table (presumably Unicode general category
// "Punctuation, connector").  A high value of 0x0000 marks a
// single-character row.
CATEGORY_CHAR_RANGE PcRanges[] =
{
	{0x005F, 0x0000}, {0x203F, 0x2040}, {0x2054, 0x0000}, {0x30FB, 0x0000},
	{0xFE33, 0xFE34}, {0xFE4D, 0xFE4F}, {0xFF3F, 0x0000}, {0xFF65, 0x0000}
};
// Character ranges for the Pd table (presumably Unicode general category
// "Punctuation, dash").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE PdRanges[] =
{
	{0x002D, 0x0000}, {0x058A, 0x0000}, {0x1806, 0x0000}, {0x2010, 0x2015},
	{0x301C, 0x0000}, {0x3030, 0x0000}, {0x30A0, 0x0000}, {0xFE31, 0xFE32},
	{0xFE58, 0x0000}, {0xFE63, 0x0000}, {0xFF0D, 0x0000}
};
// Characters for the Ps table (presumably Unicode general category
// "Punctuation, open").  Every row is a single character (high value 0x0000).
CATEGORY_CHAR_RANGE PsRanges[] =
{
	{0x0028, 0x0000}, {0x005B, 0x0000}, {0x007B, 0x0000}, {0x0F3A, 0x0000},
	{0x0F3C, 0x0000}, {0x169B, 0x0000}, {0x201A, 0x0000}, {0x201E, 0x0000},
	{0x2045, 0x0000}, {0x207D, 0x0000}, {0x208D, 0x0000}, {0x2329, 0x0000},
	{0x23B4, 0x0000}, {0x2768, 0x0000}, {0x276A, 0x0000}, {0x276C, 0x0000},
	{0x276E, 0x0000}, {0x2770, 0x0000}, {0x2772, 0x0000}, {0x2774, 0x0000},
	{0x27E6, 0x0000}, {0x27E8, 0x0000}, {0x27EA, 0x0000}, {0x2983, 0x0000},
	{0x2985, 0x0000}, {0x2987, 0x0000}, {0x2989, 0x0000}, {0x298B, 0x0000},
	{0x298D, 0x0000}, {0x298F, 0x0000}, {0x2991, 0x0000}, {0x2993, 0x0000},
	{0x2995, 0x0000}, {0x2997, 0x0000}, {0x29D8, 0x0000}, {0x29DA, 0x0000},
	{0x29FC, 0x0000}, {0x3008, 0x0000}, {0x300A, 0x0000}, {0x300C, 0x0000},
	{0x300E, 0x0000}, {0x3010, 0x0000}, {0x3014, 0x0000}, {0x3016, 0x0000},
	{0x3018, 0x0000}, {0x301A, 0x0000}, {0x301D, 0x0000}, {0xFD3E, 0x0000},
	{0xFE35, 0x0000}, {0xFE37, 0x0000}, {0xFE39, 0x0000}, {0xFE3B, 0x0000},
	{0xFE3D, 0x0000}, {0xFE3F, 0x0000}, {0xFE41, 0x0000}, {0xFE43, 0x0000},
	{0xFE47, 0x0000}, {0xFE59, 0x0000}, {0xFE5B, 0x0000}, {0xFE5D, 0x0000},
	{0xFF08, 0x0000}, {0xFF3B, 0x0000}, {0xFF5B, 0x0000}, {0xFF5F, 0x0000},
	{0xFF62, 0x0000}
};
// Characters for the Pe table (presumably Unicode general category
// "Punctuation, close").  Every row is a single character (high value
// 0x0000).
CATEGORY_CHAR_RANGE PeRanges[] =
{
	{0x0029, 0x0000}, {0x005D, 0x0000}, {0x007D, 0x0000}, {0x0F3B, 0x0000},
	{0x0F3D, 0x0000}, {0x169C, 0x0000}, {0x2046, 0x0000}, {0x207E, 0x0000},
	{0x208E, 0x0000}, {0x232A, 0x0000}, {0x23B5, 0x0000}, {0x2769, 0x0000},
	{0x276B, 0x0000}, {0x276D, 0x0000}, {0x276F, 0x0000}, {0x2771, 0x0000},
	{0x2773, 0x0000}, {0x2775, 0x0000}, {0x27E7, 0x0000}, {0x27E9, 0x0000},
	{0x27EB, 0x0000}, {0x2984, 0x0000}, {0x2986, 0x0000}, {0x2988, 0x0000},
	{0x298A, 0x0000}, {0x298C, 0x0000}, {0x298E, 0x0000}, {0x2990, 0x0000},
	{0x2992, 0x0000}, {0x2994, 0x0000}, {0x2996, 0x0000}, {0x2998, 0x0000},
	{0x29D9, 0x0000}, {0x29DB, 0x0000}, {0x29FD, 0x0000}, {0x3009, 0x0000},
	{0x300B, 0x0000}, {0x300D, 0x0000}, {0x300F, 0x0000}, {0x3011, 0x0000},
	{0x3015, 0x0000}, {0x3017, 0x0000}, {0x3019, 0x0000}, {0x301B, 0x0000},
	{0x301E, 0x0000}, {0x301F, 0x0000}, {0xFD3F, 0x0000}, {0xFE36, 0x0000},
	{0xFE38, 0x0000}, {0xFE3A, 0x0000}, {0xFE3C, 0x0000}, {0xFE3E, 0x0000},
	{0xFE40, 0x0000}, {0xFE42, 0x0000}, {0xFE44, 0x0000}, {0xFE48, 0x0000},
	{0xFE5A, 0x0000}, {0xFE5C, 0x0000}, {0xFE5E, 0x0000}, {0xFF09, 0x0000},
	{0xFF3D, 0x0000}, {0xFF5D, 0x0000}, {0xFF60, 0x0000}, {0xFF63, 0x0000}
};
// Character ranges for the Pi table (presumably Unicode general category
// "Punctuation, initial quote").  A high value of 0x0000 marks a
// single-character row.
CATEGORY_CHAR_RANGE PiRanges[] =
{
	{0x00AB, 0x0000}, {0x2018, 0x0000}, {0x201B, 0x201C},
	{0x201F, 0x0000}, {0x2039, 0x0000}
};
// Characters for the Pf table (presumably Unicode general category
// "Punctuation, final quote").  Every row is a single character (high value
// 0x0000).
CATEGORY_CHAR_RANGE PfRanges[] =
{
	{0x00BB, 0x0000}, {0x2019, 0x0000}, {0x201D, 0x0000}, {0x203A, 0x0000}
};
// Character ranges for the Po table (presumably Unicode general category
// "Punctuation, other").  A high value of 0x0000 marks a single-character
// row.
CATEGORY_CHAR_RANGE PoRanges[] =
{
	{0x0021, 0x0023}, {0x0025, 0x0027}, {0x002A, 0x0000}, {0x002C, 0x0000},
	{0x002E, 0x002F}, {0x003A, 0x003B}, {0x003F, 0x0040}, {0x005C, 0x0000},
	{0x00A1, 0x0000}, {0x00B7, 0x0000}, {0x00BF, 0x0000}, {0x037E, 0x0000},
	{0x0387, 0x0000}, {0x055A, 0x055F}, {0x0589, 0x0000}, {0x05BE, 0x0000},
	{0x05C0, 0x0000}, {0x05C3, 0x0000}, {0x05F3, 0x05F4}, {0x060C, 0x060D},
	{0x061B, 0x0000}, {0x061F, 0x0000}, {0x066A, 0x066D}, {0x06D4, 0x0000},
	{0x0700, 0x070D}, {0x0964, 0x0965}, {0x0970, 0x0000}, {0x0DF4, 0x0000},
	{0x0E4F, 0x0000}, {0x0E5A, 0x0E5B}, {0x0F04, 0x0F12}, {0x0F85, 0x0000},
	{0x104A, 0x104F}, {0x10FB, 0x0000}, {0x1361, 0x1368}, {0x166D, 0x166E},
	{0x16EB, 0x16ED}, {0x1735, 0x1736}, {0x17D4, 0x17D6}, {0x17D8, 0x17DA},
	{0x1800, 0x1805}, {0x1807, 0x180A}, {0x1944, 0x1945}, {0x2016, 0x2017},
	{0x2020, 0x2027}, {0x2030, 0x2038}, {0x203B, 0x203E}, {0x2041, 0x2043},
	{0x2047, 0x2051}, {0x2053, 0x0000}, {0x2057, 0x0000}, {0x23B6, 0x0000},
	{0x3001, 0x3003}, {0x303D, 0x0000}, {0xFE30, 0x0000}, {0xFE45, 0xFE46},
	{0xFE49, 0xFE4C}, {0xFE50, 0xFE52}, {0xFE54, 0xFE57}, {0xFE5F, 0xFE61},
	{0xFE68, 0x0000}, {0xFE6A, 0xFE6B}, {0xFF01, 0xFF03}, {0xFF05, 0xFF07},
	{0xFF0A, 0x0000}, {0xFF0C, 0x0000}, {0xFF0E, 0xFF0F}, {0xFF1A, 0xFF1B},
	{0xFF1F, 0xFF20}, {0xFF3C, 0x0000}, {0xFF61, 0x0000}, {0xFF64, 0x0000}
};
// Character ranges for the Zs table (presumably Unicode general category
// "Separator, space").  A high value of 0x0000 marks a single-character row.
CATEGORY_CHAR_RANGE ZsRanges[] =
{
	{0x0020, 0x0000}, {0x00A0, 0x0000}, {0x1680, 0x0000}, {0x180E, 0x0000},
	{0x2000, 0x200B}, {0x202F, 0x0000}, {0x205F, 0x0000}, {0x3000, 0x0000}
};
// Code points for the \p{Zl} category escape (line separator): U+2028 only.
CATEGORY_CHAR_RANGE ZlRanges[] = { {0x2028, 0x0000} };
// Code points for the \p{Zp} category escape (paragraph separator): U+2029 only.
CATEGORY_CHAR_RANGE ZpRanges[] = { {0x2029, 0x0000} };
// Code points for the \p{Sm} category escape (math symbols).
// Entries are {low, high} in ascending order; a high value of 0x0000
// appears to mark a single-character entry - confirm against the matcher.
CATEGORY_CHAR_RANGE SmRanges[] =
{
	{0x002B, 0x0000}, {0x003C, 0x003E}, {0x007C, 0x0000}, {0x007E, 0x0000},
	{0x00AC, 0x0000}, {0x00B1, 0x0000}, {0x00D7, 0x0000}, {0x00F7, 0x0000},
	{0x03F6, 0x0000}, {0x2044, 0x0000}, {0x2052, 0x0000}, {0x207A, 0x207C},
	{0x208A, 0x208C}, {0x2140, 0x2144}, {0x214B, 0x0000}, {0x2190, 0x2194},
	{0x219A, 0x219B}, {0x21A0, 0x0000}, {0x21A3, 0x0000}, {0x21A6, 0x0000},
	{0x21AE, 0x0000}, {0x21CE, 0x21CF}, {0x21D2, 0x0000}, {0x21D4, 0x0000},
	{0x21F4, 0x21FF}, {0x2200, 0x22FF}, {0x2308, 0x230B}, {0x2320, 0x2321},
	{0x237C, 0x0000}, {0x239B, 0x23B3}, {0x25B7, 0x0000}, {0x25C1, 0x0000},
	{0x25F8, 0x25FF}, {0x266F, 0x0000}, {0x27D0, 0x27E5}, {0x27F0, 0x27FF},
	{0x2900, 0x2982}, {0x2999, 0x29D7}, {0x29DC, 0x29FB}, {0x29FE, 0x2AFF},
	{0xFB29, 0x0000}, {0xFE62, 0x0000}, {0xFE64, 0xFE66}, {0xFF0B, 0x0000},
	{0xFF1C, 0xFF1E}, {0xFF5C, 0x0000}, {0xFF5E, 0x0000}, {0xFFE2, 0x0000},
	{0xFFE9, 0xFFEC}
};
// Code points for the \p{Sc} category escape (currency symbols).
// Entries are {low, high}; a high value of 0x0000 appears to mark a
// single-character entry - confirm against the matcher.
CATEGORY_CHAR_RANGE ScRanges[] =
{
	{0x0024, 0x0000}, {0x00A2, 0x00A5}, {0x09F2, 0x09F3}, {0x0AF1, 0x0000},
	{0x0BF9, 0x0000}, {0x0E3F, 0x0000}, {0x17DB, 0x0000}, {0x20A0, 0x20B1},
	{0xFDFC, 0x0000}, {0xFE69, 0x0000}, {0xFF04, 0x0000}, {0xFFE0, 0xFFE1},
	{0xFFE5, 0xFFE6}
};
// Code points for the \p{Sk} category escape (modifier symbols).
// Entries are {low, high}; a high value of 0x0000 appears to mark a
// single-character entry - confirm against the matcher.
CATEGORY_CHAR_RANGE SkRanges[] =
{
	{0x005E, 0x0000}, {0x0060, 0x0000}, {0x00A8, 0x0000}, {0x00AF, 0x0000},
	{0x00B4, 0x0000}, {0x00B8, 0x0000}, {0x02C2, 0x02C5}, {0x02D2, 0x02DF},
	{0x02E5, 0x02FF}, {0x0374, 0x0375}, {0x0384, 0x0385}, {0x1FBD, 0x0000},
	{0x1FBF, 0x1FC1}, {0x1FCD, 0x1FCF}, {0x1FDD, 0x1FDF}, {0x1FED, 0x1FEF},
	{0x1FFD, 0x0000}, {0x1FFE, 0x0000}, {0x309B, 0x309C}, {0xFF3E, 0x0000},
	{0xFF40, 0x0000}, {0xFFE3, 0x0000}
};
// Code points for the \p{So} category escape (other symbols).
// Entries are {low, high} in ascending order; a high value of 0x0000
// appears to mark a single-character entry - confirm against the matcher.
CATEGORY_CHAR_RANGE SoRanges[] =
{
	{0x00A6, 0x00A7}, {0x00A9, 0x0000}, {0x00AE, 0x0000}, {0x00B0, 0x0000},
	{0x00B6, 0x0000}, {0x0482, 0x0000}, {0x060E, 0x060F}, {0x06E9, 0x0000},
	{0x06FD, 0x06FE}, {0x09FA, 0x0000}, {0x0B70, 0x0000}, {0x0BF3, 0x0BFA},
	{0x0F01, 0x0F03}, {0x0F13, 0x0F17}, {0x0F1A, 0x0F1F}, {0x0F34, 0x0000},
	{0x0F36, 0x0000}, {0x0F38, 0x0000}, {0x0FBE, 0x0FBF}, {0x0FC0, 0x0FC5},
	{0x0FC7, 0x0FCC}, {0x0FCF, 0x0000}, {0x1940, 0x0000}, {0x19E0, 0x19FF},
	{0x2100, 0x2101}, {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x0000},
	{0x2116, 0x2118}, {0x211E, 0x211F}, {0x2120, 0x2123}, {0x2125, 0x0000},
	{0x2127, 0x0000}, {0x2129, 0x0000}, {0x212E, 0x0000}, {0x2132, 0x0000},
	{0x213A, 0x213B}, {0x214A, 0x0000}, {0x2195, 0x2199}, {0x219C, 0x219F},
	{0x21A1, 0x21A2}, {0x21A4, 0x21A5}, {0x21A7, 0x21AD}, {0x21AF, 0x21CD},
	{0x21D0, 0x21D1}, {0x21D3, 0x0000}, {0x21D5, 0x21F3}, {0x2300, 0x2307},
	{0x230C, 0x231F}, {0x2322, 0x2328}, {0x232B, 0x237B}, {0x237D, 0x239A},
	{0x23B7, 0x23D0}, {0x2400, 0x2426}, {0x2440, 0x244A}, {0x249C, 0x24E9},
	{0x2500, 0x25C0}, {0x25C2, 0x25F7}, {0x2600, 0x2617}, {0x2619, 0x266E},
	{0x2670, 0x267D}, {0x2680, 0x2691}, {0x26A0, 0x26A1}, {0x2701, 0x2704},
	{0x2706, 0x2709}, {0x270C, 0x2727}, {0x2729, 0x274B}, {0x274D, 0x0000},
	{0x274F, 0x2752}, {0x2756, 0x0000}, {0x2758, 0x275E}, {0x2761, 0x2767},
	{0x2794, 0x0000}, {0x2798, 0x27AF}, {0x27B1, 0x27BE}, {0x2800, 0x28FF},
	{0x2B00, 0x2B0D}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5},
	{0x2FF0, 0x2FFB}, {0x3004, 0x0000}, {0x3012, 0x3013}, {0x3020, 0x0000},
	{0x3036, 0x3037}, {0x303E, 0x303F}, {0x3190, 0x3191}, {0x3196, 0x319F},
	{0x3200, 0x321E}, {0x322A, 0x3243}, {0x3250, 0x0000}, {0x3260, 0x327D},
	{0x327F, 0x0000}, {0x328A, 0x32FE}, {0x3300, 0x33FF}, {0x4DC0, 0x4DFF},
	{0xA490, 0xA4C6}, {0xFDFD, 0x0000}, {0xFFE4, 0x0000}, {0xFFE8, 0x0000},
	{0xFFED, 0xFFEE}, {0xFFFC, 0xFFFD}
};
// Code points for the \p{Cc} category escape (control characters).
// The first range starts at 0x0001, so 0x0000 itself is not listed.
CATEGORY_CHAR_RANGE CcRanges[] = { {0x0001, 0x001F}, {0x007F, 0x009F} };
// Code points for the \p{Cf} category escape (format characters).
// Entries are {low, high}; a high value of 0x0000 appears to mark a
// single-character entry - confirm against the matcher.
CATEGORY_CHAR_RANGE CfRanges[] =
{
	{0x00AD, 0x0000}, {0x0600, 0x0603}, {0x06DD, 0x0000}, {0x070F, 0x0000},
	{0x17B4, 0x17B5}, {0x200C, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x2063},
	{0x206A, 0x206F}, {0xFEFF, 0x0000}, {0xFFF9, 0xFFFB}
};
// Code points for the \p{Co} category escape (private use).
// The BMP private-use area is the contiguous span U+E000..U+F8FF, so it must
// be one {low, high} range.  Listing the two endpoints as separate entries
// with a high value of 0x0000 (the form the other tables use for single
// characters) would leave everything between them out of the category.
CATEGORY_CHAR_RANGE CoRanges[] =
{
	{0xE000, 0xF8FF}
};
// The Cn category has no range table: this pointer is NULL and is paired
// with a CnRangeSize of 0 in the C category tables.
CATEGORY_CHAR_RANGE * CnRanges = NULL;
// Describes one category as a window onto a pair of parallel arrays:
// uiNumArrays consecutive range tables starting at *ppRangeArrays, whose
// element counts are the matching entries starting at *puiRangeArraySizes.
typedef struct CATEGORY_CHARS
{
FLMUINT uiNumArrays;						// Number of range tables in this category
FLMUINT * puiRangeArraySizes;			// First of uiNumArrays element counts
CATEGORY_CHAR_RANGE ** ppRangeArrays;	// First of uiNumArrays range tables
} CATEGORY_CHARS;
// Letter (L) categories.  LCategoryRangeSizes and LCategoryRanges are
// parallel arrays ordered Lu, Ll, Lt, Lm, Lo.  Each CATEGORY_CHARS below is
// {table count, first size, first table}: LCategory spans all five tables,
// the two-letter categories select exactly one.
#define LuRangeSize	(sizeof( LuRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define LlRangeSize	(sizeof( LlRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define LtRangeSize	(sizeof( LtRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define LmRangeSize	(sizeof( LmRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define LoRangeSize	(sizeof( LoRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT LCategoryRangeSizes [] =
{
	LuRangeSize,
	LlRangeSize,
	LtRangeSize,
	LmRangeSize,
	LoRangeSize
};

CATEGORY_CHAR_RANGE * LCategoryRanges [] =
{
	LuRanges,
	LlRanges,
	LtRanges,
	LmRanges,
	LoRanges
};

CATEGORY_CHARS LCategory  = { 5, LCategoryRangeSizes,     LCategoryRanges};
CATEGORY_CHARS LuCategory = { 1, LCategoryRangeSizes,     LCategoryRanges};
CATEGORY_CHARS LlCategory = { 1, LCategoryRangeSizes + 1, LCategoryRanges + 1};
CATEGORY_CHARS LtCategory = { 1, LCategoryRangeSizes + 2, LCategoryRanges + 2};
CATEGORY_CHARS LmCategory = { 1, LCategoryRangeSizes + 3, LCategoryRanges + 3};
CATEGORY_CHARS LoCategory = { 1, LCategoryRangeSizes + 4, LCategoryRanges + 4};
// Mark (M) categories.  MCategoryRangeSizes and MCategoryRanges are parallel
// arrays ordered Mn, Mc, Me.  Each CATEGORY_CHARS below is
// {table count, first size, first table}.
#define MnRangeSize	(sizeof( MnRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define McRangeSize	(sizeof( McRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define MeRangeSize	(sizeof( MeRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT MCategoryRangeSizes [] =
{
	MnRangeSize,
	McRangeSize,
	MeRangeSize
};

CATEGORY_CHAR_RANGE * MCategoryRanges[] =
{
	MnRanges,
	McRanges,
	MeRanges
};

CATEGORY_CHARS MCategory  = { 3, MCategoryRangeSizes,     MCategoryRanges};
CATEGORY_CHARS MnCategory = { 1, MCategoryRangeSizes,     MCategoryRanges};
CATEGORY_CHARS McCategory = { 1, MCategoryRangeSizes + 1, MCategoryRanges + 1};
CATEGORY_CHARS MeCategory = { 1, MCategoryRangeSizes + 2, MCategoryRanges + 2};
// Number (N) categories.  NCategoryRangeSizes and NCategoryRanges are
// parallel arrays ordered Digit, Nd2, Nl, No.  NdCategory covers the first
// two tables (Digit plus Nd2); DigitCategory covers only the first.
#define DigitRangeSize	(sizeof( DigitRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define Nd2RangeSize		(sizeof( Nd2Ranges) / sizeof( CATEGORY_CHAR_RANGE))
#define NlRangeSize		(sizeof( NlRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define NoRangeSize		(sizeof( NoRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT NCategoryRangeSizes [] =
{
	DigitRangeSize,
	Nd2RangeSize,
	NlRangeSize,
	NoRangeSize
};

CATEGORY_CHAR_RANGE * NCategoryRanges[] =
{
	DigitRanges,
	Nd2Ranges,
	NlRanges,
	NoRanges
};

CATEGORY_CHARS NCategory     = { 4, NCategoryRangeSizes,     NCategoryRanges};
CATEGORY_CHARS DigitCategory = { 1, NCategoryRangeSizes,     NCategoryRanges};
CATEGORY_CHARS NdCategory    = { 2, NCategoryRangeSizes,     NCategoryRanges};
CATEGORY_CHARS Nd2Category   = { 1, NCategoryRangeSizes + 1, NCategoryRanges + 1};
CATEGORY_CHARS NlCategory    = { 1, NCategoryRangeSizes + 2, NCategoryRanges + 2};
CATEGORY_CHARS NoCategory    = { 1, NCategoryRangeSizes + 3, NCategoryRanges + 3};
// Punctuation (P) categories.  PCategoryRangeSizes and PCategoryRanges are
// parallel arrays ordered Pc, Pd, Ps, Pe, Pi, Pf, Po.  Each CATEGORY_CHARS
// below is {table count, first size, first table}.
#define PcRangeSize	(sizeof( PcRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define PdRangeSize	(sizeof( PdRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define PsRangeSize	(sizeof( PsRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define PeRangeSize	(sizeof( PeRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define PiRangeSize	(sizeof( PiRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define PfRangeSize	(sizeof( PfRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define PoRangeSize	(sizeof( PoRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT PCategoryRangeSizes [] =
{
	PcRangeSize,
	PdRangeSize,
	PsRangeSize,
	PeRangeSize,
	PiRangeSize,
	PfRangeSize,
	PoRangeSize
};

CATEGORY_CHAR_RANGE * PCategoryRanges[] =
{
	PcRanges,
	PdRanges,
	PsRanges,
	PeRanges,
	PiRanges,
	PfRanges,
	PoRanges
};

CATEGORY_CHARS PCategory  = { 7, PCategoryRangeSizes,     PCategoryRanges};
CATEGORY_CHARS PcCategory = { 1, PCategoryRangeSizes,     PCategoryRanges};
CATEGORY_CHARS PdCategory = { 1, PCategoryRangeSizes + 1, PCategoryRanges + 1};
CATEGORY_CHARS PsCategory = { 1, PCategoryRangeSizes + 2, PCategoryRanges + 2};
CATEGORY_CHARS PeCategory = { 1, PCategoryRangeSizes + 3, PCategoryRanges + 3};
CATEGORY_CHARS PiCategory = { 1, PCategoryRangeSizes + 4, PCategoryRanges + 4};
CATEGORY_CHARS PfCategory = { 1, PCategoryRangeSizes + 5, PCategoryRanges + 5};
CATEGORY_CHARS PoCategory = { 1, PCategoryRangeSizes + 6, PCategoryRanges + 6};
// Separator (Z) categories.  ZCategoryRangeSizes and ZCategoryRanges are
// parallel arrays ordered Zs, Zl, Zp.  Each CATEGORY_CHARS below is
// {table count, first size, first table}.
#define ZsRangeSize	(sizeof( ZsRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define ZlRangeSize	(sizeof( ZlRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define ZpRangeSize	(sizeof( ZpRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT ZCategoryRangeSizes [] =
{
	ZsRangeSize,
	ZlRangeSize,
	ZpRangeSize
};

CATEGORY_CHAR_RANGE * ZCategoryRanges[] =
{
	ZsRanges,
	ZlRanges,
	ZpRanges
};

CATEGORY_CHARS ZCategory  = { 3, ZCategoryRangeSizes,     ZCategoryRanges};
CATEGORY_CHARS ZsCategory = { 1, ZCategoryRangeSizes,     ZCategoryRanges};
CATEGORY_CHARS ZlCategory = { 1, ZCategoryRangeSizes + 1, ZCategoryRanges + 1};
CATEGORY_CHARS ZpCategory = { 1, ZCategoryRangeSizes + 2, ZCategoryRanges + 2};
// Symbol (S) categories.  SCategoryRangeSizes and SCategoryRanges are
// parallel arrays ordered Sm, Sc, Sk, So.  Each CATEGORY_CHARS below is
// {table count, first size, first table}.
#define SmRangeSize	(sizeof( SmRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define ScRangeSize	(sizeof( ScRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define SkRangeSize	(sizeof( SkRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define SoRangeSize	(sizeof( SoRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT SCategoryRangeSizes [] =
{
	SmRangeSize,
	ScRangeSize,
	SkRangeSize,
	SoRangeSize
};

CATEGORY_CHAR_RANGE * SCategoryRanges[] =
{
	SmRanges,
	ScRanges,
	SkRanges,
	SoRanges
};

CATEGORY_CHARS SCategory  = { 4, SCategoryRangeSizes,     SCategoryRanges};
CATEGORY_CHARS SmCategory = { 1, SCategoryRangeSizes,     SCategoryRanges};
CATEGORY_CHARS ScCategory = { 1, SCategoryRangeSizes + 1, SCategoryRanges + 1};
CATEGORY_CHARS SkCategory = { 1, SCategoryRangeSizes + 2, SCategoryRanges + 2};
CATEGORY_CHARS SoCategory = { 1, SCategoryRangeSizes + 3, SCategoryRanges + 3};
// Other (C) categories.  CCategoryRangeSizes and CCategoryRanges are parallel
// arrays ordered Cc, Cf, Co, Cn.  The Cn slot has a size of 0 and takes the
// value of the CnRanges pointer, so CnCategory selects an empty table.
#define CcRangeSize	(sizeof( CcRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define CfRangeSize	(sizeof( CfRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define CoRangeSize	(sizeof( CoRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define CnRangeSize	0

FLMUINT CCategoryRangeSizes [] =
{
	CcRangeSize,
	CfRangeSize,
	CoRangeSize,
	CnRangeSize
};

CATEGORY_CHAR_RANGE * CCategoryRanges[] =
{
	CcRanges,
	CfRanges,
	CoRanges,
	CnRanges
};

CATEGORY_CHARS CCategory  = { 4, CCategoryRangeSizes,     CCategoryRanges};
CATEGORY_CHARS CcCategory = { 1, CCategoryRangeSizes,     CCategoryRanges};
CATEGORY_CHARS CfCategory = { 1, CCategoryRangeSizes + 1, CCategoryRanges + 1};
CATEGORY_CHARS CoCategory = { 1, CCategoryRangeSizes + 2, CCategoryRanges + 2};
CATEGORY_CHARS CnCategory = { 1, CCategoryRangeSizes + 3, CCategoryRanges + 3};
// Letter code points used by LetterCategory / NameCharCategory.
// NOTE(review): the ranges look like the XML 1.0 "Letter" production
// (BaseChar | Ideographic) - confirm against the spec before editing.
// Entries are {low, high} in ascending order; a high value of 0x0000
// appears to mark a single-character entry.
CATEGORY_CHAR_RANGE LetterRanges [] =
{
	{0x0041, 0x005A}, {0x0061, 0x007A}, {0x00C0, 0x00D6}, {0x00D8, 0x00F6},
	{0x00F8, 0x00FF}, {0x0100, 0x0131}, {0x0134, 0x013E}, {0x0141, 0x0148},
	{0x014A, 0x017E}, {0x0180, 0x01C3}, {0x01CD, 0x01F0}, {0x01F4, 0x01F5},
	{0x01FA, 0x0217}, {0x0250, 0x02A8}, {0x02BB, 0x02C1}, {0x0386, 0x0000},
	{0x0388, 0x038A}, {0x038C, 0x0000}, {0x038E, 0x03A1}, {0x03A3, 0x03CE},
	{0x03D0, 0x03D6}, {0x03DA, 0x0000}, {0x03DC, 0x0000}, {0x03DE, 0x0000},
	{0x03E0, 0x0000}, {0x03E2, 0x03F3}, {0x0401, 0x040C}, {0x040E, 0x044F},
	{0x0451, 0x045C}, {0x045E, 0x0481}, {0x0490, 0x04C4}, {0x04C7, 0x04C8},
	{0x04CB, 0x04CC}, {0x04D0, 0x04EB}, {0x04EE, 0x04F5}, {0x04F8, 0x04F9},
	{0x0531, 0x0556}, {0x0559, 0x0000}, {0x0561, 0x0586}, {0x05D0, 0x05EA},
	{0x05F0, 0x05F2}, {0x0621, 0x063A}, {0x0641, 0x064A}, {0x0671, 0x06B7},
	{0x06BA, 0x06BE}, {0x06C0, 0x06CE}, {0x06D0, 0x06D3}, {0x06D5, 0x0000},
	{0x06E5, 0x06E6}, {0x0905, 0x0939}, {0x093D, 0x0000}, {0x0958, 0x0961},
	{0x0985, 0x098C}, {0x098F, 0x0990}, {0x0993, 0x09A8}, {0x09AA, 0x09B0},
	{0x09B2, 0x0000}, {0x09B6, 0x09B9}, {0x09DC, 0x09DD}, {0x09DF, 0x09E1},
	{0x09F0, 0x09F1}, {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
	{0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36}, {0x0A38, 0x0A39},
	{0x0A59, 0x0A5C}, {0x0A5E, 0x0000}, {0x0A72, 0x0A74}, {0x0A85, 0x0A8B},
	{0x0A8D, 0x0000}, {0x0A8F, 0x0A91}, {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0},
	{0x0AB2, 0x0AB3}, {0x0AB5, 0x0AB9}, {0x0ABD, 0x0000}, {0x0AE0, 0x0000},
	{0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28}, {0x0B2A, 0x0B30},
	{0x0B32, 0x0B33}, {0x0B36, 0x0B39}, {0x0B3D, 0x0000}, {0x0B5C, 0x0B5D},
	{0x0B5F, 0x0B61}, {0x0B85, 0x0B8A}, {0x0B8E, 0x0B90}, {0x0B92, 0x0B95},
	{0x0B99, 0x0B9A}, {0x0B9C, 0x0000}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
	{0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB5}, {0x0BB7, 0x0BB9}, {0x0C05, 0x0C0C},
	{0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C33}, {0x0C35, 0x0C39},
	{0x0C60, 0x0C61}, {0x0C85, 0x0C8C}, {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8},
	{0x0CAA, 0x0CB3}, {0x0CB5, 0x0CB9}, {0x0CDE, 0x0000}, {0x0CE0, 0x0CE1},
	{0x0D05, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D28}, {0x0D2A, 0x0D39},
	{0x0D60, 0x0D61}, {0x0E01, 0x0E2E}, {0x0E30, 0x0000}, {0x0E32, 0x0E33},
	{0x0E40, 0x0E45}, {0x0E81, 0x0E82}, {0x0E84, 0x0000}, {0x0E87, 0x0E88},
	{0x0E8A, 0x0000}, {0x0E8D, 0x0000}, {0x0E94, 0x0E97}, {0x0E99, 0x0E9F},
	{0x0EA1, 0x0EA3}, {0x0EA5, 0x0000}, {0x0EA7, 0x0000}, {0x0EAA, 0x0EAB},
	{0x0EAD, 0x0EAE}, {0x0EB0, 0x0000}, {0x0EB2, 0x0EB3}, {0x0EBD, 0x0000},
	{0x0EC0, 0x0EC4}, {0x0F40, 0x0F47}, {0x0F49, 0x0F69}, {0x10A0, 0x10C5},
	{0x10D0, 0x10F6}, {0x1100, 0x0000}, {0x1102, 0x1103}, {0x1105, 0x1107},
	{0x1109, 0x0000}, {0x110B, 0x110C}, {0x110E, 0x1112}, {0x113C, 0x0000},
	{0x113E, 0x0000}, {0x1140, 0x0000}, {0x114C, 0x0000}, {0x114E, 0x0000},
	{0x1150, 0x0000}, {0x1154, 0x1155}, {0x1159, 0x0000}, {0x115F, 0x1161},
	{0x1163, 0x0000}, {0x1165, 0x0000}, {0x1167, 0x0000}, {0x1169, 0x0000},
	{0x116D, 0x116E}, {0x1172, 0x1173}, {0x1175, 0x0000}, {0x119E, 0x0000},
	{0x11A8, 0x0000}, {0x11AB, 0x0000}, {0x11AE, 0x11AF}, {0x11B7, 0x11B8},
	{0x11BA, 0x0000}, {0x11BC, 0x11C2}, {0x11EB, 0x0000}, {0x11F0, 0x0000},
	{0x11F9, 0x0000}, {0x1E00, 0x1E9B}, {0x1EA0, 0x1EF9}, {0x1F00, 0x1F15},
	{0x1F18, 0x1F1D}, {0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57},
	{0x1F59, 0x0000}, {0x1F5B, 0x0000}, {0x1F5D, 0x0000}, {0x1F5F, 0x1F7D},
	{0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x0000}, {0x1FC2, 0x1FC4},
	{0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC},
	{0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2126, 0x0000}, {0x212A, 0x212B},
	{0x212E, 0x0000}, {0x2180, 0x2182}, {0x3007, 0x0000}, {0x3021, 0x3029},
	{0x3041, 0x3094}, {0x30A1, 0x30FA}, {0x3105, 0x312C}, {0x4E00, 0x9FA5},
	{0xAC00, 0xD7A3}
};
// Combining-character code points used by CombiningCategory /
// NameCharCategory.
// NOTE(review): the ranges look like the XML 1.0 "CombiningChar"
// production - confirm against the spec before editing.
// Entries are {low, high} in ascending order; a high value of 0x0000
// appears to mark a single-character entry.
CATEGORY_CHAR_RANGE CombiningRanges [] =
{
	{0x0300, 0x0345}, {0x0360, 0x0361}, {0x0483, 0x0486}, {0x0591, 0x05A1},
	{0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x0000}, {0x05C1, 0x05C2},
	{0x05C4, 0x0000}, {0x064B, 0x0652}, {0x0670, 0x0000}, {0x06D6, 0x06DC},
	{0x06DD, 0x06DF}, {0x06E0, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED},
	{0x0901, 0x0903}, {0x093C, 0x0000}, {0x093E, 0x094C}, {0x094D, 0x0000},
	{0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x0000},
	{0x09BE, 0x0000}, {0x09BF, 0x0000}, {0x09C0, 0x09C4}, {0x09C7, 0x09C8},
	{0x09CB, 0x09CD}, {0x09D7, 0x0000}, {0x09E2, 0x09E3}, {0x0A02, 0x0000},
	{0x0A3C, 0x0000}, {0x0A3E, 0x0000}, {0x0A3F, 0x0000}, {0x0A40, 0x0A42},
	{0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83},
	{0x0ABC, 0x0000}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD},
	{0x0B01, 0x0B03}, {0x0B3C, 0x0000}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48},
	{0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0B83}, {0x0BBE, 0x0BC2},
	{0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0000}, {0x0C01, 0x0C03},
	{0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56},
	{0x0C82, 0x0C83}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD},
	{0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48},
	{0x0D4A, 0x0D4D}, {0x0D57, 0x0000}, {0x0E31, 0x0000}, {0x0E34, 0x0E3A},
	{0x0E47, 0x0E4E}, {0x0EB1, 0x0000}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC},
	{0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0000}, {0x0F37, 0x0000},
	{0x0F39, 0x0000}, {0x0F3E, 0x0000}, {0x0F3F, 0x0000}, {0x0F71, 0x0F84},
	{0x0F86, 0x0F8B}, {0x0F90, 0x0F95}, {0x0F97, 0x0000}, {0x0F99, 0x0FAD},
	{0x0FB1, 0x0FB7}, {0x0FB9, 0x0000}, {0x20D0, 0x20DC}, {0x20E1, 0x0000},
	{0x302A, 0x302F}, {0x3099, 0x0000}, {0x309A, 0x0000}
};
// Extender code points used by ExtenderCategory / NameCharCategory.
// NOTE(review): the ranges look like the XML 1.0 "Extender" production -
// confirm against the spec before editing.
// Entries are {low, high}; a high value of 0x0000 appears to mark a
// single-character entry.
CATEGORY_CHAR_RANGE ExtenderRanges [] =
{
	{0x00B7, 0x0000}, {0x02D0, 0x0000}, {0x02D1, 0x0000}, {0x0387, 0x0000},
	{0x0640, 0x0000}, {0x0E46, 0x0000}, {0x0EC6, 0x0000}, {0x3005, 0x0000},
	{0x3031, 0x3035}, {0x309D, 0x309E}, {0x30FC, 0x30FE}
};
// Name-character categories.  NameCharCategoryRangeSizes and
// NameCharCategoryRanges are parallel arrays ordered Letter, Combining,
// Extender, Digit.  NameCharCategory spans all four tables; the others
// select exactly one.  The Digit table is shared with the N categories.
#define LetterRangeSize		(sizeof( LetterRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define CombiningRangeSize	(sizeof( CombiningRanges) / sizeof( CATEGORY_CHAR_RANGE))
#define ExtenderRangeSize	(sizeof( ExtenderRanges) / sizeof( CATEGORY_CHAR_RANGE))

FLMUINT NameCharCategoryRangeSizes [] =
{
	LetterRangeSize,
	CombiningRangeSize,
	ExtenderRangeSize,
	DigitRangeSize
};

CATEGORY_CHAR_RANGE * NameCharCategoryRanges[] =
{
	LetterRanges,
	CombiningRanges,
	ExtenderRanges,
	DigitRanges
};

CATEGORY_CHARS NameCharCategory  = { 4, NameCharCategoryRangeSizes,     NameCharCategoryRanges};
CATEGORY_CHARS LetterCategory    = { 1, NameCharCategoryRangeSizes,     NameCharCategoryRanges};
CATEGORY_CHARS CombiningCategory = { 1, NameCharCategoryRangeSizes + 1, NameCharCategoryRanges + 1};
CATEGORY_CHARS ExtenderCategory  = { 1, NameCharCategoryRangeSizes + 2, NameCharCategoryRanges + 2};
// Payload of an EXP_LITERAL expression: a counted run of Unicode characters.
// addLiteralExpr copies the characters into pool memory; no terminator is
// stored.
typedef struct REGEXP_LITERAL
{
FLMUNICODE * puzLiteral;	// Pool-allocated copy of the literal characters
FLMUINT uiNumChars;			// Number of characters in puzLiteral
} REGEXP_LITERAL;
// One low..high character range inside a character class.  Nodes are
// created by createCharRange and appended to the owning CHAR_CLASS's
// doubly linked range list.
typedef struct CHAR_RANGE
{
FLMBOOL bNegatedRange;		// Negation flag supplied by the caller
FLMUNICODE uzLowChar;		// First character of the range
FLMUNICODE uzHighChar;		// Last character of the range
CHAR_RANGE * pNext;			// Next range in the class (NULL at tail)
CHAR_RANGE * pPrev;			// Previous range in the class (NULL at head)
} CHAR_RANGE;
// An explicit set of characters inside a character class.  Nodes are
// created by createCharList and appended to the owning CHAR_CLASS's
// doubly linked character-list list.
typedef struct CHAR_LIST
{
FLMBOOL bNegatedChars;		// Negation flag supplied by the caller
FLMUNICODE * puzChars;		// Pool-allocated array of uiNumChars characters
FLMUINT uiNumChars;			// Number of characters in puzChars
CHAR_LIST * pNext;			// Next list in the class (NULL at tail)
CHAR_LIST * pPrev;			// Previous list in the class (NULL at head)
} CHAR_LIST;
// A reference to one of the static category tables from inside a character
// class.  Nodes are created by createCharCategory and appended to the owning
// CHAR_CLASS's doubly linked category list.
typedef struct CHAR_CATEGORY
{
FLMBOOL bNegatedCategory;				// Negation flag supplied by the caller
CATEGORY_CHARS * pCategoryChars;		// Category table description (not owned)
CHAR_CATEGORY * pNext;					// Next category in the class (NULL at tail)
CHAR_CATEGORY * pPrev;					// Previous category in the class (NULL at head)
} CHAR_CATEGORY;
// Forward pointer type so REG_EXP_BRANCH can refer to REG_EXP before it is
// defined.
typedef struct REG_EXP * REG_EXP_p;
// A character class: the head/tail pointers of four doubly linked lists
// (explicit characters, ranges, category references and nested classes).
typedef struct CHAR_CLASS
{
FLMBOOL bNegatedClass;					// Negation flag supplied by the caller
CHAR_LIST * pFirstCharList;			// Head of explicit-character lists
CHAR_LIST * pLastCharList;				// Tail of explicit-character lists
CHAR_RANGE * pFirstCharRange;			// Head of character ranges
CHAR_RANGE * pLastCharRange;			// Tail of character ranges
CHAR_CATEGORY * pFirstCharCategory;	// Head of category references
CHAR_CATEGORY * pLastCharCategory;	// Tail of category references
CHAR_CLASS * pFirstCharClass;			// Head of nested classes
CHAR_CLASS * pLastCharClass;			// Tail of nested classes
CHAR_CLASS * pSubtractionClass;		// NOTE(review): presumably the class to subtract ([a-z-[aeiou]]) - confirm
CHAR_CLASS * pNext;						// Next sibling when nested in another class
CHAR_CLASS * pPrev;						// Previous sibling when nested in another class
} CHAR_CLASS;
// A branch: an ordered, doubly linked sequence of expressions.  createRegExp
// appends each new expression to the current branch.  Branches themselves
// are chained through pNextBranch/pPrevBranch.
typedef struct REG_EXP_BRANCH
{
REG_EXP_p pParentExpr;				// Owning expression; NULL for the top branch
REG_EXP_p pFirstExpr;				// First expression in the branch
REG_EXP_p pLastExpr;					// Last expression in the branch
REG_EXP_BRANCH * pNextBranch;		// Next sibling branch
REG_EXP_BRANCH * pPrevBranch;		// Previous sibling branch
} REG_EXP_BRANCH;
// Payload of an EXP_ALTERNATIVES expression: head and tail of its list of
// branches.
typedef struct REG_EXP_ALTERNATIVE
{
REG_EXP_BRANCH * pFirstBranch;	// First branch of the alternation
REG_EXP_BRANCH * pLastBranch;		// Last branch of the alternation
} REG_EXP_ALTERNATIVE;
// One node of the parsed expression tree.  eType selects which member of
// the exp union is valid.  Nodes are allocated zero-filled from the pool by
// createRegExp and linked into their owning branch.
typedef struct REG_EXP
{
eExpType eType;				// EXP_LITERAL, EXP_CHAR_CLASS or EXP_ALTERNATIVES
FLMUINT uiMinOccurs;			// NOTE(review): presumably quantifier lower bound - confirm in parseQuantifier
FLMUINT uiMaxOccurs;			// NOTE(review): presumably quantifier upper bound - confirm in parseQuantifier
FLMBOOL bUnlimited;			// NOTE(review): presumably "no upper bound" - confirm in parseQuantifier
FLMBOOL bQuantified;			// NOTE(review): presumably set once a quantifier is attached - confirm
union
{
REGEXP_LITERAL literal;				// Valid when eType == EXP_LITERAL
CHAR_CLASS charClass;				// Valid when eType == EXP_CHAR_CLASS
REG_EXP_ALTERNATIVE alternative;	// Valid when eType == EXP_ALTERNATIVES
} exp;
REG_EXP_BRANCH * pBranch;	// Branch this expression belongs to
REG_EXP * pNext;				// Next expression in the branch (NULL at tail)
REG_EXP * pPrev;				// Previous expression in the branch (NULL at head)
} REG_EXP;
/*****************************************************************************
Desc: The regular expression class.  Parses an expression into a tree of
REG_EXP nodes hanging off m_topBranch; the tree nodes are allocated from
m_Pool and released together in the destructor.
*****************************************************************************/
class F_RegExp : public XF_Base
{
public:
F_RegExp();
~F_RegExp();
// NOTE(review): presumably parses puzRegExp into the expression tree -
// the implementation is outside this section.
RCODE setExpression(
FLMUNICODE * puzRegExp);
// NOTE(review): presumably tests the stream contents against the parsed
// expression - the implementation is outside this section.
FLMBOOL testString(
IF_PosIStream * pIStream);
private:
// Allocate a zeroed REG_EXP of the given type and append it to the
// current branch.
RCODE createRegExp(
eExpType eType,
REG_EXP ** ppExpr);
// Append one character to the pending literal buffer, growing it on demand.
RCODE addLiteralChar(
FLMUNICODE uzChar);
// Create an EXP_LITERAL expression holding a pool copy of the characters.
RCODE addLiteralExpr(
FLMUNICODE * puzLiteral,
FLMUINT uiNumChars,
REG_EXP ** ppExpr);
// Flush the pending literal buffer (if non-empty) into the tree.
RCODE saveLiteral( void);
// Append a category reference to pCharClass.
RCODE createCharCategory(
CATEGORY_CHARS * pCategoryChars,
FLMBOOL bNegatedCategory,
CHAR_CLASS * pCharClass);
// Append a low..high range to pCharClass.
RCODE createCharRange(
FLMUNICODE uzLowChar,
FLMUNICODE uzHighChar,
FLMBOOL bNegatedRange,
CHAR_CLASS * pCharClass);
// Append an explicit character list to pCharClass.  The characters come
// from pszChars (8-bit) or puzChars (Unicode), not both.
RCODE createCharList(
const char * pszChars,
FLMUNICODE * puzChars,
FLMUINT uiNumChars,
FLMBOOL bNegatedChars,
CHAR_CLASS * pCharClass);
// Create a nested character class under pCharClass.
RCODE createCharClass(
FLMBOOL bNegatedClass,
CHAR_CLASS * pCharClass,
CHAR_CLASS ** ppNewCharClass);
RCODE parseEscape(
FLMUNICODE ** ppuzRegExp,
CHAR_CLASS * pCharClass,
FLMUNICODE * puzChar);
RCODE parseCharClass(
FLMUNICODE ** ppuzRegExp,
CHAR_CLASS * pCharClass);
RCODE parseQuantifier(
FLMUNICODE ** ppuzRegExp);
RCODE startAlternative( void);
RCODE endAlternative( void);
RCODE startNewBranch( void);
F_Pool m_Pool;								// Pool backing all expression-tree allocations
REG_EXP_BRANCH m_topBranch;			// Root branch of the expression tree
REG_EXP_BRANCH * m_pCurrBranch; // Used only when parsing
FLMUNICODE m_uzLiteral [256];			// In-object buffer for the pending literal
FLMUNICODE * m_puzLiteral;				// Current literal buffer: m_uzLiteral or a heap block
FLMUINT m_uiMaxLiteralChars;			// Capacity of m_puzLiteral, in characters
FLMUINT m_uiNumLiteralChars;			// Characters currently held in m_puzLiteral
};
// Local function prototypes
// isCategory: recognises a {X} / {Xx} category name following 'p' or 'P';
// on success stores the category and advances *ppuzRegExp past the '}'.
FSTATIC FLMBOOL isCategory(
CATEGORY_CHARS ** ppCategoryChars,
FLMUNICODE ** ppuzRegExp);
// isBlock: recognises a {IsBlockName} block name following 'p' or 'P';
// on success stores the block's character range and advances *ppuzRegExp
// past the '}'.
FSTATIC FLMBOOL isBlock(
FLMUNICODE * puzLowChar,
FLMUNICODE * puzHighChar,
FLMUNICODE ** ppuzRegExp);
/*****************************************************************************
Desc:	Constructor.  Initializes the memory pool, makes the (empty) top
		branch the current branch, and points the literal accumulator at the
		in-object buffer.
*****************************************************************************/
F_RegExp::F_RegExp()
	: m_pCurrBranch( &m_topBranch),
	  m_puzLiteral( &m_uzLiteral [0]),
	  m_uiMaxLiteralChars( sizeof( m_uzLiteral) / sizeof( FLMUNICODE)),
	  m_uiNumLiteralChars( 0)
{
	m_Pool.poolInit( 256);

	// The top branch starts with no parent, no siblings and no expressions.

	m_topBranch.pParentExpr = NULL;
	m_topBranch.pFirstExpr = NULL;
	m_topBranch.pLastExpr = NULL;
	m_topBranch.pNextBranch = NULL;
	m_topBranch.pPrevBranch = NULL;
}
/*****************************************************************************
Desc:	Destructor.  Frees the pool and, if the literal accumulator was
		moved off the in-object buffer, the separately allocated buffer too.
*****************************************************************************/
F_RegExp::~F_RegExp()
{
	m_Pool.poolFree();

	// The literal buffer is only a separate allocation once it has grown
	// beyond m_uzLiteral.

	if (m_puzLiteral == &m_uzLiteral [0])
	{
		return;
	}
	f_free( &m_puzLiteral);
}
/*****************************************************************************
Desc:	Test whether a character is whitespace (space, tab, newline or
		carriage return).
*****************************************************************************/
FINLINE FLMBOOL isWhitespace(
	FLMUNICODE	uzChar
	)
{
	switch (uzChar)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
			return( TRUE);
		default:
			return( FALSE);
	}
}
/*****************************************************************************
Desc:	Return a pointer to the first non-whitespace character at or after
		puzRegExp.
*****************************************************************************/
FINLINE FLMUNICODE * skipWhitespace(
	FLMUNICODE *	puzRegExp
	)
{
	FLMUNICODE *	puzCursor;

	for (puzCursor = puzRegExp; isWhitespace( *puzCursor); puzCursor++)
	{
		// Nothing to do - just advancing.
	}
	return( puzCursor);
}
/*****************************************************************************
Desc:	Allocate a zero-filled expression of the requested type from the
		pool, hand it back through ppExpr, and append it to the current
		branch.  Returns the pool's error code if the allocation fails.
*****************************************************************************/
RCODE F_RegExp::createRegExp(
	eExpType			eType,
	REG_EXP **		ppExpr
	)
{
	RCODE					rc = NE_XFLM_OK;
	REG_EXP *			pNewExpr;
	REG_EXP_BRANCH *	pOwner = m_pCurrBranch;
	REG_EXP *			pTail;

	rc = m_Pool.poolCalloc( sizeof( REG_EXP), (void **)&pNewExpr);
	if (RC_BAD( rc))
	{
		return( rc);
	}

	*ppExpr = pNewExpr;
	pNewExpr->eType = eType;
	pNewExpr->pBranch = pOwner;

	// Append to the branch's expression list.

	pTail = pOwner->pLastExpr;
	pNewExpr->pPrev = pTail;
	if (pTail == NULL)
	{
		pOwner->pFirstExpr = pNewExpr;
	}
	else
	{
		pTail->pNext = pNewExpr;
	}
	pOwner->pLastExpr = pNewExpr;

	return( rc);
}
/*****************************************************************************
Desc:	Append one character to the pending literal.  When the buffer is
		full it is replaced by a heap buffer 128 characters larger; the old
		contents are copied over and the old heap buffer (if any) is freed.
*****************************************************************************/
RCODE F_RegExp::addLiteralChar(
	FLMUNICODE	uzChar
	)
{
	RCODE	rc = NE_XFLM_OK;

	if (m_uiNumLiteralChars == m_uiMaxLiteralChars)
	{
		FLMUINT			uiGrownMax = m_uiMaxLiteralChars + 128;
		FLMUNICODE *	puzGrown;

		if (RC_BAD( rc = f_alloc( sizeof( FLMUNICODE) * uiGrownMax,
									&puzGrown)))
		{
			return( rc);
		}

		// Carry the existing characters over to the larger buffer.

		if (m_uiNumLiteralChars)
		{
			f_memcpy( puzGrown, m_puzLiteral,
				sizeof( FLMUNICODE) * m_uiNumLiteralChars);
		}

		// Only a previously grown buffer is heap memory; the initial one
		// lives inside the object.

		if (m_puzLiteral != &m_uzLiteral [0])
		{
			f_free( &m_puzLiteral);
		}

		m_uiMaxLiteralChars = uiGrownMax;
		m_puzLiteral = puzGrown;
	}

	m_puzLiteral [m_uiNumLiteralChars++] = uzChar;
	return( rc);
}
/*****************************************************************************
Desc:	Create an EXP_LITERAL expression on the current branch holding a
		pool-allocated copy of uiNumChars characters from puzLiteral.  If
		ppExpr is non-NULL it receives the new expression (it is set as soon
		as the node exists, even if the character copy later fails).
*****************************************************************************/
RCODE F_RegExp::addLiteralExpr(
	FLMUNICODE *	puzLiteral,
	FLMUINT			uiNumChars,
	REG_EXP **		ppExpr
	)
{
	RCODE			rc = NE_XFLM_OK;
	REG_EXP *	pLiteralExpr = 0;

	if (RC_BAD( rc = createRegExp( EXP_LITERAL, &pLiteralExpr)))
	{
		return( rc);
	}

	if (ppExpr)
	{
		*ppExpr = pLiteralExpr;
	}

	// Copy the characters into pool memory owned by the expression tree.

	if (RC_BAD( rc = m_Pool.poolAlloc( uiNumChars * sizeof( FLMUNICODE),
								(void **)&pLiteralExpr->exp.literal.puzLiteral)))
	{
		return( rc);
	}

	f_memcpy( pLiteralExpr->exp.literal.puzLiteral, puzLiteral,
		uiNumChars * sizeof( FLMUNICODE));
	pLiteralExpr->exp.literal.uiNumChars = uiNumChars;

	return( rc);
}
/*****************************************************************************
Desc:	Flush the pending literal, if any, into the expression tree as an
		EXP_LITERAL expression and reset the accumulator.  A failed flush
		leaves the accumulator untouched.
*****************************************************************************/
RCODE F_RegExp::saveLiteral( void)
{
	RCODE	rc = NE_XFLM_OK;

	// Nothing accumulated - nothing to do.

	if (!m_uiNumLiteralChars)
	{
		return( rc);
	}

	rc = addLiteralExpr( m_puzLiteral, m_uiNumLiteralChars, NULL);
	if (RC_BAD( rc))
	{
		return( rc);
	}

	// Start a fresh literal.

	m_uiNumLiteralChars = 0;
	return( rc);
}
/*****************************************************************************
Desc:	See if an escape sequence is a category.  On entry *ppuzRegExp
		points at the 'p' or 'P'; the text that follows must be "{X}" or
		"{Xx}" where X / Xx is one of the names in the table below.  On
		success the category is stored in *ppCategoryChars, *ppuzRegExp is
		advanced past the closing '}' and TRUE is returned.  On failure
		*ppuzRegExp is left alone and FALSE is returned.  Note that
		*ppCategoryChars is written as soon as the name is recognised, even
		if the closing '}' then turns out to be missing.
*****************************************************************************/
FSTATIC FLMBOOL isCategory(
	CATEGORY_CHARS **	ppCategoryChars,
	FLMUNICODE **		ppuzRegExp
	)
{
	// A cMinor of '}' stands for the one-letter (whole major category) form.

	struct CATEGORY_NAME
	{
		char					cMajor;
		char					cMinor;
		CATEGORY_CHARS *	pCategory;
	};

	static const CATEGORY_NAME	categoryNames [] =
	{
		{'L', '}', &LCategory},
		{'L', 'u', &LuCategory},
		{'L', 'l', &LlCategory},
		{'L', 't', &LtCategory},
		{'L', 'm', &LmCategory},
		{'L', 'o', &LoCategory},

		{'M', '}', &MCategory},
		{'M', 'n', &MnCategory},
		{'M', 'c', &McCategory},
		{'M', 'e', &MeCategory},

		{'N', '}', &NCategory},
		{'N', 'd', &NdCategory},
		{'N', 'l', &NlCategory},
		{'N', 'o', &NoCategory},

		{'P', '}', &PCategory},
		{'P', 'c', &PcCategory},
		{'P', 'd', &PdCategory},
		{'P', 's', &PsCategory},
		{'P', 'e', &PeCategory},
		{'P', 'i', &PiCategory},
		{'P', 'f', &PfCategory},
		{'P', 'o', &PoCategory},

		{'Z', '}', &ZCategory},
		{'Z', 's', &ZsCategory},
		{'Z', 'l', &ZlCategory},
		{'Z', 'p', &ZpCategory},

		{'S', '}', &SCategory},
		{'S', 'm', &SmCategory},
		{'S', 'c', &ScCategory},
		{'S', 'k', &SkCategory},
		{'S', 'o', &SoCategory},

		{'C', '}', &CCategory},
		{'C', 'c', &CcCategory},
		{'C', 'f', &CfCategory},
		{'C', 'o', &CoCategory},
		{'C', 'n', &CnCategory}
	};
	const FLMUINT		uiNumNames = sizeof( categoryNames) /
											 sizeof( categoryNames [0]);
	FLMUNICODE *		puzCursor = *ppuzRegExp;
	FLMUINT				uiName;

	// Skip past the 'p' or 'P'; a '{' must follow.

	puzCursor++;
	if (*puzCursor != '{')
	{
		return( FALSE);
	}
	puzCursor++;

	// Look the name up.  The second character is only examined once the
	// first one has matched a major category letter, so the scan never
	// reads past a string terminator.

	for (uiName = 0; uiName < uiNumNames; uiName++)
	{
		if (puzCursor [0] == (FLMUNICODE)categoryNames [uiName].cMajor &&
			 puzCursor [1] == (FLMUNICODE)categoryNames [uiName].cMinor)
		{
			break;
		}
	}
	if (uiName == uiNumNames)
	{
		return( FALSE);
	}
	*ppCategoryChars = categoryNames [uiName].pCategory;

	// Step over the name: one letter for a major category (leaving the
	// cursor on its '}'), two letters otherwise.

	puzCursor += (categoryNames [uiName].cMinor == '}') ? 1 : 2;

	// The last character better be a '}'

	if (*puzCursor != '}')
	{
		return( FALSE);
	}

	// Only move the caller's pointer forward now that it is, in fact, a
	// category.

	*ppuzRegExp = puzCursor + 1;
	return( TRUE);
}
/*****************************************************************************
Desc:	See if an escape sequence is a block range of characters.  On entry
		*ppuzRegExp points at the 'p' or 'P'; the text that follows must be
		"{IsName}" where Name exactly matches a pszBlockName entry in
		FlmBlockCharRanges.  On success the block's first and last
		characters are returned in *puzLowChar / *puzHighChar, *ppuzRegExp
		is advanced past the closing '}' and TRUE is returned.  On failure
		nothing is modified and FALSE is returned.
*****************************************************************************/
FSTATIC FLMBOOL isBlock(
	FLMUNICODE *	puzLowChar,
	FLMUNICODE *	puzHighChar,
	FLMUNICODE **	ppuzRegExp
	)
{
	FLMBOOL			bIsBlock = FALSE;
	FLMUINT			uiLoop;
	FLMUNICODE *	puzRegExp = *ppuzRegExp;
	FLMUNICODE *	puzBlockName;
	FLMUNICODE *	puzSaveBlockName;
	const char *	pszBlockName;

	// Skip past the 'p' or 'P'

	puzRegExp++;

	// Next three characters better be '{Is'

	if (*puzRegExp != '{')
	{
		goto Exit;
	}
	puzRegExp++;
	if (*puzRegExp != 'I')
	{
		goto Exit;
	}
	puzRegExp++;
	if (*puzRegExp != 's')
	{
		goto Exit;
	}
	puzRegExp++;
	puzSaveBlockName = puzRegExp;

	// NOTE(review): the scan stops at the first entry whose pszBlockName is
	// NULL, so FlmBlockCharRanges is expected to end with such a sentinel
	// entry - confirm at the table definition.

	for (uiLoop = 0;; uiLoop++)
	{
		if ((pszBlockName = FlmBlockCharRanges [uiLoop].pszBlockName) == NULL)
		{
			goto Exit;
		}

		// Compare the name

		puzBlockName = puzSaveBlockName;
		while (*pszBlockName && *puzBlockName != '}')
		{
			if ((FLMUNICODE)(*pszBlockName) != *puzBlockName)
			{
				break;
			}
			pszBlockName++;
			puzBlockName++;
		}

		// A match requires both the table name and the expression's name to
		// be fully consumed.

		if (*pszBlockName == 0 && *puzBlockName == '}')
		{
			puzRegExp = puzBlockName + 1;
			*puzLowChar = FlmBlockCharRanges [uiLoop].uzLowChar;

			// The upper bound must come from uzHighChar; taking uzLowChar
			// here would collapse every block to its first character.

			*puzHighChar = FlmBlockCharRanges [uiLoop].uzHighChar;
			bIsBlock = TRUE;
			break;
		}
	}

Exit:

	// Only move the pointer forward if it is, in fact, a block

	if (bIsBlock)
	{
		*ppuzRegExp = puzRegExp;
	}
	return( bIsBlock);
}
/*****************************************************************************
Desc:	Allocate a category reference from the pool and append it to the
		category list of pCharClass.  Returns the pool's error code if the
		allocation fails.
*****************************************************************************/
RCODE F_RegExp::createCharCategory(
	CATEGORY_CHARS *	pCategoryChars,
	FLMBOOL				bNegatedCategory,
	CHAR_CLASS *		pCharClass
	)
{
	RCODE					rc = NE_XFLM_OK;
	CHAR_CATEGORY *	pNewCategory;
	CHAR_CATEGORY *	pTail;

	rc = m_Pool.poolCalloc( sizeof( CHAR_CATEGORY), (void **)&pNewCategory);
	if (RC_BAD( rc))
	{
		return( rc);
	}

	pNewCategory->pCategoryChars = pCategoryChars;
	pNewCategory->bNegatedCategory = bNegatedCategory;

	// Append to the class's list of character categories.

	pTail = pCharClass->pLastCharCategory;
	pNewCategory->pPrev = pTail;
	if (pTail == NULL)
	{
		pCharClass->pFirstCharCategory = pNewCategory;
	}
	else
	{
		pTail->pNext = pNewCategory;
	}
	pCharClass->pLastCharCategory = pNewCategory;

	return( rc);
}
/*****************************************************************************
Desc:	Allocate a character range from the pool and append it to the range
		list of pCharClass.  Returns the pool's error code if the allocation
		fails.
*****************************************************************************/
RCODE F_RegExp::createCharRange(
	FLMUNICODE		uzLowChar,
	FLMUNICODE		uzHighChar,
	FLMBOOL			bNegatedRange,
	CHAR_CLASS *	pCharClass
	)
{
	RCODE				rc = NE_XFLM_OK;
	CHAR_RANGE *	pNewRange;
	CHAR_RANGE *	pTail;

	rc = m_Pool.poolCalloc( sizeof( CHAR_RANGE), (void **)&pNewRange);
	if (RC_BAD( rc))
	{
		return( rc);
	}

	pNewRange->uzLowChar = uzLowChar;
	pNewRange->uzHighChar = uzHighChar;
	pNewRange->bNegatedRange = bNegatedRange;

	// Append to the class's list of character ranges.

	pTail = pCharClass->pLastCharRange;
	pNewRange->pPrev = pTail;
	if (pTail == NULL)
	{
		pCharClass->pFirstCharRange = pNewRange;
	}
	else
	{
		pTail->pNext = pNewRange;
	}
	pCharClass->pLastCharRange = pNewRange;

	return( rc);
}
/*****************************************************************************
Desc:	Create a character list and append it to the character-list list of
		pCharClass.  The characters come either from pszChars (a
		null-terminated 8-bit string, widened to Unicode) or from puzChars
		(uiNumChars Unicode characters); pass exactly one of the two.
		uiNumChars is the size of the list that is allocated and recorded.
		Returns the pool's error code if an allocation fails.
*****************************************************************************/
RCODE F_RegExp::createCharList(
	const char *	pszChars,
	FLMUNICODE *	puzChars,
	FLMUINT			uiNumChars,
	FLMBOOL			bNegatedChars,
	CHAR_CLASS *	pCharClass)
{
	RCODE				rc = NE_XFLM_OK;
	CHAR_LIST *		pCharList;

	// Allocate the CHAR_LIST structure.

	if (RC_BAD( rc = m_Pool.poolCalloc( sizeof( CHAR_LIST),
								(void **)&pCharList)))
	{
		goto Exit;
	}
	pCharList->bNegatedChars = bNegatedChars;

	// Allocate an array for the characters.

	if (RC_BAD( rc = m_Pool.poolAlloc( sizeof( FLMUNICODE) * uiNumChars,
								(void **)&pCharList->puzChars)))
	{
		goto Exit;
	}
	pCharList->uiNumChars = uiNumChars;

	// Copy the characters from pszChars or puzChars into pCharList->puzChars

	if (pszChars)
	{
		FLMUINT	uiChar;

		flmAssert( !puzChars);

		// Stop at the string terminator OR at the allocated size, whichever
		// comes first, so a string longer than uiNumChars can never write
		// past the end of the pool allocation.

		for (uiChar = 0; uiChar < uiNumChars && pszChars [uiChar]; uiChar++)
		{
			pCharList->puzChars [uiChar] = (FLMUNICODE)(pszChars [uiChar]);
		}
	}
	else
	{
		f_memcpy( pCharList->puzChars, puzChars,
			sizeof( FLMUNICODE) * uiNumChars);
	}

	// Link at end of list of character lists

	if ((pCharList->pPrev = pCharClass->pLastCharList) != NULL)
	{
		pCharList->pPrev->pNext = pCharList;
	}
	else
	{
		pCharClass->pFirstCharList = pCharList;
	}
	pCharClass->pLastCharList = pCharList;

Exit:

	return( rc);
}
/*****************************************************************************
Desc:	Create a sub-character class and append it to the tail of the list
		of sub-classes owned by pCharClass (the parent).  The new class is
		returned through ppNewCharClass so the caller can populate it.
		On allocation failure the pool's error code is returned and neither
		*ppNewCharClass nor the parent is modified.
*****************************************************************************/
RCODE F_RegExp::createCharClass(
	FLMBOOL			bNegatedClass,
	CHAR_CLASS *	pCharClass,
	CHAR_CLASS **	ppNewCharClass
	)
{
	RCODE				rc = NE_XFLM_OK;
	CHAR_CLASS *	pNewCharClass;

	// Allocate the CHAR_CLASS structure.

	if (RC_BAD( rc = m_Pool.poolCalloc( sizeof( CHAR_CLASS),
								(void **)&pNewCharClass)))
	{
		goto Exit;
	}
	*ppNewCharClass = pNewCharClass;
	pNewCharClass->bNegatedClass = bNegatedClass;

	// Link at end of the parent's list of character classes.  The previous
	// tail (if any) must point forward to the NEW class, and an empty list
	// must get the new class as its head - the head/tail pointers live in
	// the parent, never in the new child.

	if ((pNewCharClass->pPrev = pCharClass->pLastCharClass) != NULL)
	{
		pNewCharClass->pPrev->pNext = pNewCharClass;
	}
	else
	{
		pCharClass->pFirstCharClass = pNewCharClass;
	}
	pCharClass->pLastCharClass = pNewCharClass;

Exit:

	return( rc);
}
/*****************************************************************************
Desc:	Parse an escape sequence.  On entry *ppuzRegExp points at the '\';
		on return it points just past whatever was consumed.

		An escape is one of two things:

		1.	A single literal character (\n, \r, \t, or any other character
			that is simply quoted).  The character is returned in *puzChar and
			nothing is added to any character class.

		2.	A character-class construct (\p \P \s \S \d \D \w \W \c \C \i \I).
			*puzChar is returned as zero and the construct is added to
			pCharClass.  If pCharClass is NULL (escape found outside of a
			[...] class) a new EXP_CHAR_CLASS expression is created to hold
			it.

		A '\' that is the last character of the expression is reported as
		NE_XFLM_UNEXPECTED_END_OF_EXPR; the pointer is left on the
		terminator so the caller cannot run past the end of the buffer.
*****************************************************************************/
RCODE F_RegExp::parseEscape(
	FLMUNICODE **	ppuzRegExp,
	CHAR_CLASS *	pCharClass,
	FLMUNICODE *	puzChar
	)
{
	RCODE					rc = NE_XFLM_OK;
	FLMUNICODE *		puzRegExp = *ppuzRegExp;
	CHAR_CLASS *		pNewCharClass;
	CATEGORY_CHARS *	pCategoryChars = NULL;
	FLMUNICODE			uzLowChar = 0;
	FLMUNICODE			uzHighChar = 0;
	FLMUNICODE			uzEscape;
	FLMBOOL				bNegated = FALSE;
	FLMBOOL				bIsCategory = FALSE;
	REG_EXP *			pTmpExpr;

	*puzChar = 0;

	// Skip past the '\'

	puzRegExp++;
	uzEscape = *puzRegExp;

	// Step 1: Classify the escape and consume it.  Escapes that are just
	// a literal character are finished right here.

	switch (uzEscape)
	{
		case 0:

			// Backslash was the last character of the expression.  Do NOT
			// advance - the caller must still see the terminator.

			rc = RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR);
			goto Exit;

		case 'p':
		case 'P':
			bNegated = (uzEscape == 'P') ? TRUE : FALSE;
			if (isCategory( &pCategoryChars, &puzRegExp))
			{
				bIsCategory = TRUE;
			}
			else if (!isBlock( &uzLowChar, &uzHighChar, &puzRegExp))
			{

				// Neither a category nor a block - treat as a regular
				// character.

				*puzChar = *puzRegExp;
				puzRegExp++;
				goto Exit;
			}
			break;

		case 's':
		case 'd':
		case 'c':
		case 'i':
			bNegated = FALSE;
			puzRegExp++;
			break;

		case 'S':
		case 'D':
		case 'C':
		case 'I':
			bNegated = TRUE;
			puzRegExp++;
			break;

		case 'w':

			// NOTE: The negated flag is TRUE for lowercase 'w', unlike the
			// others where it is TRUE for the uppercase character.  This is
			// because \w is built as a sub-class that is everything EXCEPT
			// the categories {P}, {Z}, and {C}.  \W includes those three
			// categories - just the opposite of \w.

			bNegated = TRUE;
			puzRegExp++;
			break;

		case 'W':
			bNegated = FALSE;
			puzRegExp++;
			break;

		case 'n':
			*puzChar = 0xA;
			puzRegExp++;
			goto Exit;

		case 'r':
			*puzChar = 0xD;
			puzRegExp++;
			goto Exit;

		case 't':
			*puzChar = 0x9;
			puzRegExp++;
			goto Exit;

		default:
			*puzChar = uzEscape;
			puzRegExp++;
			goto Exit;
	}

	// Step 2: Everything that gets here is a character-class construct.
	// If we are not inside of a [...] class, create an expression to hold
	// it.  Any literal that is pending must be saved out first - otherwise
	// a quantifier that follows the escape would find the pending literal
	// and attach itself to the literal's last character instead of to this
	// class (see parseQuantifier).

	if (!pCharClass)
	{
		if (RC_BAD( rc = saveLiteral()))
		{
			goto Exit;
		}
		if (RC_BAD( rc = createRegExp( EXP_CHAR_CLASS, &pTmpExpr)))
		{
			goto Exit;
		}
		pCharClass = &pTmpExpr->exp.charClass;
	}

	// Step 3: Populate the character class.

	switch (uzEscape)
	{
		case 'p':
		case 'P':
			if (bIsCategory)
			{
				if (RC_BAD( rc = createCharCategory( pCategoryChars, bNegated,
											pCharClass)))
				{
					goto Exit;
				}
			}
			else
			{

				// A block is simply a range of characters.

				if (RC_BAD( rc = createCharRange( uzLowChar, uzHighChar,
											bNegated, pCharClass)))
				{
					goto Exit;
				}
			}
			break;

		case 's':
		case 'S':

			// Space, tab, newline, carriage return

			if (RC_BAD( rc = createCharList( " \t\n\r", NULL, 4,
										bNegated, pCharClass)))
			{
				goto Exit;
			}
			break;

		case 'd':
		case 'D':

			// The same as {Nd} category

			if (RC_BAD( rc = createCharCategory( &NdCategory,
										bNegated, pCharClass)))
			{
				goto Exit;
			}
			break;

		case 'w':
		case 'W':

			// Sub-character class holding the categories
			// {P} - punctuation, {Z} - separators, and {C} - others

			if (RC_BAD( rc = createCharClass( bNegated, pCharClass,
										&pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharCategory( &PCategory,
										FALSE, pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharCategory( &ZCategory,
										FALSE, pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharCategory( &CCategory,
										FALSE, pNewCharClass)))
			{
				goto Exit;
			}
			break;

		case 'c':
		case 'C':

			// NameChar and '.', '_', '-', ':'

			if (RC_BAD( rc = createCharClass( bNegated, pCharClass,
										&pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharCategory( &NameCharCategory, FALSE,
										pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharList( "._-:", NULL, 4, FALSE,
										pNewCharClass)))
			{
				goto Exit;
			}
			break;

		case 'i':
		case 'I':

			// Letter and '_', ':'

			if (RC_BAD( rc = createCharClass( bNegated, pCharClass,
										&pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharCategory( &LetterCategory, FALSE,
										pNewCharClass)))
			{
				goto Exit;
			}
			if (RC_BAD( rc = createCharList( "_:", NULL, 2, FALSE,
										pNewCharClass)))
			{
				goto Exit;
			}
			break;

		default:

			// Not reachable - every other escape exited in step 1.

			break;
	}

Exit:

	*ppuzRegExp = puzRegExp;
	return( rc);
}
/*****************************************************************************
Desc:	Parse a character class - [xxxxx].  On entry *ppuzRegExp points at
		the opening '['; on success it is left just past the closing ']'.
		The parsed contents are added to pCharClass.

		Handles:
			'^'	negation, only when it is the first character
			'-'	range operator, except right after '[' or right before ']'
			'\'	escapes - either a quoted literal character or a class
					construct (see parseEscape)
			'-['	class subtraction - parsed recursively into
					pCharClass->pSubtractionClass

		Ordinary characters are accumulated in m_puzLiteral and written out
		as character lists when a range, a subtraction, or the end of the
		class is reached.
*****************************************************************************/
RCODE F_RegExp::parseCharClass(
	FLMUNICODE **	ppuzRegExp,
	CHAR_CLASS *	pCharClass
	)
{
	RCODE				rc = NE_XFLM_OK;
	FLMUNICODE *	puzRegExp = *ppuzRegExp;
	FLMBOOL			bAtBeginning;
	FLMBOOL			bHaveDash;
	FLMBOOL			bEscapedChar;
	FLMUNICODE		uzChar;
	FLMUNICODE		uzLowChar;

	flmAssert( *puzRegExp == '[');

	// skip past the '['

	puzRegExp++;

	// Save whatever literal expression may have built up.

	if (RC_BAD( rc = saveLiteral()))
	{
		goto Exit;
	}

	bAtBeginning = TRUE;
	bHaveDash = FALSE;
	bEscapedChar = FALSE;
	for (;;)
	{
		switch (*puzRegExp)
		{
			case 0:
				rc = RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR);
				goto Exit;

			case '^':

				// This is only the negation character if it comes immediately
				// after the opening '['.  Otherwise, it is a regular character.

				if (bAtBeginning && !pCharClass->bNegatedClass)
				{
					pCharClass->bNegatedClass = TRUE;
					puzRegExp++;
				}
				else
				{
					goto Save_Char;
				}
				break;

			case '-':

				// This is NOT a range operator if it comes immediately after
				// the opening '[' or right before the closing ']'.  In those
				// two cases, it is a regular character.

				if (bAtBeginning || *(puzRegExp + 1) == ']')
				{
					goto Save_Char;
				}
				else if (!m_uiNumLiteralChars)
				{

					// The dash doesn't have a preceding character we can use
					// as the beginning character of the range.

					rc = RC_SET( NE_XFLM_UNESCAPED_METACHAR);
					goto Exit;
				}
				else
				{
					bHaveDash = TRUE;
					puzRegExp++;
				}
				break;

			case '\\':

				// A backslash with nothing after it cannot be an escape.
				// Catch it here so we never step over the terminator.

				if (*(puzRegExp + 1) == 0)
				{
					rc = RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR);
					goto Exit;
				}
				if (RC_BAD( rc = parseEscape( &puzRegExp, pCharClass, &uzChar)))
				{
					goto Exit;
				}
				if (uzChar)
				{

					// The escape was a quoted literal.  parseEscape has
					// already moved puzRegExp past it, so the character to
					// save is uzChar - NOT whatever puzRegExp points at now.

					bEscapedChar = TRUE;
					goto Save_Char;
				}
				bAtBeginning = FALSE;
				break;

			case ']':

				// If it comes right after the opening '[', or after the '^' when it
				// is used as a negation (see above), it is a regular character.
				// In both of those cases, bAtBeginning will still be TRUE.
				// Otherwise, it is the end of the character class.

				if (bAtBeginning)
				{
					goto Save_Char;
				}
				goto End_Of_Expr;

			case '[':

				// If it comes right after a '-', it represents the beginning of
				// a subtraction group.

				if (!bHaveDash)
				{
					goto Save_Char;
				}

				// Won't be at the beginning in this case

				flmAssert( !bAtBeginning);

				// Had the dash character, toggle flag back to FALSE.

				bHaveDash = FALSE;

				// Before calling self recursively, need to clear out
				// any characters we have gathered up so far.

				if (m_uiNumLiteralChars)
				{
					if (RC_BAD( rc = createCharList( NULL, m_puzLiteral,
												m_uiNumLiteralChars, FALSE,
												pCharClass)))
					{
						goto Exit;
					}
					m_uiNumLiteralChars = 0;
				}

				if (RC_BAD( rc = m_Pool.poolCalloc( sizeof( CHAR_CLASS),
											(void **)&pCharClass->pSubtractionClass)))
				{
					goto Exit;
				}
				if (RC_BAD( rc = parseCharClass( &puzRegExp,
											pCharClass->pSubtractionClass)))
				{
					goto Exit;
				}

				// Next character must be a ']' to end this character class

				if (*puzRegExp == ']')
				{
					goto End_Of_Expr;
				}
				rc = (RCODE)((*puzRegExp)
								 ? RC_SET( NE_XFLM_ILLEGAL_CLASS_SUBTRACTION)
								 : RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR));
				goto Exit;

			default:
Save_Char:

				// For an unescaped character, the character to save is the
				// one we are sitting on.  For an escaped one, uzChar was set
				// by parseEscape.

				if (!bEscapedChar)
				{
					uzChar = *puzRegExp;
				}

				if (!bHaveDash)
				{
					if (RC_BAD( rc = addLiteralChar( uzChar)))
					{
						goto Exit;
					}
				}
				else
				{

					// Reset the bHaveDash flag.

					bHaveDash = FALSE;

					// The '-' case guarantees there is at least one character
					// gathered when bHaveDash is set.

					uzLowChar = m_puzLiteral [m_uiNumLiteralChars - 1];
					if (uzChar < uzLowChar)
					{
						rc = RC_SET( NE_XFLM_ILLEGAL_CHAR_RANGE_IN_EXPR);
						goto Exit;
					}

					// No need to do anything if the character is equal to
					// the last one.  i.e., they are doing a range like A-A

					else if (uzChar > uzLowChar)
					{

						// Save off the characters we have gathered so far,
						// except for the last one, which will be the beginning
						// of our range.

						if (m_uiNumLiteralChars > 1)
						{
							if (RC_BAD( rc = createCharList( NULL, m_puzLiteral,
														m_uiNumLiteralChars - 1, FALSE,
														pCharClass)))
							{
								goto Exit;
							}
						}

						// Need to zero out number of characters so that if there
						// is another dash, it will be reported as an unescaped
						// metacharacter.

						m_uiNumLiteralChars = 0;

						// Create a range.

						if (RC_BAD( rc = createCharRange( uzLowChar, uzChar,
													FALSE, pCharClass)))
						{
							goto Exit;
						}
					}
				}
				bAtBeginning = FALSE;

				// An escaped character has already been consumed by
				// parseEscape - only step forward for an unescaped one.

				if (bEscapedChar)
				{
					bEscapedChar = FALSE;
				}
				else
				{
					puzRegExp++;
				}
				break;
		}
	}

End_Of_Expr:

	// Keep the final set of characters we may have
	// gathered.

	if (m_uiNumLiteralChars)
	{
		if (RC_BAD( rc = createCharList( NULL, m_puzLiteral,
									m_uiNumLiteralChars, FALSE, pCharClass)))
		{
			goto Exit;
		}
		m_uiNumLiteralChars = 0;
	}

	// Skip past the ']'

	puzRegExp++;

Exit:

	*ppuzRegExp = puzRegExp;
	return( rc);
}
/*****************************************************************************
Desc:	Parse a quantifier expression.  On entry *ppuzRegExp points at the
		first character of the quantifier; on success it is left just past
		the quantifier.

		All of the following forms are allowed:

			{3}		- exactly 3
			{,4}		- same as {0,4}
			{3,}		- 3 to unlimited
			{3,5}		- min of 3, max of 5
			+			- same as {1,}
			*			- same as {0,}
			?			- same as {0,1}

		The quantifier is attached to the last character of the pending
		literal, if there is one, otherwise to the last expression in the
		current branch.  It is an error (NE_XFLM_ILLEGAL_QUANTIFIER) if there
		is nothing to quantify or the last expression is already quantified.
*****************************************************************************/
RCODE F_RegExp::parseQuantifier(
	FLMUNICODE **	ppuzRegExp)
{
	RCODE				rc = NE_XFLM_OK;
	FLMUNICODE *	puzRegExp = *ppuzRegExp;
	FLMUINT			uiMin = 0;
	FLMUINT			uiMax = 0;
	FLMBOOL			bUnlimited = FALSE;
	REG_EXP *		pTmpExpr;

	if (!m_uiNumLiteralChars &&
		 (!m_pCurrBranch->pLastExpr ||
		  m_pCurrBranch->pLastExpr->bQuantified))
	{
		rc = RC_SET( NE_XFLM_ILLEGAL_QUANTIFIER);
		goto Exit;
	}

	if (*puzRegExp == '?')
	{
		uiMin = 0;
		uiMax = 1;
		bUnlimited = FALSE;

		// Skip past the '?'

		puzRegExp++;
	}
	else if (*puzRegExp == '*')
	{
		uiMin = 0;
		uiMax = 0;
		bUnlimited = TRUE;

		// Skip past the '*'

		puzRegExp++;
	}
	else if (*puzRegExp == '+')
	{
		uiMin = 1;
		uiMax = 0;
		bUnlimited = TRUE;

		// Skip past the '+'

		puzRegExp++;
	}
	else
	{

		// Only thing left better be a left brace

		flmAssert( *puzRegExp == '{');

		// Skip past the left brace

		puzRegExp++;

		// Skip any white space

		puzRegExp = skipWhitespace( puzRegExp);

		// Get the first number, if any

		uiMin = 0;
		while (*puzRegExp >= '0' && *puzRegExp <= '9')
		{
			uiMin *= 10;
			uiMin += (FLMUINT)(*puzRegExp - '0');
			puzRegExp++;
		}

		// Skip any whitespace that comes after the number.

		puzRegExp = skipWhitespace( puzRegExp);

		// Better have landed on a comma or right brace

		if (*puzRegExp == ',')
		{
			puzRegExp++;

			// Skip any whitespace that comes after the comma

			puzRegExp = skipWhitespace( puzRegExp);

			// Form is {min,} or {min,max}.  It is unlimited unless
			// we actually find a max number.

			uiMax = 0;
			bUnlimited = TRUE;
			while (*puzRegExp >= '0' && *puzRegExp <= '9')
			{
				uiMax *= 10;
				uiMax += (FLMUINT)(*puzRegExp - '0');
				bUnlimited = FALSE;
				puzRegExp++;
			}

			// Skip any whitespace that comes after the number.

			puzRegExp = skipWhitespace( puzRegExp);
		}
		else if (*puzRegExp == '}')
		{

			// Form is {n} - exactly n occurrences.  These values must
			// survive to the end - they are NOT defaults for a max count
			// that is still to be parsed.

			if (!uiMin)
			{
				rc = RC_SET( NE_XFLM_ILLEGAL_QUANTIFIER);
				goto Exit;
			}
			uiMax = uiMin;
			bUnlimited = FALSE;
		}
		else
		{
			rc = RC_SET( NE_XFLM_ILLEGAL_MIN_COUNT);
			goto Exit;
		}

		// Better have landed on a right brace

		if (*puzRegExp != '}')
		{
			rc = (RCODE)((*puzRegExp)
							 ? RC_SET( NE_XFLM_ILLEGAL_MAX_COUNT)
							 : RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR));
			goto Exit;
		}

		// Got the '}', see if min and max are legal.

		puzRegExp++;
		if (!bUnlimited && (!uiMax || uiMax < uiMin))
		{
			rc = RC_SET( NE_XFLM_ILLEGAL_MAX_COUNT);
			goto Exit;
		}
	}

	// If we have a literal, create two expressions.  First
	// expression will be all but the last character of the
	// literal.  Second expression will be the one character
	// literal with a count.

	if (m_uiNumLiteralChars)
	{
		if (m_uiNumLiteralChars > 1)
		{
			if (RC_BAD( rc = addLiteralExpr( m_puzLiteral,
										m_uiNumLiteralChars - 1, NULL)))
			{
				goto Exit;
			}
		}
		if (RC_BAD( rc = addLiteralExpr(
									&m_puzLiteral [m_uiNumLiteralChars - 1], 1,
									&pTmpExpr)))
		{
			goto Exit;
		}

		// Zero out the literal and start over.

		m_uiNumLiteralChars = 0;
	}
	else
	{
		pTmpExpr = m_pCurrBranch->pLastExpr;
	}

	flmAssert( pTmpExpr);
	pTmpExpr->uiMinOccurs = uiMin;
	pTmpExpr->uiMaxOccurs = uiMax;
	pTmpExpr->bUnlimited = bUnlimited;
	pTmpExpr->bQuantified = TRUE;

Exit:

	*ppuzRegExp = puzRegExp;
	return( rc);
}
/*****************************************************************************
Desc:	Open a parenthesized group - called when a left paren is found.
		Saves any pending literal, creates an EXP_ALTERNATIVES expression
		with a single empty branch, and makes that branch the current one so
		that everything up to the matching right paren is collected in it.
*****************************************************************************/
RCODE F_RegExp::startAlternative( void)
{
	RCODE					rc;
	REG_EXP *			pAltExpr = NULL;
	REG_EXP_BRANCH *	pFirstBranch;

	// The literal gathered so far belongs in front of the group.

	if (RC_BAD( rc = saveLiteral()))
	{
		return( rc);
	}

	// The expression node that represents the whole group.

	if (RC_BAD( rc = createRegExp( EXP_ALTERNATIVES, &pAltExpr)))
	{
		return( rc);
	}

	// The group starts out with exactly one branch, which is both the
	// first and the last.

	if (RC_BAD( rc = m_Pool.poolCalloc( sizeof( REG_EXP_BRANCH),
								(void **)&pAltExpr->exp.alternative.pFirstBranch)))
	{
		return( rc);
	}
	pFirstBranch = pAltExpr->exp.alternative.pFirstBranch;
	pAltExpr->exp.alternative.pLastBranch = pFirstBranch;

	// Descend into the new branch, remembering which expression owns it
	// so that endAlternative can find its way back out.

	m_pCurrBranch = pFirstBranch;
	pFirstBranch->pParentExpr = pAltExpr;

	return( rc);
}
/*****************************************************************************
Desc:	End an alternative - Called when we hit a right paren.  Saves any
		pending literal, verifies the closing branch is not empty, and makes
		the branch that contains the group the current branch again.

		Returns NE_XFLM_UNESCAPED_METACHAR if there is no open group, and
		NE_XFLM_EMPTY_BRANCH_IN_EXPR if the branch being closed is empty.

		As an optimization, a group that turned out to hold nothing but one
		unquantified expression is replaced by that expression.  Groups that
		hold more than that are kept as a group, because a quantifier that
		follows the right paren is attached to the last expression of the
		current branch (see parseQuantifier) and therefore must find the
		group itself there, not just the group's last member.
*****************************************************************************/
RCODE F_RegExp::endAlternative( void)
{
	RCODE			rc = NE_XFLM_OK;
	REG_EXP *	pParentExpr;
	REG_EXP *	pOnlyExpr;

	// If the current branch doesn't have a parent
	// expression, this is an illegal unescaped right paren.

	if ((pParentExpr = m_pCurrBranch->pParentExpr) == NULL)
	{
		rc = RC_SET( NE_XFLM_UNESCAPED_METACHAR);
		goto Exit;
	}
	flmAssert( pParentExpr->eType == EXP_ALTERNATIVES);

	// If we were gathering up a literal, save it out.

	if (RC_BAD( rc = saveLiteral()))
	{
		goto Exit;
	}

	// Make sure the current branch isn't empty

	if (!m_pCurrBranch->pFirstExpr)
	{
		rc = RC_SET( NE_XFLM_EMPTY_BRANCH_IN_EXPR);
		goto Exit;
	}

	// If there is only one alternative, and it consists of a single
	// expression that has not been quantified, link that expression
	// right in where the parent expression would have gone.  This is
	// not strictly necessary, because the processor can handle only
	// one alternative, but it may save processing time in the end.
	//
	// We must NOT do this when the branch holds several expressions or
	// an expression that is already quantified.  In "(ab[cd])+" the '+'
	// would end up on "[cd]" only, and "(a+)?" would be rejected as a
	// doubly quantified expression.

	if (pParentExpr->exp.alternative.pFirstBranch ==
		 pParentExpr->exp.alternative.pLastBranch &&
		 m_pCurrBranch->pFirstExpr == m_pCurrBranch->pLastExpr &&
		 !m_pCurrBranch->pFirstExpr->bQuantified)
	{
		pOnlyExpr = m_pCurrBranch->pFirstExpr;

		// Take the parent expression's place in the parent's branch.
		// The parent expression is the last one in its branch, so only
		// the backward link and the branch's head/tail need fixing.

		if ((pOnlyExpr->pPrev = pParentExpr->pPrev) != NULL)
		{
			pParentExpr->pPrev->pNext = pOnlyExpr;
		}
		else
		{
			pParentExpr->pBranch->pFirstExpr = pOnlyExpr;
		}

		// The expression now lives in the parent's branch.

		pOnlyExpr->pBranch = pParentExpr->pBranch;
		pParentExpr->pBranch->pLastExpr = pOnlyExpr;
	}

	// Go back to the parent branch

	m_pCurrBranch = pParentExpr->pBranch;

Exit:

	return( rc);
}
/*****************************************************************************
Desc:	Begin the next branch of a list of alternatives - called when the
		'|' character is found.  Saves any pending literal, rejects an empty
		branch in front of the '|' (NE_XFLM_EMPTY_BRANCH_IN_EXPR), then
		chains a freshly allocated branch after the current one and makes it
		the current branch.
*****************************************************************************/
RCODE F_RegExp::startNewBranch( void)
{
	RCODE					rc;
	REG_EXP *			pOwnerExpr;
	REG_EXP_BRANCH *	pNewBranch = NULL;

	// Whatever literal was being gathered belongs to the branch that is
	// about to be closed.

	if (RC_BAD( rc = saveLiteral()))
	{
		return( rc);
	}

	// Something must precede the '|'.

	if (m_pCurrBranch->pFirstExpr == NULL)
	{
		rc = RC_SET( NE_XFLM_EMPTY_BRANCH_IN_EXPR);
		return( rc);
	}

	if (RC_BAD( rc = m_Pool.poolCalloc( sizeof( REG_EXP_BRANCH),
								(void **)&pNewBranch)))
	{
		return( rc);
	}

	// Chain the new branch behind the current one.  It shares the
	// current branch's parent expression - which is NULL when we are
	// not inside of parentheses.

	pOwnerExpr = m_pCurrBranch->pParentExpr;
	pNewBranch->pPrevBranch = m_pCurrBranch;
	pNewBranch->pParentExpr = pOwnerExpr;
	m_pCurrBranch->pNextBranch = pNewBranch;

	// A parent must be an alternatives expression, and the new branch
	// is now its last one.

	if (pOwnerExpr != NULL)
	{
		flmAssert( pOwnerExpr->eType == EXP_ALTERNATIVES);
		pOwnerExpr->exp.alternative.pLastBranch = pNewBranch;
	}

	// Everything parsed from here on goes into the new branch.

	m_pCurrBranch = pNewBranch;

	return( rc);
}
/*****************************************************************************
Desc:	Set a regular expression.  Parse the expression, turning it into
		constructs that can be used to test strings more easily.

		puzRegExp is a zero-terminated Unicode string.  It is scanned one
		construct at a time:

			[...]			character class (parseCharClass)
			\x				escape (parseEscape)
			|				next branch of alternatives (startNewBranch)
			( )			start/end of a group (startAlternative, endAlternative)
			{ + * ?		quantifier (parseQuantifier)
			. ^ ] }		rejected with NE_XFLM_UNESCAPED_METACHAR
			other			gathered into the pending literal

		Returns NE_XFLM_UNEXPECTED_END_OF_EXPR if the expression ends with a
		dangling '\' or with a group still open.
*****************************************************************************/
RCODE F_RegExp::setExpression(
	FLMUNICODE *	puzRegExp
	)
{
	RCODE				rc = NE_XFLM_OK;
	REG_EXP *		pTmpExpr = NULL;
	FLMUNICODE		uzChar;

	while (*puzRegExp)
	{
		switch (*puzRegExp)
		{
			case '[':

				// Save out the pending literal BEFORE creating the character
				// class expression, the same way startAlternative does for a
				// group.  parseCharClass also saves the literal, but by then
				// the class expression has already been created.
				// NOTE(review): presumably createRegExp appends to the current
				// branch in creation order - confirm.

				if (RC_BAD( rc = saveLiteral()))
				{
					goto Exit;
				}
				if (RC_BAD( rc = createRegExp( EXP_CHAR_CLASS, &pTmpExpr)))
				{
					goto Exit;
				}
				if (RC_BAD( rc = parseCharClass( &puzRegExp,
											&pTmpExpr->exp.charClass)))
				{
					goto Exit;
				}
				break;

			case '\\':

				// A backslash with nothing after it cannot be an escape.
				// Catch it here so we never step over the terminator.

				if (*(puzRegExp + 1) == 0)
				{
					rc = RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR);
					goto Exit;
				}
				if (RC_BAD( rc = parseEscape( &puzRegExp, NULL, &uzChar)))
				{
					goto Exit;
				}

				// A non-zero character means the escape was just a quoted
				// literal character.

				if (uzChar)
				{
					if (RC_BAD( rc = addLiteralChar( uzChar)))
					{
						goto Exit;
					}
				}
				break;

			case '|':
				if (RC_BAD( rc = startNewBranch()))
				{
					goto Exit;
				}

				// Skip past the '|'

				puzRegExp++;
				break;

			case '(':
				if (RC_BAD( rc = startAlternative()))
				{
					goto Exit;
				}

				// Skip past the '('

				puzRegExp++;
				break;

			case ')':
				if (RC_BAD( rc = endAlternative()))
				{
					goto Exit;
				}

				// Skip past the ')'

				puzRegExp++;
				break;

			case '{':
			case '+':
			case '*':
			case '?':
				if (RC_BAD( rc = parseQuantifier( &puzRegExp)))
				{
					goto Exit;
				}
				break;

			case '.':
			case '^':
			case ']':
			case '}':
				rc = RC_SET( NE_XFLM_UNESCAPED_METACHAR);
				goto Exit;

			default:

				// Add character to the literal expression we
				// are saving up.

				if (RC_BAD( rc = addLiteralChar( *puzRegExp)))
				{
					goto Exit;
				}
				puzRegExp++;
				break;
		}
	}

	// Output the last literal, if any

	if (RC_BAD( rc = saveLiteral()))
	{
		goto Exit;
	}

	// Make sure we are not nested in parens.

	if (m_pCurrBranch->pParentExpr)
	{
		rc = RC_SET( NE_XFLM_UNEXPECTED_END_OF_EXPR);
		goto Exit;
	}

Exit:

	return( rc);
}
/*****************************************************************************
Desc:	Test a string to see if it matches the regular expression.
		Matching is not implemented here - the input stream is ignored and
		the result is always FALSE.
*****************************************************************************/
FLMBOOL F_RegExp::testString(
	IF_PosIStream *	// pIStream - unnamed because it is not used
	)
{
	FLMBOOL	bMatches = FALSE;

	return( bMatches);
}