Files
mars-nwe/include/core/xUnicode.h
OpenAI 9bf95f557e
All checks were successful
Source release / source-package (push) Successful in 1m40s
core: import NSS byte unicode conversion entry points into libnwcore
2026-06-12 18:39:57 +02:00

579 lines
23 KiB
C

/****************************************************************************
|
| (C) Copyright 1985, 1991, 1993, 1996 Novell, Inc.
| All Rights Reserved.
|
| This program is free software; you can redistribute it and/or
| modify it under the terms of version 2 of the GNU General Public
| License as published by the Free Software Foundation.
|
| This program is distributed in the hope that it will be useful,
| but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
| GNU General Public License for more details.
|
| You should have received a copy of the GNU General Public License
| along with this program; if not, contact Novell, Inc.
|
| To contact Novell about this file by physical or electronic mail,
| you may find current contact information at www.novell.com
|
|***************************************************************************
|
| NetWare Advance File Services (NSS) module
|
|---------------------------------------------------------------------------
|
| $Author: taysom $
| $Date: 2004-12-31 01:10:58 +0530 (Fri, 31 Dec 2004) $
|
| $RCSfile$
| $Revision: 465 $
|
|---------------------------------------------------------------------------
| This module is used to:
| Define all of the UNICODE interfaces. This is actually an
| external INCLUDE file which is included temporarily for now.
|
| WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!
|
| This header file should ONLY be used for NSS internal development.
| This includes Semantic Agents (SA) and Loadable Storage Services (LSS).
| Any other use may cause conflicts which NSS will NOT fix.
+-------------------------------------------------------------------------*/
#ifndef _XUNICODE_H_
#define _XUNICODE_H_
#ifndef _OMNI_H_
# include <omni.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*-------------------------------------------------------------------------
 * Global definitions
 *-------------------------------------------------------------------------*/
#ifndef unisizeof
/* Size of the buffer `buf` counted in unicode_t characters instead of bytes.
 * The result is only meaningful when `buf` is a real array: applied to a
 * pointer, sizeof yields the size of the pointer, not of the buffer. */
# define unisizeof(buf) (sizeof(buf) / sizeof(unicode_t))
#endif
/*-------------------------------------------------------------------------
 * Character definitions
 *-------------------------------------------------------------------------*/
/* Ordinary characters, written as wide character constants */
#define UNI_UA                  L'A'
#define UNI_UZ                  L'Z'
#define UNI_LA                  L'a'
#define UNI_LZ                  L'z'
#define UNI_0                   L'0'
#define UNI_9                   L'9'
#define UNI_COLON               L':'
#define UNI_SLASH               L'/'
#define UNI_BSLASH              L'\\'
#define UNI_PERIOD              L'.'
#define UNI_UNDERSCORE          L'_'
#define UNI_BLANK               L' '
#define UNI_ASTERISK            L'*'
#define UNI_QMARK               L'?'
/* Special characters.  The values 0xf8f4 .. 0xf8ff sit at the top of the
 * Unicode private use area (U+E000 .. U+F8FF). */
#define UNI_CHANGE_NAMESPACE    0xf8f4
#define UNI_PREVIOUS_DIR        0xf8f5
#define UNI_CURRENT_DIR         0xf8f6
#define UNI_PATH_SEPARATOR      0xf8f7  /* not used by NSS */
#define UNI_VOLUMENAME_ROOT     0xf8f8
#define UNI_VOLUME_ROOT         0xf8f9
#define UNI_NDS_ROOT            0xf8fa
#define UNI_WILD_QMARK          0xf8fb
#define UNI_WILD_ASTERISK       0xf8fc
#define UNI_WILD_AUG_QMARK      0xf8fd
#define UNI_WILD_AUG_ASTERISK   0xf8fe
#define UNI_WILD_AUG_PERIOD     0xf8ff
/* UNICODE string equivalents of the special characters: each one is a
 * single-character wide string literal.
 * NOTE(review): these are wchar_t literals, so they can only stand in for
 * unicode_t strings where unicode_t has the same representation as wchar_t
 * -- confirm for the target platform. */
#define UNI_CHANGE_NAMESPACE_STR    L"\xf8f4"
#define UNI_PREVIOUS_DIR_STR        L"\xf8f5"
#define UNI_CURRENT_DIR_STR         L"\xf8f6"
#define UNI_PATH_SEPARATOR_STR      L"\xf8f7"   /* not used by NSS */
#define UNI_VOLUMENAME_ROOT_STR     L"\xf8f8"
#define UNI_VOLUME_ROOT_STR         L"\xf8f9"
#define UNI_NDS_ROOT_STR            L"\xf8fa"
#define UNI_WILD_QMARK_STR          L"\xf8fb"
#define UNI_WILD_ASTERISK_STR       L"\xf8fc"
#define UNI_WILD_AUG_QMARK_STR      L"\xf8fd"
#define UNI_WILD_AUG_ASTERISK_STR   L"\xf8fe"
#define UNI_WILD_AUG_PERIOD_STR     L"\xf8ff"
/* Non-zero when ch lies in 0xf8fb .. 0xf8ff, i.e. UNI_WILD_QMARK through
 * UNI_WILD_AUG_PERIOD.  Despite the name, that range also contains the two
 * non-augmented wildcards (UNI_WILD_QMARK and UNI_WILD_ASTERISK).
 * ch may be evaluated twice, so do not pass an expression with side
 * effects. */
#define IS_UNICODE_AUG_WILDCARD_CHAR(ch) \
    (((ch) >= 0xf8fb) && ((ch) <= 0xf8ff))
/*---------------------------------------------------------------------------
 * Values treated as INVALID unicode_t characters, together with a predicate
 * macro that is TRUE when the given character is one of them.
 *---------------------------------------------------------------------------*/
#define UNI_INVALID1    0xfeff
#define UNI_INVALID2    0xfffd
#define UNI_INVALID3    0xfffe
#define UNI_INVALID4    0xffff
/* TRUE for UNI_INVALID1 and for every value from UNI_INVALID2 upward, which
 * takes in UNI_INVALID3 and UNI_INVALID4.  ch may be evaluated twice. */
#define IS_INVALID_UNICODE_CHAR(ch) \
    (((ch) == UNI_INVALID1) || ((ch) >= UNI_INVALID2))
/*---------------------------------------------------------------------------
 * Records whether a unicode_t character is mappable to an ASCII character.
 * The macro passes the bitmap and the character (cast to unicode_t) to
 * TST_BIT, which is defined outside this header.
 *---------------------------------------------------------------------------*/
extern NINT NSSUnicodeMappableToAsciiBitMap[];
#define IS_UNICODE_MAPPABLE_TO_ASCII(ch) \
    (TST_BIT(NSSUnicodeMappableToAsciiBitMap, (unicode_t)(ch)))
/*---------------------------------------------------------------------------
 * Records whether a unicode_t character maps to a double-byte ASCII
 * character.  The macro passes the bitmap and the character (cast to
 * unicode_t) to TST_BIT, which is defined outside this header.
 *---------------------------------------------------------------------------*/
extern NINT NSSUnicodeIsDoubleByteAsciiBitMap[];
#define IS_UNICODE_DOUBLE_BYTE_ASCII(ch) \
    (TST_BIT(NSSUnicodeIsDoubleByteAsciiBitMap, (unicode_t)(ch)))
/*---------------------------------------------------------------------------
 * Records whether a unicode_t character is mappable to an ASCII character
 * in the current MACintosh code page.  (Mac code pages are different from
 * Windows code pages.)  The macro passes the bitmap and the character (cast
 * to unicode_t) to TST_BIT, which is defined outside this header.
 *---------------------------------------------------------------------------*/
extern NINT NSSUnicodeMappableToMacAsciiBitMap[];
#define IS_UNICODE_MAPPABLE_TO_MAC_ASCII(ch) \
    (TST_BIT(NSSUnicodeMappableToMacAsciiBitMap, (unicode_t)(ch)))
/*---------------------------------------------------------------------------
 * Records whether a unicode_t character maps to a double-byte ASCII
 * character in the current MACintosh code page.  (Mac code pages are
 * different from Windows code pages.)  The macro passes the bitmap and the
 * character (cast to unicode_t) to TST_BIT, which is defined outside this
 * header.
 *---------------------------------------------------------------------------*/
extern NINT NSSUnicodeIsDoubleByteMacAsciiBitMap[];
#define IS_UNICODE_DOUBLE_BYTE_MAC_ASCII(ch) \
    (TST_BIT(NSSUnicodeIsDoubleByteMacAsciiBitMap, (unicode_t)(ch)))
/*---------------------------------------------------------------------------
 * ASCII forms of the augmented wildcard characters that are being converted
 *---------------------------------------------------------------------------*/
/* Wildcard bytes used in path parsing.  An augmented byte is the plain
 * character with 0x80 added. */
#define ASCII_QMARK             ('?')
#define ASCII_ASTERISK          ('*')
#define ASCII_AUG_QMARK         ('?' + 0x80)
#define ASCII_AUG_ASTERISK      ('*' + 0x80)
#define ASCII_AUG_PERIOD        ('.' + 0x80)
#define ASCII_WILDCARD_BREAK    (0xff)
/* ASCII strings used for wildcarding: the break byte (0xff) is already
 * included, followed by the wildcard byte itself. */
#define ASCII_WILD_QMARK_STR            "\xff" "?"
#define ASCII_WILD_ASTERISK_STR         "\xff" "*"
#define ASCII_WILD_AUG_QMARK_STR        "\xff" "\xbf"
#define ASCII_WILD_AUG_ASTERISK_STR     "\xff" "\xaa"
#define ASCII_WILD_AUG_PERIOD_STR       "\xff" "\xae"
#if zNETWARE || defined(__KERNEL__)
/*===========================================================================
*===========================================================================
*
* UNICODE version functions
*
*===========================================================================
*===========================================================================*/
/* NOTE(review): going by the name, this reports the version of the NSS
 * Unicode support; how the NINT result is encoded is not visible from this
 * header -- confirm against the implementation. */
extern NINT LB_GetNssUnicodeVersion( void );
/*===========================================================================
*===========================================================================
*
* UNICODE Component string routines.
*
*===========================================================================
*===========================================================================*/
/*---------------------------------------------------------------------------
 * Copy a Unicode Component string from one location to another.
 * Corresponds to strcpy.
 *   dest - destination string
 *   src  - source string
 *---------------------------------------------------------------------------*/
extern unicode_t *LB_componentUnicpy(unicode_t *dest, CONST unicode_t *src);
/*---------------------------------------------------------------------------
 * Return the length of a Unicode Component string.
 * Corresponds to strlen.
 *---------------------------------------------------------------------------*/
extern size_t LB_componentUnilen(CONST unicode_t *src);
/**************************************************************************
 * UNICODE initialization routines.
 *
 * NOTE(review): going by the names, LB_UnicodeStartup() brings the Unicode
 * support up (reporting a STATUS) and LB_UnicodeShutdown() takes it down
 * again; required call order and repeatability are not visible from this
 * header -- confirm against the implementation.
 ***************************************************************************/
extern STATUS LB_UnicodeStartup(void);
extern void LB_UnicodeShutdown(void);
#endif
/*===========================================================================
*===========================================================================
*
* NSS Unicode support libraries
*
*===========================================================================
*===========================================================================*/
/*
 * Conversions between byte strings and Unicode strings.
 *
 *   conversionType  - type of conversion to do
 *   retActualLength - may be NULL where marked below
 *
 * NOTE(review): the "Len" variant takes an explicit input length (inLength)
 * and the "Unterm" variant presumably leaves its output unterminated; the
 * units of outputBufferLen / inLength are not visible from this header --
 * confirm against the implementation.
 */
extern STATUS LB_ByteToUnicode(
    NINT conversionType,
    unicode_t *unicodeOutput,
    NINT outputBufferLen,
    CONST char *byteInput,
    NINT *retActualLength /* may be NULL */);

extern STATUS LB_LenByteToUnicode(
    NINT conversionType,
    unicode_t *unicodeOutput,
    NINT outputBufferLen,
    CONST char *byteInput,
    NINT inLength,
    NINT *retActualLength /* may be NULL */);

extern STATUS LB_UnicodeToByte(
    NINT conversionType,
    char *byteOutput,
    NINT outputBufferLen,
    CONST unicode_t *unicodeInput,
    NINT *retActualLength /* may be NULL */);

extern STATUS LB_UnicodeToUntermByte(
    NINT conversionType,
    char *byteOutput,
    NINT outputBufferLen,
    CONST unicode_t *unicodeInput,
    NINT *retActualLength /* not marked as optional */);
/*
 * Macintosh counterparts of the byte <-> Unicode conversions.
 *
 *   conversionType  - type of conversion to do
 *   retActualLength - may be NULL where marked below
 *
 * NOTE(review): the "Len" variant takes an explicit input length (inLength)
 * and the "Unterm" variant presumably leaves its output unterminated; the
 * units of outputBufferLen / inLength are not visible from this header --
 * confirm against the implementation.
 */
extern STATUS LB_MacByteToUnicode(
    NINT conversionType,
    unicode_t *unicodeOutput,
    NINT outputBufferLen,
    CONST char *byteInput,
    NINT *retActualLength /* may be NULL */);

extern STATUS LB_LenMacByteToUnicode(
    NINT conversionType,
    unicode_t *unicodeOutput,
    NINT outputBufferLen,
    CONST char *byteInput,
    NINT inLength,
    NINT *retActualLength /* may be NULL */);

extern STATUS LB_UnicodeToMacByte(
    NINT conversionType,
    char *byteOutput,
    NINT outputBufferLen,
    CONST unicode_t *unicodeInput,
    NINT *retActualLength /* may be NULL */);

extern STATUS LB_UnicodeToUntermMacByte(
    NINT conversionType,
    char *byteOutput,
    NINT outputBufferLen,
    CONST unicode_t *unicodeInput,
    NINT *retActualLength /* not marked as optional */);
/*
 * UTF-8 to Unicode character conversion.
 *
 *   utf8Input        - pointer to next UTF8 character
 *   retNextUtf8Input - next utf8 pointer is returned here
 *   retUnicodeChar   - NOTE(review): presumably receives the decoded
 *                      character -- confirm against the implementation
 */
extern STATUS LB_UTF8ToUniChar(
    CONST char *utf8Input,
    char **retNextUtf8Input,
    unicode_t *retUnicodeChar);

/*
 * Length-bounded form of the above.
 *
 *   utf8InputLen - length of the UTF8 input string
 *   retUtf8Len   - remaining length of the UTF8 input string
 */
extern STATUS LB_UTF8LenToUniChar(
    CONST char *utf8Input,
    CONST NINT utf8InputLen,
    char **retNextUtf8Input,
    unicode_t *retUnicodeChar,
    NINT *retUtf8Len);
/*-------------------------------------------------------------------------
 * Conversion type definitions
 *-------------------------------------------------------------------------*/
/* Use default provided by name space */
#define NSS_UNI_CONVERSION_NSPACE_DEFAULT   0
/* Default NSS converter type with default wildcard handling */
#define NSS_UNI_CONVERSION_WILD             1
/* No wildCard handling, noMap -> [xxxx] */
#define NSS_UNI_CONVERSION_RAW              2
/* Default NSS converter type with default wildcard handling, DOS FF handling */
#define NSS_UNI_CONVERSION_WILD_DOS         3
#if defined(_NSS_INTERNAL_) || defined(MARS_NWE_NWCORE_UNICODE)
/* Highest number of the pre-defined types (same value as
 * NSS_UNI_CONVERSION_WILD_DOS) */
# define NSS_UNI_CONVERSION_LAST_DEFINED   3
/* Max registerable converter types; also the dimension of the per-type
 * table arrays declared below */
# define NSS_UNI_CONVERSION_COUNT          16

/*---------------------------------------------------------------------------
 * Tables mapping single or double bytes to unicode.  The non-Mac tables
 * have one slot per conversion type; the Mac tables are single pointers.
 *---------------------------------------------------------------------------*/
extern unicode_t *NSSSingleByteToUnicodeTable[NSS_UNI_CONVERSION_COUNT];
extern unicode_t *NSSDoubleByteToUnicodeTable[NSS_UNI_CONVERSION_COUNT];
extern unicode_t *NSSMacSingleByteToUnicodeTable;
extern unicode_t *NSSMacDoubleByteToUnicodeTable;

/*---------------------------------------------------------------------------
 * Raw unicode translation of ASCII 0xFF
 *---------------------------------------------------------------------------*/
extern unicode_t NSSUnicodeFF;
extern unicode_t NSSUnicodeMacFF;

/*---------------------------------------------------------------------------
 * Tables mapping unicode to single or double bytes
 *---------------------------------------------------------------------------*/
extern BYTE *NSSUnicodeToByteTable[NSS_UNI_CONVERSION_COUNT];
extern BYTE *NSSMacUnicodeToByteTable;
#endif
/*===========================================================================
*===========================================================================
*
* NULL TERMINATED UNICODE string routines.
*
*===========================================================================
*===========================================================================*/
/*-------------------------------------------------------------------------
 * Copies the source string to the destination string without overflowing
 * the destination buffer, and guarantees that there is a null at the end
 * of the destination string.
 *   destSize - given in unicode_t chars
 *-------------------------------------------------------------------------*/
#define unimcpy(d,s,sz) LB_unimcpy(d,s,sz)
extern unicode_t *LB_unimcpy(unicode_t *dest, CONST unicode_t *src, size_t destSize);
/*-------------------------------------------------------------------------
* Compare UNICODE to ASCII string.
*-------------------------------------------------------------------------*/
//extern int unistrcmp(
// CONST unicode_t *src1,
// CONST char *src2);
#if zNETWARE || defined(__KERNEL__) || defined(MARS_NWE_NWCORE_UNICODE)
/*-------------------------------------------------------------------------
 * Case conversion routines (not in standard library)
 *-------------------------------------------------------------------------*/
/* unitoupper(ch) is a direct lookup in the NSSUniToUpper table; the argument
 * is cast to unicode_t before it is used as the index.  The disabled define
 * is the alternative that routes the macro through LB_unitoupper(). */
extern unicode_t NSSUniToUpper[];
//#define unitoupper(ch) LB_unitoupper(ch)
#define unitoupper(ch) (NSSUniToUpper[(unicode_t)(ch)])
extern unicode_t LB_unitoupper(CONST unicode_t c);
/* unitolower(ch) is a direct lookup in the NSSUniToLower table; the argument
 * is cast to unicode_t before it is used as the index.  The disabled define
 * is the alternative that routes the macro through LB_unitolower(). */
extern unicode_t NSSUniToLower[];
//#define unitolower(ch) LB_unitolower(ch)
#define unitolower(ch) (NSSUniToLower[(unicode_t)(ch)])
extern unicode_t LB_unitolower(CONST unicode_t c);
/* NOTE(review): presumably these set up the case-conversion tables
 * (NSSUniToUpper / NSSUniToLower) that unitoupper() and unitolower() index
 * -- confirm against the implementation. */
extern void LB_UnicodeCaseStartup(void);
extern void LB_UnicodeLowerStartup(void);
/* Whole-string case conversion; unilwr()/uniupr() are aliases for the LB_
 * routines.
 * NOTE(review): by analogy with strlwr/strupr these presumably convert
 * `string` in place and return it -- confirm against the implementation. */
#define unilwr(str) LB_unilwr(str)
extern unicode_t *LB_unilwr(unicode_t *string);

#define uniupr(str) LB_uniupr(str)
extern unicode_t *LB_uniupr(unicode_t *string);
/*
 *   utf   - pointer to first byte of UTF-8 character
 *   p_utf - returns pointer to first byte beyond the UTF-8 char
 * NOTE(review): the int result is presumably the lower-cased character --
 * confirm against the implementation.
 */
extern int utf_tolower(const unsigned char *utf, const unsigned char **p_utf);
#endif
/**************************************************************************
* Unicode Functions that work like those in string.h. Those routines
* that have "LB_" are defined inside of the NSS library. Those that don't
* are defined in UNICODE.NLM or LOCNLM32.NLM.
***************************************************************************/
/* Corresponds to strcat.
 *   s1 - original string
 *   s2 - string to be appended */
#define unicat(dest,src) LB_unicat(dest,src)
extern unicode_t *LB_unicat(unicode_t *s1, const unicode_t *s2);

/* Corresponds to strchr.
 *   s  - string to be scanned
 *   ch - character to be found */
#define unichr(s,c) LB_unichr(s,c)
extern unicode_t *LB_unichr(const unicode_t *s, unicode_t ch);

/* Corresponds to strrchr.
 *   s  - string to be scanned
 *   ch - character to be found */
#define unirchr(s,c) LB_unirchr(s,c)
extern unicode_t *LB_unirchr(const unicode_t *s, unicode_t ch);

/* Corresponds to strcpy.
 *   dest - destination string
 *   src  - source string */
#define unicpy(dest,src) LB_unicpy(dest,src)
extern unicode_t *LB_unicpy(unicode_t *dest, const unicode_t *src);
/* Corresponds to strcspn.
 *   s1 - string to be scanned
 *   s2 - character set */
extern size_t unicspn(const unicode_t *s1, const unicode_t *s2);

/* Corresponds to strlen.
 *   s - string to determine length of */
#define unilen(s) LB_unilen(s)
extern size_t LB_unilen(const unicode_t *s);

/* Corresponds to strncat.
 *   s1 - original string
 *   s2 - string to be appended
 *   n  - maximum characters to be appended */
#define unincat(dest,src,n) LB_unincat(dest,src,n)
extern unicode_t *LB_unincat(unicode_t *s1, const unicode_t *s2, size_t n);

/* Corresponds to strncpy.
 *   s1 - destination string
 *   s2 - source string
 *   n  - maximum length */
#define unincpy(dest,src,n) LB_unincpy(dest,src,n)
extern unicode_t *LB_unincpy(unicode_t *s1, const unicode_t *s2, size_t n);
/* Corresponds to strnset.
 *   s - string to be modified
 *   c - fill character
 *   n - maximum length */
extern unicode_t *uninset(unicode_t *s, int c, size_t n);

/* Corresponds to strpbrk.
 *   s1 - string to be scanned
 *   s2 - character set */
extern unicode_t *unipbrk(const unicode_t *s1, const unicode_t *s2);

/* Corresponds to strpcpy.
 *   s1 - destination string
 *   s2 - source string */
extern unicode_t *unipcpy(unicode_t *s1, const unicode_t *s2);
//extern unicode_t *unirchr( /* Corresponds to strrchr*/
// const unicode_t *s, /* String to be scanned*/
// int c); /* Character to be found*/
/* Corresponds to strrev.
 *   s - string to be reversed */
extern unicode_t *unirev(unicode_t *s);

/* Corresponds to strset.
 *   s - string to be modified
 *   c - fill character */
extern unicode_t *uniset(unicode_t *s, int c);

/* Corresponds to strspn.
 *   s1 - string to be tested
 *   s2 - character set */
extern size_t unispn(const unicode_t *s1, const unicode_t *s2);

/* Corresponds to strstr.
 *   s1 - string to be scanned
 *   s2 - string to be located */
extern unicode_t *unistr(const unicode_t *s1, const unicode_t *s2);

/* Corresponds to strtok.
 *   s1 - string to be parsed
 *   s2 - delimiter values */
extern unicode_t *unitok(unicode_t *s1, const unicode_t *s2);
/* Corresponds to strcmp. */
#define unicmp(s1, s2) LB_unicmp(s1, s2)
extern int LB_unicmp(const unicode_t *src1, const unicode_t *src2);

/* Unicode length compare: same as above with an additional length
 * argument (len). */
#define unincmp(s1, s2, n) LB_unincmp(s1, s2, n)
extern int LB_unincmp(const unicode_t *s1, const unicode_t *s2, size_t len);
#if zNETWARE || defined(__KERNEL__) || defined(MARS_NWE_NWCORE_UNICODE)
/*
 * Compare routines declared only for NetWare, kernel or mars-nwe nwcore
 * builds.  Any earlier uniicmp / uninicmp macro is dropped before the name
 * is pointed at the LB_ routine.
 * NOTE(review): the "i" in the names suggests case-insensitive comparison,
 * and "Mac" a Macintosh-specific variant -- confirm against the
 * implementation.
 */
#undef uniicmp
#define uniicmp(s1, s2) LB_uniicmp(s1, s2)
extern int LB_uniicmp(const unicode_t *s1, const unicode_t *s2);

#define uniicmpMac(s1, s2) LB_uniicmpMac(s1, s2)
extern int LB_uniicmpMac(const unicode_t *s1, const unicode_t *s2);

#undef uninicmp
#define uninicmp(s1, s2, l) LB_uninicmp(s1, s2, l)
extern int LB_uninicmp(const unicode_t *s1, const unicode_t *s2, size_t len);
#endif
/*===========================================================================*/
/*===========================================================================*/
/****************************************************************************
* Modules may register their own unicode_t translation functions.
* Use these defines and prototypes to do this
*****************************************************************************/
/*===========================================================================*/
/*===========================================================================*/
/*---------------------------------------------------------------------------
 * Function pointer types for unicode_t translation overrides.
 * An override returns TRUE if it overrides the char(s) at input and
 * translates them to a non-standard value, and FALSE if no translation
 * override was done.
 *
 * UNI_OverrideByte2UniFunc_t translates one or two ASCII 8-bit bytes into a
 * single unicode character.
 *   output - returned unicode character
 *   input  - buffer containing NULL-terminated byte input
 *
 * UNI_OverrideUni2ByteFunc_t translates one unicode character into one or
 * two 8-bit ASCII bytes.
 *   output - buffer for NULL-terminated bytes output; must be at least
 *            3 bytes in length
 *   input  - the input unicode character (passed by value)
 *---------------------------------------------------------------------------*/
typedef BOOL (*UNI_OverrideByte2UniFunc_t)(unicode_t *output, BYTE *input);
typedef BOOL (*UNI_OverrideUni2ByteFunc_t)(BYTE *output, unicode_t input);
/*---------------------------------------------------------------------------
 * Registering / unregistering a unicode_t converter type.
 *
 * RegisterUnicodeConverter registers a new converter type and builds a new
 * set of tables for translating unicode.
 *   conversionType   - used to uniquely identify the new tables; passed by
 *                      pointer when registering and by value when
 *                      unregistering
 *   overrideByte2Uni - translation override for the new tables
 *   overrideUni2Byte - translation override for the new tables
 *---------------------------------------------------------------------------*/
#define RegisterUnicodeConverter(type,ovByte2Uni,ovUni2Byte) \
    LB_RegisterUnicodeConverter(type,ovByte2Uni,ovUni2Byte)
extern STATUS LB_RegisterUnicodeConverter(
    NINT *conversionType,
    UNI_OverrideByte2UniFunc_t overrideByte2Uni,
    UNI_OverrideUni2ByteFunc_t overrideUni2Byte);

#define UnRegisterUnicodeConverter(type) LB_UnRegisterUnicodeConverter(type)
extern void LB_UnRegisterUnicodeConverter(NINT conversionType);
/* NOTE(review): presumably these build the unicode-to-byte and
 * byte-to-unicode translation tables for the given conversion type --
 * confirm against the implementation.  Unlike the rest of this interface
 * the two names carry no LB_ prefix. */
extern STATUS buildUnicodeToByteTable(NINT conversionType);
extern STATUS buildByteToUnicodeTable(NINT conversionType);
/*---------------------------------------------------------------------------
 * These functions provide conversion between unicode and UTF8.
 *
 * When zLINUX is non-zero and __KERNEL__ is not defined, utf2uni and uni2utf
 * are function-like macros.  The two prototypes below are then rewritten by
 * the preprocessor and declare LB_utf2uni() / LB_uni2utf() rather than the
 * unprefixed names.
 *
 * NOTE(review): the meaning of the NINT result and the unit of bufLen are
 * not visible from this header -- confirm against the implementation.
 *---------------------------------------------------------------------------*/
#if zLINUX && !defined(__KERNEL__)
#define utf2uni(s1,s2,len) LB_utf2uni(s1,s2,len)
#define uni2utf(s1,s2,len) LB_uni2utf(s1,s2,len)
#endif
extern NINT uni2utf (const unicode_t *unicode, utf8_t *utf, NINT bufLen);
extern NINT utf2uni (utf8_t *utf, unicode_t *unicode, NINT bufLen);
/*---------------------------------------------------------------------------
 * These functions are to help with the MACINTOSH NFAP implementation.
 * Declared only when zNETWARE is non-zero or __KERNEL__ is defined.
 *
 * NOTE(review): going by the names, LB_GetMacCodePageName() stores the name
 * of the Mac code page in nameBuffer, whose size is given by bufferLen --
 * confirm against the implementation.
 *---------------------------------------------------------------------------*/
#if zNETWARE || defined(__KERNEL__)
extern STATUS LB_GetMacCodePageName(BYTE *nameBuffer, NINT bufferLen);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _XUNICODE_H_ */