From 4111bbdc38bb3627aafb5a64ea049b6ea8e3c353 Mon Sep 17 00:00:00 2001 From: OpenAI Date: Fri, 19 Jun 2026 17:19:06 +0000 Subject: [PATCH] nwnss: restore unicode loader userspace boundary --- include/nwnss/internal/nssUnicodeUserspace.h | 42 + src/nwnss/CMakeLists.txt | 1 + src/nwnss/nsslnxlib/unilib.c | 1891 ++++++++++++++---- src/nwnss/nsslnxlib/unilibUserspace.c | 226 +++ 4 files changed, 1744 insertions(+), 416 deletions(-) create mode 100644 include/nwnss/internal/nssUnicodeUserspace.h create mode 100644 src/nwnss/nsslnxlib/unilibUserspace.c diff --git a/include/nwnss/internal/nssUnicodeUserspace.h b/include/nwnss/internal/nssUnicodeUserspace.h new file mode 100644 index 0000000..a8f5ebc --- /dev/null +++ b/include/nwnss/internal/nssUnicodeUserspace.h @@ -0,0 +1,42 @@ +#ifndef NSS_UNICODE_USERSPACE_H +#define NSS_UNICODE_USERSPACE_H + +#include +#include +#include + +#include + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#ifndef MPK_SLEEP_OK +#define MPK_SLEEP_OK 0 +#endif + +struct file { + FILE *stream; + long f_pos; +}; + +void mpkEnter(void); +void mpkExit(void); +void *mpkPageAlloc(unsigned int bytes, int flags); +void mpkPageFree(void *ptr); +void *kMutexAlloc(const char *name); +void kMutexFree(void *lock); +void kMutexLock(void *lock); +void kMutexUnlock(void *lock); +struct file *kFileOpen(const char *path, int flags, int mode); +int kFileRead(struct file *filep, void *buf, unsigned int count); +void klseek(struct file *filep, unsigned int pos, int whence); +int filp_close(struct file *filep, void *unused); +int printk(const char *fmt, ...); +LONG OSGetCodePage(void); + +#ifndef IS_ERR +#define IS_ERR(ptr) ((ptr) == NULL) +#endif + +#endif /* NSS_UNICODE_USERSPACE_H */ diff --git a/src/nwnss/CMakeLists.txt b/src/nwnss/CMakeLists.txt index 04de5ea..bdb8ae9 100644 --- a/src/nwnss/CMakeLists.txt +++ b/src/nwnss/CMakeLists.txt @@ -309,6 +309,7 @@ add_library(nwnss SHARED nss/lib/setErrno.c nss/msg/slab.c nsslnxlib/unilib.c + 
nsslnxlib/unilibUserspace.c lsa/lsaComn.c lsa/lsaErr.c lsa/lsaSuperXattr.c diff --git a/src/nwnss/nsslnxlib/unilib.c b/src/nwnss/nsslnxlib/unilib.c index bc457e6..dde48ad 100644 --- a/src/nwnss/nsslnxlib/unilib.c +++ b/src/nwnss/nsslnxlib/unilib.c @@ -1,497 +1,1556 @@ /**************************************************************************** | - | NSS userspace port of the NSS Unicode rule-table loader. + | (C) Copyright 2003 Novell, Inc. + | All Rights Reserved. | - | The table format and public entry points match public_core/nsslnxlib/unilib.c. - | Kernel file and lock primitives are replaced by libc file IO and process-local - | storage so the userspace NSS build can consume installed/generated NSS-compatible .TAB files. + | This program is free software; you can redistribute it and/or + | modify it under the terms of version 2 of the GNU General Public + | License as published by the Free Software Foundation. + | + | This program is distributed in the hope that it will be useful, + | but WITHOUT ANY WARRANTY; without even the implied warranty of + | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + | GNU General Public License for more details. + | + | You should have received a copy of the GNU General Public License + | along with this program; if not, contact Novell, Inc. 
+ | + | To contact Novell about this file by physical or electronic mail, + | you may find current contact information at www.novell.com + | + |*************************************************************************** + | + | NetWare Advance File Services (NSS) module + | + |--------------------------------------------------------------------------- + | + | $Author: vandana $ + | $Date: 2005-08-10 01:03:51 +0530 (Wed, 10 Aug 2005) $ + | + | $RCSfile$ + | $Revision: 1177 $ + | + |--------------------------------------------------------------------------- + | This module is used to: + | Ported LibC base Unicode services +-------------------------------------------------------------------------*/ + #include -#include +#include #include #include #include -#include +#include -extern char *getenv(const char *name); -extern long strtol(const char *nptr, char **endptr, int base); -extern int atoi(const char *nptr); -extern void *calloc(size_t nmemb, size_t size); -extern void free(void *ptr); -extern int strcasecmp(const char *s1, const char *s2); +int LoadHostLocale(void *whatever, void *NLMHandle); + +// Manifest constants +#define CODEPAGES 52 /* we know of this many so far */ +#define CURRENT_UNI_TAB_VERSION 1 +#define UNI_ERR_MEM_ALLOC (-494) /* no memory for allocation */ +#define UNI_ERR_TABLE_CORRUPT (-498) /* table corruption detected */ +#define UNI_ERR_FILE_OPEN (-501) /* unable to open data file */ +#define UNI_ERR_FILE_READ (-504) /* unable to read data file */ +#define UNI_ERR_UNMAPPABLE_CHAR (-532) /* discovered during translation */ + +#define UNI_UNDEFINED 0xFFFD /* Source code page character has no Unicode conversion */ +#define CP_UNDEFINED 0xFDFF /* Source Unicode character has no code page conversion */ + +#define UNI_MAP_NO_CHAR 0 /* return UNI_ERR_UNMAPPABLE_CHAR */ +#define UNI_MAP_CHAR 1 /* use value in 'noMapChar' unless 0 */ +#define UNI_MAP_BY_FUNC 1 /* use 'noMapFunc' if non-nil */ +#define UNI_MAP_SELF 2 /* use character itself */ + +#define 
UNI_NOMAP_DEFAULT '?' /* no-map character if 'noMapChar' 0 */ +#define DEFAULT_NOMAP UNI_NOMAP_DEFAULT + +#define MAPPING_TO_UPPER_CASE_BIT 0x08000000 +#define MAPPING_TO_LOWER_CASE_BIT 0x04000000 +#define MAPPING_TO_TITLE_CASE_BIT 0x02000000 + +#define OFFSET_MASK 0x000FFFFF + +#define INVALID_SEQUENCE 0xFFFF -#define CODEPAGES 52 -#define UNI_ERR_MEM_ALLOC (-494) -#define UNI_ERR_BAD_HANDLE (-496) -#define UNI_ERR_TABLE_CORRUPT (-498) -#define UNI_ERR_FILE_OPEN (-501) -#define UNI_ERR_FILE_READ (-504) -#define UNI_ERR_UNMAPPABLE_CHAR (-532) +/* Workarounds */ +#define vm_t void +#define STATIC static +#define nobreak -#define UNI_UNDEFINED 0xFFFD -#define CP_UNDEFINED 0xFDFF -#define UNI_LOCAL_DEFAULT (-1) -#define UNI_MAP_NO_CHAR 0 -#define UNI_MAP_CHAR 1 -#define UNI_MAP_SELF 2 -#define UNI_NOMAP_DEFAULT '?' -#define MAPPING_TO_UPPER_CASE_BIT 0x08000000 -#define MAPPING_TO_LOWER_CASE_BIT 0x04000000 -#define OFFSET_MASK 0x000FFFFF -#define SINGLE_LOOKUP_TAG 0x0F000000 +/* New types */ -typedef struct { - int codepage; - int major; - int minor; - int tabType; - int usageCnt; - int minLowerByte; - int maxLowerByte; - void *cpTab1; - unicode_t *cpTab2; - unsigned short *uniTab1; - unsigned short *uniTab2; -} RuleTable_t; +typedef struct +{ + int codepage; // if nonzero, it's a real table + int major; // major version number + int minor; // minor version number + int tabType; // 1 or 2 for single or double byte +// - 0x10 ---------------------------------------------------------------- + int usageCnt; // how many are still using this table + int minLowerByte; // table range + int maxLowerByte; // table range + void *cpTab1; // suballocation +// - 0x20 ---------------------------------------------------------------- + unicode_t *cpTab2; // ibid + unsigned short *uniTab1; // ibid + unsigned short *uniTab2; // ibid + unsigned short *paddington; // pad this out to even +// - 0x30 ---------------------------------------------------------------- +} RuleTable_t; // in 
debugger this takes three lines +typedef struct +{ +#define kUniLibSig 0x62694C55 + /* 'ULib' */ +#define padSig 0x2D2D2D2D + /* '----' */ -typedef struct { - int codepage; - UniRuleTable_t defaultHandle; - RuleTable_t tables[CODEPAGES]; + LONG signature; // 'ULib' + LONG flags; // from libflags.h + void *lock; // lock to serialize UniLib's data + int codepage; // OSGetCodePage() at library start-up +//- 0x10 ---------------------------------------------------------------------- + UniRuleTable_t defaultHandle; // handle corresponding to 'codepage' + int paddington[3]; // pad out to start tables on even boundary +//- 0x20 ---------------------------------------------------------------------- + RuleTable_t tables[CODEPAGES]; // rule tables +//- 0x5C0 --------------------------------------------------------------------- + LONG signature2; // 'ULib'--marks end of struct } UniLibGlobalsStruct; -typedef union { - unsigned short word; - char byte[2]; +typedef union // for translation copying operations +{ + unsigned short word; + char byte[2]; } copyu_t; -typedef struct { - const char *name; - const char *dosname; - int codepage; +typedef struct +{ + const char *name; + const char *dosname; + int codepage; // (made-up, pseudo-codepage) } maccp_t; -static UniLibGlobalsStruct UniLibGlobals; -static uint32_t *UniMasterIndex; -static uint32_t *UniDataTable; -static int UniLibStarted; -#ifndef NSS_DEFAULT_CODEPAGE -#define NSS_DEFAULT_CODEPAGE 850 -#endif -#ifndef NSS_UNITABLE_DIRS -#define NSS_UNITABLE_DIRS "" -#endif - -int UniGetTable(int codepage, UniRuleTable_t *handle); - -static maccp_t sMacintoshCodepages[] = { - { "Roman", "ROMAN.TAB", 1 }, - { "Central European", "CENTEURO.TAB", 2 }, - { "centeuro", "CENTEURO.TAB", 2 }, - { "Japanese", "JAPANESE.TAB", 3 }, - { "Korean", "KOREAN.TAB", 4 }, - { "Chinese Simplified", "CHINSIMP.TAB", 5 }, - { "chinsimp", "CHINSIMP.TAB", 5 }, - { "Chinese Traditional", "CHINTRAD.TAB", 6 }, - { "chintrad", "CHINTRAD.TAB", 6 }, - { 
"Cyrillic", "CYRILLIC.TAB", 8 }, - { "Croatian", "CROATIAN.TAB", 9 }, - { "Devanagari", "DEVANAGA.TAB", 10 }, - { "devanaga", "DEVANAGA.TAB", 10 }, - { "Farsi", "FARSI.TAB", 11 }, - { "Greek", "GREEK.TAB", 12 }, - { "Gujarati", "GUJARATI.TAB", 13 }, - { "Gurmukhi", "GURMUKHI.TAB", 14 }, - { "Hebrew", "HEBREW.TAB", 15 }, - { "Icelandic", "ICELAND.TAB", 16 }, - { "iceland", "ICELAND.TAB", 16 }, - { "Keyboard Characters", "KEYBOARD.TAB", 17 }, - { "keyboard", "KEYBOARD.TAB", 17 }, - { "Romanian", "ROMANIAN.TAB", 18 }, - { "Symbol", "SYMBOL.TAB", 19 }, - { "Thai", "THAI.TAB", 20 }, - { "Turkish", "TURKISH.TAB", 21 }, - { "Ukrainian", "UKRAINE.TAB", 8 }, - { "Ukraine", "UKRAINE.TAB", 8 }, - { "Dingbats", "DINGBATS.TAB", 22 }, - { NULL, NULL, 0 } +/* Global state */ +UniLibGlobalsStruct UniLibGlobals; +LONG *UniMasterIndex = (LONG *) NULL; +LONG *UniDataTable = (LONG *) NULL; +static char sUniTabDir[256] = "/opt/novell/nss/nls/"; +STATIC maccp_t sMacintoshCodepages[] = +{ + { "Roman", "ROMAN.TAB", 1 }, + { "Central European", "CENTEURO.TAB", 2 }, + { "centeuro", "CENTEURO.TAB", 2 }, + { "Japanese", "JAPANESE.TAB", 3 }, + { "Korean", "KOREAN.TAB", 4 }, + { "Chinese Simplified", "CHINSIMP.TAB", 5 }, + { "chinsimp", "CHINSIMP.TAB", 5 }, + { "Chinese Traditional", "CHINTRAD.TAB", 6 }, + { "chintrad", "CHINTRAD.TAB", 6 }, + { "Corporate-zone Characters", "CORPCHAR.TAB", 7 }, + { "corpchar", "CORPCHAR.TAB", 7 }, + { "Cyrillic", "CYRILLIC.TAB", 8 }, + { "Croatian", "CROATIAN.TAB", 9 }, + { "Devanagari", "DEVANAGA.TAB", 10 }, + { "devanaga", "DEVANAGA.TAB", 10 }, + { "Farsi", "FARSI.TAB", 11 }, + { "Greek", "GREEK.TAB", 12 }, + { "Gujarati", "GUJARATI.TAB", 13 }, + { "Gurmukhi", "GURMUKHI.TAB", 14 }, + { "Hebrew", "HEBREW.TAB", 15 }, + { "Icelandic", "ICELAND.TAB", 16 }, + { "iceland", "ICELAND.TAB", 16 }, + { "Keyboard Characters", "KEYBOARD.TAB", 17 }, + { "keyboard", "KEYBOARD.TAB", 17 }, + { "Romanian", "ROMANIAN.TAB", 18 }, + { "Symbol", "SYMBOL.TAB", 19 }, + { "Thai", 
"THAI.TAB", 20 }, + { "Turkish", "TURKISH.TAB", 21 }, + { "Ukrainian", "UKRAINE.TAB", 8 }, + { "Ukraine", "UKRAINE.TAB", 8 }, + { "Dingbats", "DINGBATS.TAB", 22 }, + { (char *) NULL, (char *) NULL, 0 } }; -static int table_index_for_codepage(int codepage) -{ - int i; - for (i = 0; i < CODEPAGES; i++) { - if (UniLibGlobals.tables[i].codepage == codepage) - return i; - } - for (i = 0; i < CODEPAGES; i++) { - if (UniLibGlobals.tables[i].codepage == 0) - return i; - } - return -1; -} -static int parse_int_env(const char *name, int fallback) -{ - const char *value = getenv(name); - char *end = NULL; - long parsed; - if (!value || !*value) - return fallback; - parsed = strtol(value, &end, 10); - if (!end || *end || parsed <= 0 || parsed > 65535) - return fallback; - return (int)parsed; -} +/* Functional macros */ +#define CPCharSize(ch) ((ch & 0xFF00) == 0) ? 1 : 2 +#define CP2UniType1Lookup(x, table) ((unicode_t *) ((table)->cpTab1))[0xFF & (int)x] +#define CP2UniType2Table1Lookup(x, table) \ + ((LONG *) ((table)->cpTab1))[0xFF & (int)x] +#define CP2UniType2Table2Lookup(dest, x, tab1Output, table) \ + dest = (((0xFF & (int)x) < (table)->minLowerByte) || ((0xFF & (int)x) > (table)->maxLowerByte)) \ + ? UNI_UNDEFINED \ + : (table)->cpTab2[(tab1Output) + ((0xFF & (int)x))]; +#define SecondLookupNotNeeded(tab1Output) \ + (((tab1Output) & 0xFF000000) == 0x0F000000) +#define LIBC_FREE_LOCK(x, y) kMutexFree(x) +#define LIBC_INIT_LOCK(x, y) kMutexAlloc(x) +#define LIBC_LOCK(x) kMutexLock(x) +#define LIBC_UNLOCK(x) kMutexUnlock(x) -LONG OSGetCodePage(void) -{ - return parse_int_env("NSS_CODEPAGE", NSS_DEFAULT_CODEPAGE); -} +#define inrange(lo, x, hi) ((x) >= (lo) && (x) <= (hi)) +#define GET_TABLE(vm, handle) \ + (inrange(0, handle, CODEPAGES-1) \ + && UniLibGlobals.tables[handle].codepage) \ + ? 
&UniLibGlobals.tables[handle] \ + : &UniLibGlobals.tables[UniLibGlobals.defaultHandle] +#define XLATION(copy) copy.word +#define TWO_BYTE_XLATION(copy) (copy.byte[1] != '\0') +#define FIRST_BYTE(copy) copy.byte[0] +#define SECOND_BYTE(copy) copy.byte[1] +#define UNI_TO_LOC(ch, utab1, utab2) utab2[utab1[(ch) >> 8] + ((ch) & 0x00FF)] +#define LOC_TO_UNI(ch, cpTab1) ((unicode_t *) cpTab1)[0xFF & (int)ch] +#define LOC_TO_UNI1(ch, cpTab1) ((LONG *) cpTab1)[0xFF & (int)ch] +#define LOC_TO_UNI2(ch, min, max, tab1Output, cpTab2) \ + (((0xFF & (int)ch) < min) || ((0xFF & (int)ch) > max)) ? UNI_UNDEFINED : cpTab2[(tab1Output) + (0xFF & (int)ch)] +#define SINGLE_LOOKUP(tab1Output) (((tab1Output) & 0xFF000000) == 0x0F000000) -static const char *table_name_for_codepage(int codepage, char *buffer, size_t size) -{ - snprintf(buffer, size, "UNI_%03d.TAB", codepage); - return buffer; -} -static FILE *open_table_file(const char *name) -{ - const char *env_dir = getenv("NSS_UNITABLE_DIR"); - const char *dirs = NSS_UNITABLE_DIRS; - char path[1024]; - char dirs_copy[2048]; - char *save = NULL; - char *dir; - FILE *file; - if (env_dir && *env_dir) { - snprintf(path, sizeof(path), "%s/%s", env_dir, name); - file = fopen(path, "rb"); - if (file) - return file; + +/* Prototypes */ +int InitStaticCollationAndCaseTables(void); +int ReadCollationAndCaseTables(void *index, void *data); +int lnxLB_atoi(const char *src); + + +/* Functional replacements for LibC aloc macro */ +void *LIBALLOC(unsigned int bytes, int unused) +{ + void *result; + unsigned pages; + unsigned slack; + + (void)unused; + + pages = bytes / PAGE_SIZE; + slack = bytes % PAGE_SIZE; + if (slack > 0) + { + pages++; } - if (dirs && *dirs) { - snprintf(dirs_copy, sizeof(dirs_copy), "%s", dirs); - for (dir = strtok_r(dirs_copy, ":", &save); dir; dir = strtok_r(NULL, ":", &save)) { - if (!*dir) - continue; - snprintf(path, sizeof(path), "%s/%s", dir, name); - file = fopen(path, "rb"); - if (file) - return file; - } + result = 
mpkPageAlloc(bytes, MPK_SLEEP_OK); + if (result != NULL) + { + memset(result, 0, bytes); } - return fopen(name, "rb"); + return result; } +#define LIBFREE(x) mpkPageFree(x) -static int read_exact(FILE *file, void *buf, size_t bytes) +/* Scary stuff to permit LibC NetWare specifics to compile */ +struct { - return fread(buf, 1, bytes, file) == bytes ? 0 : UNI_ERR_FILE_READ; -} + int allocRTag; +} LibCGlobals = {0}; -static int read_alloc(FILE *file, void **buf, size_t size, size_t count) + +/* Initialisation entry point for library. Sets up global state; returns 0 on success, nonzero on failure (the lock is then released and cleared) */ +int InitUniLibGlobals(void) { - size_t bytes = size * count; - *buf = calloc(count, size); - if (!*buf) - return UNI_ERR_MEM_ALLOC; - if (read_exact(file, *buf, bytes)) { - free(*buf); - *buf = NULL; - return UNI_ERR_FILE_READ; + int err; + + mpkEnter(); + + memset(&UniLibGlobals, '\0', sizeof(UniLibGlobals)); + + UniLibGlobals.signature = + UniLibGlobals.signature2 = kUniLibSig; + UniLibGlobals.paddington[0] = + UniLibGlobals.paddington[1] = + UniLibGlobals.paddington[2] = padSig; + UniLibGlobals.lock = LIBC_INIT_LOCK("Unicode Tables", (vm_t *) NULL); + + /* FIXFIXFIX - Fake the code page to 437 for now but we need a persistent + * way to store the code page for the host */ + UniLibGlobals.codepage = OSGetCodePage(); + + if (!UniLibGlobals.lock) + { + err = -1; + goto Exit; } + +/* +** Load a default rule table as well as the collation and casing tables which +** are always memory-resident. +*/ + err = InitStaticCollationAndCaseTables(); + LoadHostLocale(NULL, NULL); + if (err) + { + LIBC_FREE_LOCK(UniLibGlobals.lock, NULL); + UniLibGlobals.lock = NULL; /* keep DeinitUniLibGlobals() from freeing it again */ + } +Exit: + mpkExit(); + + return err; +} + + +/* Shutdown entry point for library. 
Cleans up global state */ +void DeinitUniLibGlobals(void) +{ + if (UniDataTable) + { + LIBFREE(UniDataTable); + UniDataTable = NULL; + } + if (UniMasterIndex) + { + LIBFREE(UniMasterIndex); + UniMasterIndex = NULL; /* InitStaticCollationAndCaseTables() tests this to decide whether to reload */ + } + if (UniLibGlobals.lock) + { + LIBC_FREE_LOCK(UniLibGlobals.lock, NULL); + UniLibGlobals.lock = NULL; /* a repeated shutdown call must not free it twice */ + } +} + +/* Return the current code page */ +int UniGetHostCodePage( void ) +{ + return UniLibGlobals.codepage; +} + + +/* Initialise Unicode collation and case conversion tables */ +int InitStaticCollationAndCaseTables( void ) +{ + int err; + LONG *new1, *new2; + unsigned int size; + + if (UniMasterIndex) // already initialized... + { + return 0; + } + +/* +** If these tables haven't been read in yet, do it. +*/ + size = 0x10000 * sizeof(LONG); + new1 = (LONG *) LIBALLOC(size, LibCGlobals.allocRTag); + if (!new1) + { + return ENOMEM; + } + + size = 46507 * sizeof(LONG); + new2 = (LONG *) LIBALLOC(size, LibCGlobals.allocRTag); + if (!new2) + { + LIBFREE(new1); + return ENOMEM; + } + + err = 0; + + LIBC_LOCK(UniLibGlobals.lock); + +/* +** After acquiring lock, check to see if the data hasn't already been allocated +** and set. If so, then it's done and we can toss what we've just done. +*/ + if (UniMasterIndex || (err = ReadCollationAndCaseTables(new1, new2))) + { + LIBFREE(new2); + LIBFREE(new1); + LIBC_UNLOCK(UniLibGlobals.lock); + return (err) ? err : 0; + } + + UniMasterIndex = new1; + UniDataTable = new2; + LIBC_UNLOCK(UniLibGlobals.lock); + return 0; } -static void uniFreeTable(RuleTable_t *table) + +/* Multiple string concatenation */ +char *strlist +( + char *s1, + const char *s2, + ... +) { - free(table->cpTab1); - free(table->cpTab2); - free(table->uniTab1); - free(table->uniTab2); + register char *tgt = s1; + va_list ap; + + va_start(ap, s2); + + *tgt = '\0'; + + while (s2) /* there is another string... 
*/ + { + while ((*tgt = *s2++)) + ++tgt; + + s2 = va_arg(ap, const char *); + } + + va_end(ap); + + return s1; /* points to beginning of target unlike USL version */ +} + + +/* Read the collation and case tables */ +int ReadCollationAndCaseTables(void *index, void *data) +{ + int err; + char filepath[256]; + struct file *filep; + int bytesRead; + unsigned int size, filePosition; + static char *UNI_000_TAB = "UNI_000.TAB"; + +/* +** Basically, special reading of UNI_000.TAB. Read in the collation and case +** tables which will remain resident until the server goes down. These tables +** are in "UNI_000.TAB". +*/ + err = -1; + + if (sUniTabDir[0]) + { + strlist(filepath, sUniTabDir, UNI_000_TAB, (char *) NULL); + filep = kFileOpen(filepath, O_RDONLY, 0); + if (IS_ERR(filep)) + { + printk("NSSLNXLIB: Failed to open Unicode table file\n"); + return -1; + } + } + else + { + return -1; + } + + +#if 0 + if (err) + { + // if this fails, all hope is lost... + if (err = ProposeTableOnServerLoadPath(sUniTabDir, TRUE)) + return UNI_ERR_FILE_OPEN; + + strlist(filepath, sUniTabDir, UNI_000_TAB, (char *) NULL); + + if (err = DOSOpenWithAccess(filepath, &fh, 0)) + { + if (err = ProposeTableOnServerLoadPath(sUniTabDir, FALSE)) + return UNI_ERR_FILE_OPEN; + + strlist(filepath, sUniTabDir, UNI_000_TAB, (char *) NULL); + + if (err = DOSOpenWithAccess(filepath, &fh, 0)) + { + // set sUniTabDir so that it will be recalculated + sUniTabDir[0] = '\0'; + + /* + ** Try LibC's load path only for collation and case tables. In the + ** NetWare 5 overlay install this is the only place they exist. 
+ */ + sprintf(filepath, "%sNLS\\UNI_000.TAB", + LibCGlobals.handle->LDFullPath); + if (err = DOSOpenWithAccess(filepath, &fh, 0)) + return UNI_ERR_FILE_OPEN; + } + } + } +#endif + + filePosition = 0; + + size = 0x10000 * sizeof(LONG); + filep->f_pos = 0; + bytesRead = kFileRead(filep, index, size); + if ((bytesRead < 0) || (bytesRead != size)) + { + printk("NSSLNXLIB: Failed to read %d collation table bytes for file %p: %d\n", size, filep, bytesRead); + err = UNI_ERR_FILE_READ; + goto Exit; + } + + filePosition += size; + + size = 46507 * sizeof(LONG); + bytesRead = kFileRead(filep, data, size); + + if ((bytesRead < 0) || (bytesRead != size)) + { + printk("NSSLNXLIB: Failed to read case table: %d\n", bytesRead); + err = UNI_ERR_FILE_READ; + goto Exit; + } + + err = 0; + +Exit: + (void)filp_close(filep, NULL); + + return err; +} + + +STATIC int uniAllocAndRead(struct file *filep, void **buf, unsigned int *filePosition, unsigned int size, unsigned int count) +{ + int err = 0; + int bytesRead; + unsigned int total; + + total = size * count; + + *buf = LIBALLOC(total, LibCGlobals.allocRTag); + + if (!*buf) + return UNI_ERR_MEM_ALLOC; + + klseek(filep, *filePosition, 0); + bytesRead = kFileRead(filep, *buf, total); + + if (bytesRead == total) + { + (*filePosition) += bytesRead; + } + + return (err || bytesRead != total) ? UNI_ERR_FILE_READ : 0; +} + + +STATIC void uniFreeTable(RuleTable_t *table) +{ + // free up allocated structures and zero-out the table... 
+ if (table->cpTab1) + LIBFREE(table->cpTab1); + + if (table->cpTab2) + LIBFREE(table->cpTab2); + + if (table->uniTab1) + LIBFREE(table->uniTab1); + + if (table->uniTab2) + LIBFREE(table->uniTab2); + memset(table, 0, sizeof(*table)); } -static int read_collation_and_case_tables(void) + +STATIC int uniLoadUni2MBTables(struct file *filep, RuleTable_t *table, unsigned int *filePosition) { - FILE *file = open_table_file("UNI_000.TAB"); - size_t master_bytes = 0x10000u * sizeof(uint32_t); - size_t data_bytes = 46507u * sizeof(uint32_t); - if (!file) - return UNI_ERR_FILE_OPEN; - UniMasterIndex = calloc(0x10000u, sizeof(uint32_t)); - UniDataTable = calloc(46507u, sizeof(uint32_t)); - if (!UniMasterIndex || !UniDataTable) { - fclose(file); - free(UniMasterIndex); - free(UniDataTable); - UniMasterIndex = NULL; - UniDataTable = NULL; - return UNI_ERR_MEM_ALLOC; + int err = 0; + unsigned int tab2Size; + unsigned int bytesRead; + + // read size of buffer needed to hold phase 2 tables... + klseek(filep, *filePosition, 0); + bytesRead = kFileRead(filep, (char *)&tab2Size, sizeof(unsigned int)); + + if (bytesRead == sizeof(unsigned int)) + { + *filePosition += bytesRead; } - if (read_exact(file, UniMasterIndex, master_bytes) || read_exact(file, UniDataTable, data_bytes)) { - fclose(file); - free(UniMasterIndex); - free(UniDataTable); - UniMasterIndex = NULL; - UniDataTable = NULL; - return UNI_ERR_FILE_READ; + + if (err || (bytesRead != sizeof(unsigned int))) + { + err = UNI_ERR_FILE_READ; + goto Exit; } - fclose(file); - return 0; + + // load first lookup table... + if ((err = uniAllocAndRead(filep, (void **)&table->uniTab1, filePosition, + sizeof(unsigned short), 256))) + goto Exit; + + // load phase 2 tables... 
+ err = uniAllocAndRead(filep, (void **)&table->uniTab2, filePosition, + sizeof(unsigned short), tab2Size); +Exit : + return err; } -static int init_unilib(void) + +STATIC int uniLoadMB2UniTables(struct file *filep, RuleTable_t *table, unsigned int *filePosition) { - if (UniLibStarted) + int err = 0; + unsigned int bytesRead; + LONG tab2Size; + unsigned int size; + + // get the lookup table type. 1 - single lookup, 2 - double lookup... + klseek(filep, *filePosition, 0); + bytesRead = kFileRead(filep, (char *)&table->tabType, sizeof(LONG)); + if (bytesRead == sizeof(LONG)) + { + *filePosition += bytesRead; + } + + if (err || (bytesRead != sizeof(LONG))) + { + err = UNI_ERR_FILE_READ; + goto Exit; + } + + size = (table->tabType == 1) ? sizeof(unicode_t) : sizeof(LONG); + + // load data for table 1 of the two-phased lookup table... + err = uniAllocAndRead(filep, &table->cpTab1, filePosition, size, 256); + + if (err) + goto Exit; + + // read phase II look-up table... + if (table->tabType == 2) + { + bytesRead = kFileRead(filep, (char *)&table->minLowerByte, sizeof(LONG)); + if (bytesRead == sizeof(LONG)) + { + *filePosition += bytesRead; + } + + if (err || (bytesRead != sizeof(LONG))) + { + err = UNI_ERR_FILE_READ; + goto Exit; + } + + bytesRead = kFileRead(filep, (char *)&table->maxLowerByte, sizeof(LONG)); + if (bytesRead == sizeof(LONG)) + { + *filePosition += bytesRead; + } + + if (err || (bytesRead != sizeof(LONG))) + { + err = UNI_ERR_FILE_READ; + goto Exit; + } + + bytesRead = kFileRead(filep, (char *)&tab2Size, sizeof(LONG)); + if (bytesRead == sizeof(LONG)) + { + *filePosition += bytesRead; + } + + if (err || (bytesRead != sizeof(LONG))) + { + err = UNI_ERR_FILE_READ; + goto Exit; + } + + // load table 2... 
+ err = uniAllocAndRead(filep, (void **)&table->cpTab2, filePosition, + sizeof(unicode_t), tab2Size); + } + +Exit : + return err; +} + +/* Parse the "Version <major>.<minor>" stamp from the start of a table file into 'table'; returns 0, UNI_ERR_FILE_READ or UNI_ERR_TABLE_CORRUPT */ +STATIC int uniCheckHeader(struct file *filep, RuleTable_t *table, unsigned int *filePosition) +{ + int err = 0; + char buf[64 + 1], *ptr, *endPtr; // 64 header bytes plus a terminator + unsigned int bytesRead; + + (void)klseek(filep, 0, 0); + bytesRead = kFileRead(filep, buf, sizeof(buf) - 1); + + if (err || bytesRead != sizeof(buf) - 1) + { + printk("uniCheckHeader read problem, err = %X, bytes read = %d\n", err, bytesRead); + err = UNI_ERR_FILE_READ; + goto Exit; + } + buf[sizeof(buf) - 1] = '\0'; // strstr()/strchr() below need a terminated string + // look for "Version" string in the header... + if (!(ptr = strstr(buf, "Version "))) + { + err = UNI_ERR_TABLE_CORRUPT; + goto Exit; + } + + ptr += 8; // as if strlen("Version ") + + // find the '.' in the version number... + endPtr = strchr(ptr, '.'); + if (!endPtr) { err = UNI_ERR_TABLE_CORRUPT; goto Exit; } // no '.' after "Version ": malformed header + *endPtr = '\0'; + table->major = lnxLB_atoi(ptr); + table->minor = lnxLB_atoi(endPtr + 1); + + /* + ** Check version number of table. The major version indicates changes in + ** the format and function of the table. The minor version is incremented + ** when the content of the table changes. If the major version number is + ** greater than CURRENT_UNI_TAB_VERSION then it is not compatible with this + ** build of the library. + */ + if (table->major > CURRENT_UNI_TAB_VERSION) + err = UNI_ERR_TABLE_CORRUPT; + + // file header is 256 bytes... 
+ (*filePosition) += 256; + +Exit : + return err; +} + + +#if 0 /* checksum verification is compiled out */ +STATIC int uniVerifyCheckSum(struct file *filep) +{ + int err = 0; + unsigned int filePosition; + unsigned int bytesRead; + unsigned short checkSum, currentBytes, previousBytes; + + checkSum = + previousBytes = + filePosition = 0; + + klseek(filep, 0, 0); + bytesRead = kFileRead(filep, (char *)&currentBytes, sizeof(currentBytes)); + return 0; + + while (err == 0 && bytesRead == 2) + { + checkSum += previousBytes; + previousBytes = currentBytes; + filePosition += bytesRead; + + bytesRead = kFileRead(filep, (char *)&currentBytes, sizeof(currentBytes)); + } + + if (err) + return UNI_ERR_FILE_READ; + + if (currentBytes != checkSum) + return UNI_ERR_TABLE_CORRUPT; + + return 0; +} +#endif + +STATIC int uniTableHandleFromCodepage(int codepage) +{ + int tab, emptySlot; + +/* +** A rule handle is a number that serves as subscript to the array of Unicode +** translation tables. This function translates between a codepage and a +** handle. If codepage 0 is passed (not a real codepage), then this function's +** other semantic is to find the next empty slot for the caller's use. +*/ + if (codepage) + { + for (tab = 0; tab < CODEPAGES; tab++) + { + if (UniLibGlobals.tables[tab].codepage == codepage) + return tab; + } + + return -1; // not found + } + + for (emptySlot = 0; emptySlot < CODEPAGES; emptySlot++) + { + if (!UniLibGlobals.tables[emptySlot].codepage) + return emptySlot; + } + + return -1; // no empty slot found +} + + +STATIC int OpenUnicodeTable(int codepage, const char *filename, UniRuleTable_t *handle) +{ + int err, tab; + char filepath[256]; + unsigned int filePosition; + struct file *filep = NULL; + RuleTable_t table; + + printk("Opening Unicode table %d\n", codepage); + +/* +** Here we have a request to get a translation table for a specific codepage. +** Ordinarily, most applications will just expect translation between Unicode +** and the local codepage, but we offer complete linguistic freedom. 
We have +** allocated enough blank tables to handle all the codepages NetWare supported +** in March 2001. They should never get used up because we fold requests for +** tables using the 'usageCnt' field. If callers complain they get +** UNI_ERR_TABLE_CORRUPT back from this function, it signals that the number of +** possible translations has increased or something is wrong in our code. +*/ + if (codepage == 0) // means just give me the default one... + { + printk("Opening default table\n"); + *handle = (UniRuleTable_t) UniLibGlobals.defaultHandle; + LIBC_LOCK(UniLibGlobals.lock); + UniLibGlobals.tables[*handle].usageCnt++; + LIBC_UNLOCK(UniLibGlobals.lock); return 0; - memset(&UniLibGlobals, 0, sizeof(UniLibGlobals)); - UniLibGlobals.codepage = OSGetCodePage(); - if (read_collation_and_case_tables()) - return UNI_ERR_FILE_OPEN; - if (UniGetTable(UniLibGlobals.codepage, &UniLibGlobals.defaultHandle)) - return UNI_ERR_FILE_OPEN; - UniLibStarted = 1; - return 0; -} - -static int uniLoadUni2MBTables(FILE *file, RuleTable_t *table) -{ - uint32_t tab2Size; - if (read_exact(file, &tab2Size, sizeof(tab2Size))) - return UNI_ERR_FILE_READ; - if (read_alloc(file, (void **)&table->uniTab1, sizeof(unsigned short), 256)) - return UNI_ERR_FILE_READ; - return read_alloc(file, (void **)&table->uniTab2, sizeof(unsigned short), tab2Size); -} - -static int uniLoadMB2UniTables(FILE *file, RuleTable_t *table) -{ - int32_t tab2Size; - if (read_exact(file, &table->tabType, sizeof(int32_t))) - return UNI_ERR_FILE_READ; - if (table->tabType != 1 && table->tabType != 2) - return UNI_ERR_TABLE_CORRUPT; - if (read_alloc(file, &table->cpTab1, table->tabType == 1 ? 
sizeof(unicode_t) : sizeof(int32_t), 256)) - return UNI_ERR_FILE_READ; - if (table->tabType == 2) { - if (read_exact(file, &table->minLowerByte, sizeof(int32_t)) || - read_exact(file, &table->maxLowerByte, sizeof(int32_t)) || - read_exact(file, &tab2Size, sizeof(int32_t))) - return UNI_ERR_FILE_READ; - if (tab2Size < 0) - return UNI_ERR_TABLE_CORRUPT; - return read_alloc(file, (void **)&table->cpTab2, sizeof(unicode_t), (size_t)tab2Size); } - return 0; -} -static int parse_version(const unsigned char header[256], int *major, int *minor) -{ - const char *marker = "Version "; - const char *pos = strstr((const char *)header, marker); - if (!pos) + // now try for an actual table... + tab = uniTableHandleFromCodepage(codepage); + + if (tab != -1) + { + printk("Found loaded Unicode table %d\n", codepage); + // table found to be already loaded... + *handle = tab; // make sure caller gets this back! + LIBC_LOCK(UniLibGlobals.lock); + UniLibGlobals.tables[tab].usageCnt++; + LIBC_UNLOCK(UniLibGlobals.lock); + return 0; + } + + // not there? let's get an empty slot for loading into then... + tab = uniTableHandleFromCodepage(0); + + if (tab == -1) // we should have had enough room for all tables... return UNI_ERR_TABLE_CORRUPT; - pos += strlen(marker); - *major = atoi(pos); - pos = strchr(pos, '.'); - *minor = pos ? atoi(pos + 1) : 0; - return 0; -} -static int OpenUnicodeTable(int codepage, const char *filename, UniRuleTable_t *handle) -{ - FILE *file; - unsigned char header[256]; - RuleTable_t new_table; - int slot; - char generated_name[32]; + memset(&table, 0, sizeof(table)); - if (!handle) - return UNI_ERR_BAD_HANDLE; - if (!UniMasterIndex && init_unilib()) - return UNI_ERR_FILE_OPEN; + // initialize UniRuleTable_t struct... 
+ table.codepage = codepage; - for (slot = 0; slot < CODEPAGES; slot++) { - if (UniLibGlobals.tables[slot].codepage == codepage) { - UniLibGlobals.tables[slot].usageCnt++; - *handle = slot; - return 0; +/*----------------------------------------------------------------------------- +** Read in a new table: +** This table not already loaded, but there is room for it. First, try out the +** path we already have (if in fact we have done this before). That failing, +** try out a couple of other different ways and remember how we did it for next +** time. +**----------------------------------------------------------------------------- +*/ + if (sUniTabDir[0]) + { + strlist(filepath, sUniTabDir, filename, (char *) NULL); + filep = kFileOpen(filepath, O_RDONLY, 0); + if (IS_ERR(filep)) + { + printk("Error opening Unicode table file: %s\n", filepath); + err = -1; + goto Exit; } } - if (!filename) - filename = table_name_for_codepage(codepage, generated_name, sizeof(generated_name)); - file = open_table_file(filename); - if (!file) - return UNI_ERR_FILE_OPEN; - memset(&new_table, 0, sizeof(new_table)); - if (read_exact(file, header, sizeof(header)) || parse_version(header, &new_table.major, &new_table.minor) || - uniLoadMB2UniTables(file, &new_table) || uniLoadUni2MBTables(file, &new_table)) { - fclose(file); - uniFreeTable(&new_table); - return UNI_ERR_FILE_READ; +#if 0 + if (err) + { + if (err = ProposeTableOnServerLoadPath(sUniTabDir, TRUE)) + return UNI_ERR_FILE_OPEN; + + strlist(filepath, sUniTabDir, filename, (char *) NULL); + + if (err = DOSOpenWithAccess(filepath, &fh, 0)) + { + // now try in the current directory + if (err = ProposeTableOnServerLoadPath(sUniTabDir, FALSE)) + return UNI_ERR_FILE_OPEN; + + strlist(filepath, sUniTabDir, filename, (char *) NULL); + + if (err = DOSOpenWithAccess(filepath, &fh, 0)) + return UNI_ERR_FILE_OPEN; + } } - fclose(file); - slot = table_index_for_codepage(codepage); - if (slot < 0) { - uniFreeTable(&new_table); - return 
UNI_ERR_MEM_ALLOC; + if ((err = uniVerifyCheckSum(filep))) + { + printk("Checksum error opening Unicode table file: %s\n", filepath); + goto CloseExit; } - new_table.codepage = codepage; - new_table.usageCnt = 1; - UniLibGlobals.tables[slot] = new_table; - *handle = slot; - return 0; + + goto CloseExit; +#endif + + filePosition = 0; + + if ((err = uniCheckHeader(filep, &table, &filePosition))) + { + printk("Header error opening Unicode table file: %s\n", filepath); + goto CloseExit; + } + + if ((err = uniLoadMB2UniTables(filep, &table, &filePosition))) + { + printk("Error loading MB2 Unicode table file: %s\n", filepath); + goto CloseExit; + } + + if ((err = uniLoadUni2MBTables(filep, &table, &filePosition))) + { + printk("Error loading 2MB Unicode table file: %s\n", filepath); + goto CloseExit; + } + + table.usageCnt = 1; + +/* +** Now that the table has been allocated, check to see if someone beat us to +** the punch and has already set it in the array. If so, then just dump the +** resources we consumed, bump the usage counter in the (new, but not the one +** we allocated) table and return. If the table slot is already in use (and +** allocated), the table slot will be non-null. +*/ + LIBC_LOCK(UniLibGlobals.lock); + + if (UniLibGlobals.tables[tab].codepage) + { + uniFreeTable(&table); + UniLibGlobals.tables[tab].usageCnt++; + } + else + { + // structure assignment here... + UniLibGlobals.tables[tab] = table; + } + + LIBC_UNLOCK(UniLibGlobals.lock); + + *handle = tab; // make sure caller gets this back! 
+ +CloseExit: + (void)filp_close(filep, NULL); + +Exit: + printk("Open Unicode table status %d\n", err); + return err; } + int UniGetTable(int codepage, UniRuleTable_t *handle) { - return OpenUnicodeTable(codepage, NULL, handle); + char dosname[16]; + + sprintf(dosname, "UNI_%03d.TAB", codepage); + + return OpenUnicodeTable(codepage, dosname, handle); } + +int LoadHostLocale(void *whatever, void *NLMHandle) +{ + int err, codepage; + UniRuleTable_t handle; + + (void)whatever; /* Unused argument */ + + printk("Loading host locale\n"); + +/* +** This function is always called once every boot since the LCONFIG.SYS file +** isn't read by the OS until long after we load. This is very important for +** non-437 locales since the real, default Unicode translation won't be set +** up until it does happen. Meanwhile, early-loading NLMs like drivers will +** get page 437 behavior. If they want to keep this behavior, they should be +** careful with Unicode that they do not use the default table since it will +** be changing. +** +** If a host locale change is performed very often, there will be hell to pay +** in filepath translation for NKS and even POSIX/ANSI atop NSS. +*/ +#if 0 + BuildDBCSTableFromVector(); + GetUpperCaseTable(); + DeriveLowerCaseTable(); + GetCollationTable(); + LoadMessages(NLMHandle, LibCGlobals.console); +#endif + // since this probably involved a new codepage... + err = UniGetTable(codepage = UniLibGlobals.codepage, &handle); + + // ...we'll change all this, but this could be devastating to filepaths! 
+ if (!err) + { + UniLibGlobals.defaultHandle = handle; + UniLibGlobals.codepage = codepage; + UniLibGlobals.tables[handle].usageCnt = 0x10; + } + + printk("Loaded host locale\n"); + + return 0; +} + + +int lnxLB_stricmp(const char *s, const char *t) +{ + unsigned char c1; + unsigned char c2; + + for(;;) { + c1 = *s; + if( c1 >= 'A' && c1 <= 'Z' ) c1 += 'a' - 'A'; + c2 = *t; + if( c2 >= 'A' && c2 <= 'Z' ) c2 += 'a' - 'A'; + if( c1 != c2 ) break; + if( c1 == '\0' ) break; + ++s; + ++t; + } + return( c1 - c2 ); +} + + +int lnxLB_isdigit(const char ch) +{ + return ((ch >= '0') && (ch <= '9')); +} + + +int lnxLB_atoi(const char *src) +{ + int result = 0; + int doMinus = 0; + + if (src == NULL) + { + return 0; + } + + doMinus = (*src == '-'); + if (doMinus) + { + src++; + } + + while (*src) + { + if (!lnxLB_isdigit(*src)) + { + return 0; + } + + result *= 10; + if (doMinus) + { + result -= (*src - '0'); + } + else + { + result += (*src - '0'); + } + + src++; + } + + return result; +} + + +static int hexval +( + int ch +) +{ + switch (ch) + { + case '0' : return 0; + case '1' : return 1; + case '2' : return 2; + case '3' : return 3; + case '4' : return 4; + case '5' : return 5; + case '6' : return 6; + case '7' : return 7; + case '8' : return 8; + case '9' : return 9; + case 'A' : + case 'a' : return 10; + case 'B' : + case 'b' : return 11; + case 'C' : + case 'c' : return 12; + case 'D' : + case 'd' : return 13; + case 'E' : + case 'e' : return 14; + case 'F' : + case 'f' : return 15; + default : break; + } + + return -1; // (not 7-bit ASCII hexadecimal value) +} + +unicode_t unichar; + +unicode_t ConvertSpecialSequence +( + char *src // presumably contains "[XxYy]" sequence +) +{ + int ch, set, ch1, ch2; + +/* +** The special sequence, "[XxYy]", where XxYy are valid hexadecimal characters +** in the range defined by hexval(), is converted to Unicode character 0xXxYy. 
+** Since 0xFFFF is not a valid Unicode character (it is permitted to be a +** sentinel value per Unicode Version 1.0 document, page 623 near bottom), +** this function disallows it and uses it as indication of error. +*/ + if (*(src+5) != ']') // (quick, easy and useful) + return INVALID_SEQUENCE; + + src++; // skip the opening '[' + + // convert first two digits into the character set... + ch1 = hexval(*src++); + ch2 = hexval(*src++); + + if (ch1 == -1 || ch2 == -1) + return INVALID_SEQUENCE; + + ch1 *= 16; + set = ch1 + ch2; + + // convert second two digits into the character... + ch1 = hexval(*src++); + ch2 = hexval(*src++); + + if (ch1 == -1 || ch2 == -1) + return INVALID_SEQUENCE; + + ch1 *= 16; + ch = ch1 + ch2; + + // construct the result and return it... + unichar = set << 8; + unichar |= ch; + + return unichar; +} + + +/* Convert a string from code page to Unicode */ +int loc2uni +( + UniRuleTable_t handle, // 0 means all characters from Unicode 0 + unicode_t *dest, + const char *src, + unicode_t noMapCh, + int noMapFlag +) +{ + int err, indx, min , max; + void *cpTab1; + unicode_t *cpTab2; +// vm_t *vm; + RuleTable_t *table; + +/* +** Translate the in-coming string from the local codepage into Unicode. The +** comments in this function hold more or less for its derivatives where the +** comments aren't reliably repeated. +*/ + err = 0; + table = GET_TABLE(vm, handle); + cpTab1 = table->cpTab1; + + if (table->tabType == 1) // simple, single pass lookup + { + while (*src) + { + *dest = LOC_TO_UNI(*src, cpTab1); + + if (*dest == UNI_UNDEFINED) + { + switch (noMapFlag) + { + default : + case UNI_MAP_NO_CHAR : + return UNI_ERR_UNMAPPABLE_CHAR; + case UNI_MAP_CHAR : + *dest = (noMapCh == 0x0000) ? UNI_UNDEFINED : noMapCh; + break; + case UNI_MAP_SELF : + *dest = *src; + break; + } + } + + dest++; + src++; + } + + goto Exit; + } + + // double-byte codepage--possibly two lookups... 
+ min = table->minLowerByte; + max = table->maxLowerByte; + cpTab2 = table->cpTab2; + + while (*src) + { + char save = *src; + + indx = LOC_TO_UNI1(*src, cpTab1); + + if (SINGLE_LOOKUP(indx)) + { // gives us the whole Unicode character + *dest = (unicode_t) indx; + } + else + { + src++; // now, skip the first byte of the two + *dest = LOC_TO_UNI2(*src, min, max, indx, cpTab2); + } + + if (*dest == UNI_UNDEFINED) // handle unmapped characters + { + char *p; + + switch (noMapFlag) + { + default : + case UNI_MAP_NO_CHAR : + err = UNI_ERR_UNMAPPABLE_CHAR; + goto Exit; + case UNI_MAP_CHAR : + *dest = (noMapCh == 0x0000) ? UNI_UNDEFINED : noMapCh; + break; + case UNI_MAP_SELF : + p = (char *) dest; + *p++ = save; + *p = *src; + break; + } + } + + src++; + dest++; + } + +Exit : + *dest = 0x0000; + + return err; +} + + +int loc2unipath +( + UniRuleTable_t handle, // 0 means all characters from Unicode 0 + unicode_t *dest, + const char *source, + size_t *dryRunSize // don't copy, only measure if present +) +{ + int multibyte; +// vm_t *vm; + unicode_t unichar; + RuleTable_t *table; + register char *src; + + if (dryRunSize) // don't copy, only count up number of... + *dryRunSize = 0; // ...bytes needed to render + + table = GET_TABLE(vm, handle); + src = (char *) source; + + multibyte = !(table->tabType == 1); + + if (dryRunSize) // measure string only... + { + while (*src) + { + if (*src == '[' && (ConvertSpecialSequence(src) != INVALID_SEQUENCE)) + src += 5; // this sequence is one character + else if (multibyte + && !(SecondLookupNotNeeded(CP2UniType2Table1Lookup(*src, table)))) + src++; // double-byte sequence is one character + + src++; + (*dryRunSize)++; + } + + return 0; + } + + while (*src) // perform the translation for real... 
+ { + switch (*src) + { + case '/' : // preserve directory path delimiters + case '\\' : + case ':' : + *dest = (unicode_t) *src; + break; + + case '[' : // enforced, special byte sequence "[XxYy]" + if ((unichar = ConvertSpecialSequence(src) != INVALID_SEQUENCE)) + { + *dest = unichar; + src += 5; // will bump for 6th character at bottom + break; + } + + nobreak; // handle as any other ']' + + default : + if (multibyte) // multibyte codepage: up to two look-ups + { + int index = CP2UniType2Table1Lookup(*src, table); + + if (SecondLookupNotNeeded(index)) + { + *dest = (unicode_t) index; + } + else + { + src++; // skip switch (first) byte + CP2UniType2Table2Lookup(*dest, *src, index, table); + } + } + else // simple, single pass look-up + { + *dest = CP2UniType1Lookup(*src, table); + } + + if (*dest == UNI_UNDEFINED) + return UNI_ERR_UNMAPPABLE_CHAR; + + break; + } + + dest++; + src++; + } + + *dest = 0x0000; // null-terminate the output + return 0; +} + + +int uni2loc +( + UniRuleTable_t handle, // 0 means all characters from Unicode 0 + char *dest, + const unicode_t *src, + char noMapCh, + int noMapFlag +) +{ + int err; + unsigned short *uniTab1, *uniTab2; +// vm_t *vm; + RuleTable_t *table; + + err = 0; + table = GET_TABLE(vm, handle); + uniTab1 = table->uniTab1; + uniTab2 = table->uniTab2; + + if (noMapFlag == UNI_MAP_CHAR && noMapCh == 0) + noMapCh = DEFAULT_NOMAP; + + while (*src) + { + copyu_t copy; + + copy.word = UNI_TO_LOC(*src, uniTab1, uniTab2); + + if (copy.word != CP_UNDEFINED) + { + *dest++ = FIRST_BYTE(copy); + + if (TWO_BYTE_XLATION(copy))// (may go over here*) + *dest++ = SECOND_BYTE(copy); + } + else + { + switch (noMapFlag) // handle unmapped characters + { + default : + case UNI_MAP_NO_CHAR : + err = UNI_ERR_UNMAPPABLE_CHAR; + break; + case UNI_MAP_CHAR : + *dest++ = (noMapCh == 0) ? DEFAULT_NOMAP : noMapCh; + break; + case UNI_MAP_SELF : + *dest++ = *src & 0x00FF; + break; + } + } + + src++; + } + + // null-terminate the string... 
+ *dest = '\0'; + + return err; +} + + +const char sHex[] = "0123456789ABCDEF"; + +int uni2locpath +( + UniRuleTable_t handle, // 0 means local host code page + char *dest, // rule of thumb: unilen(src) + 20% + const unicode_t *src, + size_t *dryRunSize // don't copy, only measure if present +) +{ + unsigned short ch; // holds potentially two byte characters + unsigned short *uniTab1, *uniTab2; +// vm_t *vm; + RuleTable_t *table; + + if (dryRunSize) // don't copy, only count up number of... + *dryRunSize = 0; // ...bytes needed to render + + table = GET_TABLE(vm, handle); + uniTab1 = table->uniTab1; + uniTab2 = table->uniTab2; + + if (dryRunSize) + { + while (*src) + { + switch (ch=*src) + { + case 0x005C : + case 0x00A5 : + case 0xF8F7 : + (*dryRunSize)++; + break; + + default : + if ((ch = UNI_TO_LOC(*src, uniTab1, uniTab2)) == CP_UNDEFINED) + (*dryRunSize) += 6; + else + (*dryRunSize) += CPCharSize(ch); + break; + } + + src++; + } + + return 0; + } + + while (*src) // march through the string... + { + switch (ch=*src) // define the Unicode character + { + case 0x005C : // backslash + case 0x00A5 : // Yen + case 0xF8F7 : // Novell-defined path separator + *dest++ = 0x5C; // our semantics make these all backslashes + break; + + default : // look up the character and translate it + ch = UNI_TO_LOC(*src, uniTab1, uniTab2); + + if (ch == CP_UNDEFINED) // can't map the character? + { + int set, ch; + + set = (*src & 0xFF00) >> 8;// this is the character set + ch = *src & 0x00FF; // this is the character in that set + + *dest++ = '['; // begin building the unmappable output + // render the character set and character... 
+ // ...as hexadecimal string (in two bytes) + *dest++ = sHex[(set & 0xF0) >> 4]; + *dest++ = sHex[(set & 0x0F)]; + *dest++ = sHex[(ch & 0xF0) >> 4]; + *dest++ = sHex[(ch & 0x0F)]; + *dest++ = ']'; // finish building the unmappable output + } + else // map the character as one or two bytes + { + *((unsigned short *) dest) = ch; + dest += CPCharSize(ch);// skip bytes corresponding to size... + } // ...of the resulting character + + break; + } + + src++; + } + + *dest = '\0'; + + return 0; +} + + +unicode_t chr2lwr +( + unicode_t ch +) +{ + LONG lookup; + + if ((lookup = UniMasterIndex[ch]) & MAPPING_TO_LOWER_CASE_BIT) + return (unicode_t) UniDataTable[lookup & OFFSET_MASK]; + + return ch; +} + +unicode_t chr2upr +( + unicode_t ch +) +{ + LONG lookup; + + if ((lookup = UniMasterIndex[ch]) & MAPPING_TO_UPPER_CASE_BIT) + { + LONG offset = lookup & OFFSET_MASK; + + if(lookup & MAPPING_TO_LOWER_CASE_BIT) + offset ++; + + return (unicode_t) UniDataTable[offset]; + } + + return ch; +} + + int UniGetMacintoshTable(const char *name, UniRuleTable_t *handle) { - maccp_t *mac; - for (mac = sMacintoshCodepages; mac->name; mac++) { - if (strcasecmp(name, mac->name) == 0) - return OpenUnicodeTable(mac->codepage, mac->dosname, handle); - } - return UNI_ERR_FILE_OPEN; -} + maccp_t *mac = sMacintoshCodepages; -static RuleTable_t *get_table(UniRuleTable_t handle) -{ - if (init_unilib()) - return NULL; - if (handle == UNI_LOCAL_DEFAULT) - handle = UniLibGlobals.defaultHandle; - if (handle < 0 || handle >= CODEPAGES || !UniLibGlobals.tables[handle].codepage) - return NULL; - return &UniLibGlobals.tables[handle]; -} - -static unicode_t loc_char_to_uni(RuleTable_t *table, unsigned short ch) -{ - if (table->tabType == 1) - return ((unicode_t *)table->cpTab1)[ch & 0xff]; - if ((ch & 0xff00) == 0) { - int32_t lookup = ((int32_t *)table->cpTab1)[ch & 0xff]; - if ((lookup & 0xff000000) == SINGLE_LOOKUP_TAG) - return lookup & 0xffff; - return UNI_UNDEFINED; - } else { - int lead = (ch >> 8) & 
0xff; - int low = ch & 0xff; - int32_t lookup = ((int32_t *)table->cpTab1)[lead]; - if ((lookup & 0xff000000) == SINGLE_LOOKUP_TAG) - return lookup & 0xffff; - if (low < table->minLowerByte || low > table->maxLowerByte) - return UNI_UNDEFINED; - return table->cpTab2[lookup + low]; - } -} - -int loc2uni(UniRuleTable_t handle, unicode_t *dest, const char *src, unicode_t noMapCh, int noMapFlag) -{ - RuleTable_t *table = get_table(handle); - if (!table) - return UNI_ERR_BAD_HANDLE; - while (*src) { - unsigned char first = (unsigned char)*src++; - unsigned short code = first; - unicode_t ch; - if (table->tabType == 2 && ((int32_t *)table->cpTab1)[first] < SINGLE_LOOKUP_TAG && *src) - code = (unsigned short)((first << 8) | (unsigned char)*src++); - ch = loc_char_to_uni(table, code); - if (ch == UNI_UNDEFINED) { - if (noMapFlag == UNI_MAP_CHAR) - ch = noMapCh ? noMapCh : UNI_NOMAP_DEFAULT; - else if (noMapFlag == UNI_MAP_SELF) - ch = code; - else - return UNI_ERR_UNMAPPABLE_CHAR; + while (mac->name) + { + if (lnxLB_stricmp(name, mac->name) == 0) + { + break; } - *dest++ = ch; + + mac++; } - *dest = 0; - return 0; -} -int loc2unipath(UniRuleTable_t handle, unicode_t *dest, const char *src, size_t *dryRunSize) -{ - int rc = loc2uni(handle, dest, src, 0, UNI_MAP_NO_CHAR); - if (dryRunSize && rc == 0) { - size_t chars = 0; - while (dest[chars]) - chars++; - *dryRunSize = (chars + 1) * sizeof(unicode_t); + if (!mac->name) + { + return UNI_ERR_FILE_OPEN; } - return rc; -} -int uni2loc(UniRuleTable_t handle, char *dest, const unicode_t *src, char noMapCh, int noMapFlag) -{ - RuleTable_t *table = get_table(handle); - if (!table) - return UNI_ERR_BAD_HANDLE; - while (*src) { - unicode_t uni = *src++; - unsigned short code = table->uniTab2[table->uniTab1[uni >> 8] + (uni & 0xff)]; - if (code == CP_UNDEFINED) { - if (noMapFlag == UNI_MAP_CHAR) - code = (unsigned char)(noMapCh ? 
noMapCh : UNI_NOMAP_DEFAULT); - else if (noMapFlag == UNI_MAP_SELF && uni <= 0xff) - code = uni; - else - return UNI_ERR_UNMAPPABLE_CHAR; - } - *((unsigned short *)dest) = code; - dest += ((code & 0xff00) == 0) ? 1 : 2; - } - *dest = '\0'; - return 0; -} - -int uni2locpath(UniRuleTable_t handle, char *dest, const unicode_t *src, size_t *dryRunSize) -{ - int rc = uni2loc(handle, dest, src, 0, UNI_MAP_NO_CHAR); - if (dryRunSize && rc == 0) - *dryRunSize = strlen(dest) + 1; - return rc; -} - -unicode_t chr2lwr(unicode_t ch) -{ - uint32_t lookup; - if (init_unilib() || !UniMasterIndex || !UniDataTable) - return ch; - lookup = UniMasterIndex[ch]; - if (lookup & MAPPING_TO_LOWER_CASE_BIT) - return (unicode_t)UniDataTable[lookup & OFFSET_MASK]; - return ch; -} - -unicode_t chr2upr(unicode_t ch) -{ - uint32_t lookup; - uint32_t offset; - if (init_unilib() || !UniMasterIndex || !UniDataTable) - return ch; - lookup = UniMasterIndex[ch]; - if (lookup & MAPPING_TO_UPPER_CASE_BIT) { - offset = lookup & OFFSET_MASK; - if (lookup & MAPPING_TO_LOWER_CASE_BIT) - offset++; - return (unicode_t)UniDataTable[offset]; - } - return ch; + return OpenUnicodeTable(mac->codepage, mac->dosname, handle); } diff --git a/src/nwnss/nsslnxlib/unilibUserspace.c b/src/nwnss/nsslnxlib/unilibUserspace.c new file mode 100644 index 0000000..9154db5 --- /dev/null +++ b/src/nwnss/nsslnxlib/unilibUserspace.c @@ -0,0 +1,226 @@ +/**************************************************************************** + | + | Userspace runtime boundary for the NSS Unicode rule-table loader. + | + | Keep NSS Unicode semantics in unilib.c. This file supplies the process + | runtime used by libnwnss: table discovery, file reads, memory allocation, + | lightweight locking hooks, and host-codepage selection. 
+ +-------------------------------------------------------------------------*/ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef NSS_DEFAULT_CODEPAGE +#define NSS_DEFAULT_CODEPAGE 850 +#endif + +#ifndef NSS_UNITABLE_DIRS +#define NSS_UNITABLE_DIRS "" +#endif + +extern char *getenv(const char *name); +extern long strtol(const char *nptr, char **endptr, int base); + +void mpkEnter(void) +{ +} + +void mpkExit(void) +{ +} + +void *mpkPageAlloc(unsigned int bytes, int flags) +{ + (void)flags; + return calloc(1, bytes); +} + +void mpkPageFree(void *ptr) +{ + free(ptr); +} + +void *kMutexAlloc(const char *name) +{ + (void)name; + return (void *)1; +} + +void kMutexFree(void *lock) +{ + (void)lock; +} + +void kMutexLock(void *lock) +{ + (void)lock; +} + +void kMutexUnlock(void *lock) +{ + (void)lock; +} + +static const char *basename_of(const char *path) +{ + const char *slash; + + if (!path) + return ""; + + slash = strrchr(path, '/'); + return slash ? 
slash + 1 : path; +} + +static FILE *try_open_path(const char *dir, const char *name) +{ + char path[1024]; + + if (!dir || !*dir) + return NULL; + + snprintf(path, sizeof(path), "%s/%s", dir, name); + return fopen(path, "rb"); +} + +static FILE *open_unicode_table_stream(const char *path) +{ + const char *env_dir = getenv("NSS_UNITABLE_DIR"); + const char *dirs = NSS_UNITABLE_DIRS; + const char *name = basename_of(path); + char dirs_copy[2048]; + char *save = NULL; + char *dir; + FILE *stream; + + stream = fopen(path, "rb"); + if (stream) + return stream; + + stream = try_open_path(env_dir, name); + if (stream) + return stream; + + if (dirs && *dirs) { + snprintf(dirs_copy, sizeof(dirs_copy), "%s", dirs); + for (dir = strtok_r(dirs_copy, ":", &save); dir; dir = strtok_r(NULL, ":", &save)) { + stream = try_open_path(dir, name); + if (stream) + return stream; + } + } + + return fopen(name, "rb"); +} + +struct file *kFileOpen(const char *path, int flags, int mode) +{ + struct file *filep; + FILE *stream; + + (void)flags; + (void)mode; + + stream = open_unicode_table_stream(path); + if (!stream) + return NULL; + + filep = calloc(1, sizeof(*filep)); + if (!filep) { + fclose(stream); + return NULL; + } + + filep->stream = stream; + filep->f_pos = 0; + return filep; +} + +int kFileRead(struct file *filep, void *buf, unsigned int count) +{ + size_t got; + + if (!filep || !filep->stream) + return -EINVAL; + + if (fseek(filep->stream, filep->f_pos, SEEK_SET) != 0) + return -errno; + + got = fread(buf, 1, count, filep->stream); + filep->f_pos += (long)got; + + if (got != count && ferror(filep->stream)) + return -EIO; + + return (int)got; +} + +void klseek(struct file *filep, unsigned int pos, int whence) +{ + if (!filep) + return; + + if (whence == SEEK_CUR) + filep->f_pos += (long)pos; + else if (whence == SEEK_END && filep->stream) { + if (fseek(filep->stream, 0, SEEK_END) == 0) + filep->f_pos = ftell(filep->stream) + (long)pos; + } else + filep->f_pos = (long)pos; +} + 
+int filp_close(struct file *filep, void *unused) +{ + int rc = 0; + + (void)unused; + + if (!filep) + return 0; + + if (filep->stream) + rc = fclose(filep->stream); + free(filep); + return rc; +} + +int printk(const char *fmt, ...) +{ + va_list ap; + int rc; + + va_start(ap, fmt); + rc = vfprintf(stderr, fmt, ap); + va_end(ap); + return rc; +} + +static int parse_int_env(const char *name, int fallback) +{ + const char *value = getenv(name); + char *end = NULL; + long parsed; + + if (!value || !*value) + return fallback; + + parsed = strtol(value, &end, 10); + if (!end || *end || parsed <= 0 || parsed > 65535) + return fallback; + + return (int)parsed; +} + +LONG OSGetCodePage(void) +{ + return parse_int_env("NSS_CODEPAGE", NSS_DEFAULT_CODEPAGE); +}