1289 lines
53 KiB
C
1289 lines
53 KiB
C
|
/* vqscanf.c
|
||
|
Author: Steven Augart (swa@isi.edu)
|
||
|
Designed, Documented, and Written: 7/18/92 -- 7/27/92
|
||
|
Ported from Gnu C to full ANSI C & traditional C, 10/5/92
|
||
|
& modifier added, detraditionalized: 2/16/93.
|
||
|
3/2/93 converted from qsscanf() to vqscanf(); not all comments updated.
|
||
|
Sorry. :)
|
||
|
*/
|
||
|
|
||
|
#define ASSERT2 /* expensive debugging assertions */
|
||
|
/* Copyright (c) 1992, 1993 by the University of Southern California. */
|
||
|
/* For copying and distribution information, see the file <usc-copyr.h> */
|
||
|
|
||
|
#include <usc-copyr.h>
|
||
|
|
||
|
#include <stdarg.h> /* ANSI variable arguments facility. */
|
||
|
|
||
|
/*
|
||
|
This is NOT the ordinary sscanf()! It works somewhat like sscanf(), but it
|
||
|
does *not* recognize all of sscanf()'s options.
|
||
|
|
||
|
In order to understand the rest of this documentation, you should be familiar
|
||
|
with the sscanf() function.
|
||
|
|
||
|
Like sscanf(), qsscanf() returns a count of the number of successful
|
||
|
assignment matches. The return value is not as important as it is in
|
||
|
sscanf(), though, since you can check whether you matched everything by
|
||
|
making your final conversion specifier be %r, setting the corresponding
|
||
|
pointer to NULL, then checking whether the corresponding pointer was reset.
|
||
|
|
||
|
sscanf() ignores blanks and tabs in the input string. qsscanf() considers
|
||
|
those to match a stretch of at least one horizontal whitespace (space or tab)
|
||
|
character. This means that horizontal whitespace is NOT ignored in the input
|
||
|
string, nor in the format string. In other words, occurrence of one or more
|
||
|
space or a tab in the format string is equivalent to %*[ \t]. This isn't
|
||
|
really a terribly creative thing to map whitespace onto, but it fits my
|
||
|
intuition a little bit better than sscanf()'s approach of ignoring it
|
||
|
altogether. Other whitespace characters (such as newline ('\n')) which
|
||
|
appear in the format string must literally match the corresponding character
|
||
|
in the input string.
|
||
|
|
||
|
The assignment suppression (the '*' modifier) feature of sscanf() is
|
||
|
supported.
|
||
|
|
||
|
The "long instead of int" (the 'l' modifier) feature of sscanf() is
|
||
|
supported, but we have not yet had a need for the "short instead of int" (the
|
||
|
'h' modifier) feature.
|
||
|
|
||
|
The "'" modifier will unquote the input string. It turns off normal
|
||
|
processing of all field terminators inside a quoted string, until the quoting
|
||
|
ends, at which point normal processing continues. See psprintf() for a
|
||
|
discussion of quoting. If we read only part of a quoted string, either
|
||
|
because the input string terminated early or because the field width ended
|
||
|
before the quoted part of the string ended or because the buffer ran out of
|
||
|
room (even with a '_' modifier), then we consider the match to have failed.
|
||
|
(I have thought about this quite a bit. If you have an
|
||
|
application for partial quoted string matches, send e-mail to swa@isi.edu;
|
||
|
I'd like to hear about it.)
|
||
|
|
||
|
Regular sscanf() has a notion of maximum field width. For example, to read
|
||
|
no more than 5 spaces or slashes into a string, one would give sscanf() the
|
||
|
conversion specifier "%5[ /]". "qsscanf()" also supports field width.
|
||
|
Note that, according to the ANSI spec, all fields except for %n must contain
|
||
|
at least one character that matches. (In our extended sscanf(), all
|
||
|
conversion specifiers except for %r, %~ and %( must match at least 1 single
|
||
|
character of output field. This means that negative or zero length field
|
||
|
widths are meaningless, and should never be specified. Similarly, buffer
|
||
|
lengths (see below) must contain at least enough room for one character (and
|
||
|
the trailing NUL ('\0') in all cases of NUL-terminated strings).
|
||
|
|
||
|
The institution of quoted strings means that there is now a separation
|
||
|
between input field width and number of characters read into the buffer. To
|
||
|
give an example, should the quoted string "''''" have a field width of 4 (the
|
||
|
input characters) or 1 (the output characters)? I have decided "field width"
|
||
|
represents the number of charcters we are willing to read into the output
|
||
|
buffer (exclusive of terminating '\0' -- perhaps not including the '\0' is a
|
||
|
bad decision, but it's backward compatible with sscanf().
|
||
|
|
||
|
Output buffer size may be specified by using the '_' modifier, the '$'
|
||
|
modifier, or the '!' modifier.
|
||
|
|
||
|
_: If the '_' modifier is specified, this means that the output buffer size
|
||
|
follows (including space for a terminating '\0', in the case of every
|
||
|
string conversion). After the output buffer is full, the match will
|
||
|
succeed and we go on to the next format character. It is quite similar to
|
||
|
specifying maximum field width, except that maximum field width doesn't
|
||
|
include space for the terminating '\0'. If two '_' modifiers are
|
||
|
specified, then the output buffer size is read from the next integer
|
||
|
argument.
|
||
|
|
||
|
$: If the '$' modifier is specified, the output buffer size follows. The
|
||
|
match will FAIL if the output buffer overflows. A count of the # of
|
||
|
successful matches is returned. If two '$' modifiers are specified, the
|
||
|
output buffer size is read from the next integer argument. Note that '_'
|
||
|
works just like '$' when we are scanning quoted strings, because partial
|
||
|
matches for quoted strings are not useful.
|
||
|
|
||
|
!: If the '!' modifier is specified, the output buffer size follows. If the
|
||
|
output buffer overflows, qsscanf() will return with the value of the
|
||
|
integer constant BUFFER_OVERFLOW (guaranteed to be negative). Note that
|
||
|
buffer overflow and numeric overflow are the only situations in which
|
||
|
qsscanf() returns negative values. the output buffer overflows. If two
|
||
|
'!' modifiers are specified, the output buffer size is read from the next
|
||
|
integer argument. Note that '_' works just like '!' when we are scanning
|
||
|
quoted strings.
|
||
|
|
||
|
|
||
|
A Prospero-specific modifier is '&'. The '&' modifier is only implemented
|
||
|
for the %s and %[ conversions. The argument to %s or %[, instead of being a
|
||
|
pointer to a buffer, is a char **. The argument will have the Prospero
|
||
|
stcopyr() function applied to it. Therefore, the argument must be NULL or
|
||
|
contain data previously allocated by Prospero's stcopy() or stcopyr()
|
||
|
function. This allows us to read strings of unlimited size without overflow.
|
||
|
This works with the '\'' modifier.
|
||
|
|
||
|
qsscanf() checks for integer overflow. By default, or if the '!' modifier is
|
||
|
specified, qsscanf() aborts processing and returns the value of the integer
|
||
|
constant "NUMERIC_OVERFLOW" (guaranteed to be negative). If the '_' modifier
|
||
|
has been specified, then we will terminate the integer conversion upon
|
||
|
potential overflow (just like specifying an input field width for an
|
||
|
integer). If the '$' modifier has been specified, then the match will fail
|
||
|
upon integer overflow.
|
||
|
|
||
|
It is meaningful to combine the '_', '$', and '!' modifiers with the '*'
|
||
|
(assignment suppression) modifier.
|
||
|
|
||
|
I expect that nobody will actually use the '_' modifier with numeric input,
|
||
|
but the functionality is there.
|
||
|
|
||
|
Conversion Argument Function
|
||
|
Specifier Type
|
||
|
------ ----- -----------
|
||
|
|
||
|
%d int * Looks for an optionally signed decimal integer.
|
||
|
Note that we do NOT skip over leading whitespace
|
||
|
like sscanf() does, nor do we in any other
|
||
|
conversions. Terminates on the first non-numeric
|
||
|
character, exclusive of an optional leading '-'.
|
||
|
|
||
|
Unlike in regular sscanf(), I have not added
|
||
|
functionality for longs and shorts to qsscanf(),
|
||
|
although they could be added easily.
|
||
|
|
||
|
%s char * Whitespace-terminated non-zero-length sequence of
|
||
|
characters. In the context of "%s", "whitespace"
|
||
|
means any one or more of "\n\r\t\v\f ". This
|
||
|
conversion adds a terminating '\0' to the output
|
||
|
string. . Note one exception to the
|
||
|
non-zero-length rule:
|
||
|
|
||
|
"%'s" will return the zero-length string when fed
|
||
|
the input "''". (This exception applies to all
|
||
|
cases where "non-zero-length" is mentioned in this
|
||
|
documentation.)
|
||
|
|
||
|
%S char * Matches all of the remaining characters in the
|
||
|
input buffer. Adds a terminating '\0' to the
|
||
|
output string. Will return a zero-length string if
|
||
|
that's all that's left, (and won't fail). In other
|
||
|
words, %S never fails if you get to it. "%'S" will
|
||
|
strip off a layer of quoting while it gobbles, if
|
||
|
you need it to.
|
||
|
|
||
|
%r char ** "Rest of the string". Sets the pointer to point
|
||
|
to the portion of the input string beyond this
|
||
|
point. This is useful, for instance, to check
|
||
|
whether there was any leftover input in the string
|
||
|
beyond a certain point. Applying maximum
|
||
|
field-width to this construct is meaningless.
|
||
|
Applying assignment suppression to this construct
|
||
|
is not useful. %r never fails, if you get to it.
|
||
|
|
||
|
%R char ** "Rest of the string, skipping to the next line".
|
||
|
Equivalent to specifying
|
||
|
"%*( \t)%*[\r\n]%*( \t)%r" or "%~%*[\r\n]%~%r".
|
||
|
We can use this to go onto the next Prospero
|
||
|
command in a Prospero protocol packet.
|
||
|
|
||
|
%~ none This conversion is automatically suppressed.
|
||
|
Equivalent to "%*( \t)". Used to skip over
|
||
|
optional leading or trailing horizontal whitespace.
|
||
|
Ignores field width and buffer size specifiers.
|
||
|
|
||
|
%c char * field_width characters are placed in the position
|
||
|
pointed to by the char *. Default is 1, if no
|
||
|
field-width is specified. (All of the other
|
||
|
constructs have an infinite default field-width.)
|
||
|
|
||
|
%[ ... ] char * Matches the longest non-empty string of input
|
||
|
characters from the set between brackets. A '\0'
|
||
|
is added. [] ... ] includes ']' in the set.
|
||
|
|
||
|
%[^ ... ] char * Matches the longest non-empty string of input
|
||
|
characters not from the set within brackets. A
|
||
|
'\0' is added. [^] ... ] behaves as expected.
|
||
|
It will *include* whitespace in the set of
|
||
|
acceptable input characters, unless you explicitly
|
||
|
exclude whitespace.
|
||
|
|
||
|
Note that %s is equivalent to %[^ \t\n\r\v\f]
|
||
|
|
||
|
Also, note that %'[^/ \t] will match a string of
|
||
|
characters up to the first *unquoted* slash.
|
||
|
In Prospero, will use this construct to disassemble
|
||
|
multi-component user-level filenames, which may
|
||
|
have components with (quoted) slashes.
|
||
|
|
||
|
%( ... ) char * Works just like %[, except that it accepts
|
||
|
%(^ ... ) zero-length matches.
|
||
|
The construct "%*( \t)" is useful for skipping over
|
||
|
zero or more whitespace characters at the start of
|
||
|
an input line.
|
||
|
|
||
|
%% none Literally matches a '%'. Does not increment the
|
||
|
counter of matches.
|
||
|
|
||
|
Note that there is no way to match a '\0' (terminating NUL) in the input
|
||
|
string, except with "%r". (And a good thing, too.)
|
||
|
|
||
|
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
Implementation notes:
|
||
|
|
||
|
This routine was written to be fast above all else; it is used to
|
||
|
parse Prospero commands. Therefore, there's a lot of in-line code and
|
||
|
macros that I would have put into a separate function if I'd been working
|
||
|
under different constraints.
|
||
|
|
||
|
I also do a small amount of loop unrolling to avoid excessive tests.
|
||
|
|
||
|
I really must apologize to the reader; I find this stuff pretty thick to get
|
||
|
through. I'm sorry. The charset implementation is not great, and could
|
||
|
be made more efficient. We could also pre-build the charset for '%s', and
|
||
|
save about 300 instructions per use of %s. That would be a good quick
|
||
|
speedup.
|
||
|
|
||
|
The GNU C implementation of readc() and incc() is reasonably efficient, but
|
||
|
the easiest translation to a non-GNU C system involves making the inline
|
||
|
functions into static functions, and all variables they reference into file
|
||
|
static variables. Totally gross, and much less efficient to boot.
|
||
|
|
||
|
One may #define NDEBUG to remove some internal consistency checking code and
|
||
|
to remove code that checks for malformed format strings.
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
** Maintainer **
|
||
|
Yes, we are actually maintaining qsprintf() and qsscanf() as part of the
|
||
|
Prospero project. We genuinely want your bug reports, bug fixes, and
|
||
|
complaints. We figure that improving qsprintf()'s portability will help us
|
||
|
make all of the Prospero software more portable. Send complaints and
|
||
|
comments and questions to bug-prospero@isi.edu.
|
||
|
*/
|
||
|
|
||
|
#if __GNUC__ && 0
|
||
|
#define NESTED_FUNCTIONS /* Use nested functions. (Currently buggy, in
|
||
|
my version of GCC, so I turned them off.) */
|
||
|
#endif
|
||
|
#ifdef NESTED_FUNCTIONS
|
||
|
|
||
|
#endif
|
||
|
|
||
|
|
||
|
|
||
|
#define NUMERIC_OVERFLOW (-1) /* Return value from qsscanf() */
|
||
|
#define BUFFER_OVERFLOW (-2) /* Return value from qsscanf() */
|
||
|
|
||
|
#include <pfs_threads.h> /* for definitions of thread stuff, below. */
|
||
|
#include <pfs.h> /* For definition of ZERO, for charset.h. */
|
||
|
#include <pparse.h> /* for definition of INPUT & prototype for
|
||
|
vqscanf(). */
|
||
|
#include "charset.h" /* character set stuff; shared with qsprintf()
|
||
|
*/
|
||
|
#include <ctype.h>
|
||
|
|
||
|
|
||
|
/* The next macro only works on ASCII systems. It also involves a subtraction
|
||
|
operation, which is likely to be as efficient as a table lookup, so I won't
|
||
|
rewrite it. */
|
||
|
|
||
|
#ifdef __GNUC__
|
||
|
static inline int chartoi(char c)
|
||
|
{
|
||
|
return c - '0';
|
||
|
}
|
||
|
#else
|
||
|
#define chartoi(c) ((c) - '0')
|
||
|
#endif
|
||
|
|
||
|
/* being in a quotation is an automatic match, since we can't break up
|
||
|
quotations across fields, EXCEPT that EOF is always a failure to match
|
||
|
(otherwise we might run off the end of the string.) */
|
||
|
/* This uses its argument twice, but that's OK. */
|
||
|
#define match(cs, c) ((c) != EOF && (am_quoting || in_charset(cs,(c))))
|
||
|
|
||
|
static int inpsetspn(charset cs, INPUT in);
|
||
|
static int qinpsetspn(charset cs, INPUT in, int am_quoting);
|
||
|
|
||
|
int
|
||
|
vqscanf(INPUT in, const char *fmt, va_list ap)
|
||
|
/* in: source
|
||
|
fmt: format describing what to scan for.
|
||
|
remaining args: pointers to places to store the data we read, or
|
||
|
integers (field widths).
|
||
|
*/
|
||
|
{
|
||
|
INPUT_ST newin_st;
|
||
|
int nmatches = 0; /* no assignment-producing directives matched
|
||
|
so far! */
|
||
|
|
||
|
if ((in->flags & CONSUME_INPUT) == 0) {
|
||
|
input_copy(in, &newin_st);
|
||
|
in = &newin_st;
|
||
|
}
|
||
|
for (;;) { /* check current format character */
|
||
|
/* Each case in this switch statement is responsible for leaving fmt
|
||
|
and s pointing to the next format and input characters to process.
|
||
|
*/
|
||
|
switch (*fmt) {
|
||
|
case ' ':
|
||
|
case '\t':
|
||
|
if (in_readc(in) != ' ' && in_readc(in) != '\t')
|
||
|
goto done; /* must match at least 1 space; failure
|
||
|
otherwise */
|
||
|
do { /* eat up any remaining spaces in the input */
|
||
|
in_incc(in);
|
||
|
} while (in_readc(in) == ' ' || in_readc(in) == '\t');
|
||
|
/* Eat up any remaining spaces in the format string and leave it
|
||
|
poised at the next formatting character */
|
||
|
while (*++fmt == ' ' || *fmt == '\t')
|
||
|
;
|
||
|
break;
|
||
|
case '%':
|
||
|
/* This is the big long part! Handle the conversion specifiers. */
|
||
|
{
|
||
|
int use_long = 0; /* Use long instead of int? */
|
||
|
int quote = 0; /* quoting modifier? */
|
||
|
int suppress = 0; /* suppression modifier? */
|
||
|
int maxfieldwidth = 0; /* max field width specified? 0 would be
|
||
|
meaningless; if the user does specify 0,
|
||
|
that would be strange, and I don't know
|
||
|
what it would mean. It will be ignored.
|
||
|
*/
|
||
|
/* outbuf_size could be set to this too: */
|
||
|
#define READ_FROM_NEXT_ARGUMENT (-1)
|
||
|
size_t outbuf_size = 0; /* output buffer size */
|
||
|
int seen_underscore = 0; /* How many underscores have we seen? */
|
||
|
int seen_bang = 0; /* How many bangs ('!') have we seen? */
|
||
|
int seen_dollar = 0; /* How many dollar signs ('$') have we seen?
|
||
|
*/
|
||
|
int seen_ampersand = 0; /* How many ampersands ('&') have we
|
||
|
seen? */
|
||
|
int am_quoting = 0; /* Are we actively in the middle of a
|
||
|
quotation? */
|
||
|
|
||
|
#ifdef NESTED_FUNCTIONS
|
||
|
/* read a character from the input stream, handling quoting if it's
|
||
|
turned on. Do not advance the input stream, except while
|
||
|
parsing single quotation marks. This has the effect that
|
||
|
multiple calls to readc() without an intervening incc() will
|
||
|
return the same value. */
|
||
|
/* this code has actually been tested in its current configuration.
|
||
|
*/
|
||
|
inline int readc(void) {
|
||
|
if (!quote)
|
||
|
return in_readc(in);
|
||
|
redo:
|
||
|
if (in_readc(in) != '\'')
|
||
|
return in_readc(in);
|
||
|
/* in_readc(in) == '\'' */
|
||
|
if (!am_quoting) {
|
||
|
++am_quoting;
|
||
|
in_incc(in);
|
||
|
goto redo;
|
||
|
}
|
||
|
/* We *are* quoting, & just saw a '\'' */
|
||
|
if (in_readcahead(in, 1) == '\'') /* examine next char. */
|
||
|
return '\''; /* do NOT increment s, since we might call
|
||
|
readc() again. */
|
||
|
else {
|
||
|
am_quoting = 0; /* didn't get two successive 's */
|
||
|
in_incc(in);
|
||
|
return in_readc(in); /* quoting gone; return real char. */
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* We're finished with this non-NUL ('\0') character; increment */
|
||
|
/* This function will work even if readc() was never called;
|
||
|
thus, "incc(), incc();" will do the right thing. */
|
||
|
inline void incc(void) {
|
||
|
assert(in_readc(in) != '\0'); /* inappropriate call to incc()!
|
||
|
*/
|
||
|
if (!quote)
|
||
|
in_incc(in); /* the easy case :) */
|
||
|
else {
|
||
|
redo:
|
||
|
if (in_readc(in) == '\'') {
|
||
|
if (!am_quoting) {
|
||
|
++am_quoting;
|
||
|
in_incc(in);
|
||
|
goto redo;
|
||
|
}
|
||
|
/* We're in a quotation (am_quoting == 1) && we're on
|
||
|
top of a quotation mark. Still need to increment
|
||
|
past the 'real' character. */
|
||
|
assert(am_quoting);
|
||
|
if (in_readcahead(in,1) == '\'') {
|
||
|
in_incc(in);
|
||
|
in_incc(in);
|
||
|
/* When inside a quotation, the two-character
|
||
|
sequence '' is treated as one character */
|
||
|
} else {
|
||
|
/* We're on top of a quotation mark that must close
|
||
|
the current quoted section. Go past it. */
|
||
|
in_incc(in);
|
||
|
am_quoting = !am_quoting;
|
||
|
/* Now (just changed this; hope I'm right)
|
||
|
step on. The character we're stepping over is
|
||
|
NOT a quotation mark. */
|
||
|
in_incc(in);
|
||
|
}
|
||
|
} else { /* Quoting enabled, but we're not on a single
|
||
|
quote. Just increment. */
|
||
|
in_incc(in);
|
||
|
}
|
||
|
} /* if (!quote) */
|
||
|
}
|
||
|
#ifndef NDEBUG
|
||
|
#define check_null(fmtc) if ((fmtc) == '\0') \
|
||
|
internal_error("improperly specified character set given as qsscanf() format")
|
||
|
#else
|
||
|
/* Assume format strings are always properly formed. This is reasonable
|
||
|
behavior, since it's a programming error to submit a malformed format
|
||
|
string. */
|
||
|
#define check_null(fmtc)
|
||
|
#endif
|
||
|
#define build_charset(cs, endc) _build_charset(&(cs), (endc))
|
||
|
|
||
|
inline void _build_charset(charset *csp, char endc) {
|
||
|
|
||
|
int negation;
|
||
|
if (*++fmt == '^') {
|
||
|
negation = 1;
|
||
|
new_full_charset(*csp);
|
||
|
remove_char(*csp, *++fmt);
|
||
|
} else {
|
||
|
negation = 0;
|
||
|
new_empty_charset(*csp);
|
||
|
add_char(*csp, *fmt);
|
||
|
}
|
||
|
check_null(*fmt);
|
||
|
while (*++fmt != endc) {
|
||
|
check_null(*fmt);
|
||
|
if (negation)
|
||
|
remove_char(*csp, *fmt);
|
||
|
else
|
||
|
add_char(*csp, *fmt);
|
||
|
}
|
||
|
/* fmt now points to closing bracket or paren. Done! */
|
||
|
}
|
||
|
|
||
|
#undef check_null
|
||
|
#else
|
||
|
#define readc() _readc(in, quote, &am_quoting)
|
||
|
#define incc() _incc(in, quote, &am_quoting)
|
||
|
#define build_charset(cs, endc) _build_charset(&(cs), (endc), &fmt)
|
||
|
|
||
|
/* Names changed from in to _in and from quote to _quote in order
|
||
|
to avoid getting bogus GCC warnings with -Wshadow under
|
||
|
gcc version 2.5.8:
|
||
|
lib/pfs/vqscanf.c:470: warning: declaration of `in' shadows a parameter
|
||
|
lib/pfs/vqscanf.c:470: warning: declaration of `quote' shadows previous local
|
||
|
*/
|
||
|
|
||
|
static int _readc(INPUT _in, int _quote, int *am_quotingp);
|
||
|
static void _incc(INPUT _in, int _quote, int *am_quotingp);
|
||
|
/* This version generates the warning: */
|
||
|
/* lib/pfs/vqscanf.c:471: warning: declaration of `in' shadows a parameter */
|
||
|
/* lib/pfs/vqscanf.c:471: warning: declaration of `quote' shadows previous local */
|
||
|
/* static void _incc(INPUT in, int quote, int *am_quotingp); */
|
||
|
static void _build_charset(charset *csp, char endc, const char **fmtp);
|
||
|
|
||
|
#endif /* NESTED_FUNCTIONS */
|
||
|
|
||
|
more:
|
||
|
switch(*++fmt) {
|
||
|
/* Process the modifiers (options) */
|
||
|
case '\'':
|
||
|
++quote;
|
||
|
goto more;
|
||
|
case '_':
|
||
|
#ifndef NDEBUG
|
||
|
if (seen_bang)
|
||
|
internal_error("qsscanf(): can't use ! and _ modifiers \
|
||
|
together");
|
||
|
if (seen_dollar)
|
||
|
internal_error("qsscanf(): can't use $ and _ modifiers \
|
||
|
together");
|
||
|
if (seen_underscore > 1)
|
||
|
internal_error("qsscanf(): can't use > 2 underscore\
|
||
|
modifiers together.");
|
||
|
#endif
|
||
|
if (seen_underscore++)
|
||
|
outbuf_size = READ_FROM_NEXT_ARGUMENT;
|
||
|
goto more;
|
||
|
|
||
|
case '!':
|
||
|
#ifndef NDEBUG
|
||
|
if (seen_underscore)
|
||
|
internal_error("qsscanf(): can't use ! and _ modifiers \
|
||
|
together");
|
||
|
if (seen_dollar)
|
||
|
internal_error("qsscanf(): can't use $ and ! modifiers \
|
||
|
together");
|
||
|
if (seen_bang > 1)
|
||
|
internal_error("qsscanf(): can't use > 2 bang\
|
||
|
modifiers together.");
|
||
|
#endif
|
||
|
if (seen_bang++)
|
||
|
outbuf_size = READ_FROM_NEXT_ARGUMENT;
|
||
|
goto more;
|
||
|
case '$':
|
||
|
#ifndef NDEBUG
|
||
|
if (seen_underscore)
|
||
|
internal_error("qsscanf(): can't use $ and _ modifiers \
|
||
|
together");
|
||
|
if (seen_bang)
|
||
|
internal_error("qsscanf(): can't use $ and ! modifiers \
|
||
|
together");
|
||
|
if (seen_dollar > 1)
|
||
|
internal_error("qsscanf(): can't use > 2 dollar sign\
|
||
|
modifiers together.");
|
||
|
#endif
|
||
|
if (seen_dollar++)
|
||
|
outbuf_size = READ_FROM_NEXT_ARGUMENT;
|
||
|
goto more;
|
||
|
case '&':
|
||
|
seen_ampersand++;
|
||
|
goto more;
|
||
|
case '0':
|
||
|
case '1':
|
||
|
case '2':
|
||
|
case '3':
|
||
|
case '4':
|
||
|
case '5':
|
||
|
case '6':
|
||
|
case '7':
|
||
|
case '8':
|
||
|
case '9':
|
||
|
if (seen_underscore == 1 || seen_bang == 1 || seen_dollar == 1)
|
||
|
outbuf_size = outbuf_size * 10 + chartoi(*fmt);
|
||
|
else
|
||
|
maxfieldwidth = maxfieldwidth * 10 + chartoi(*fmt);
|
||
|
goto more;
|
||
|
case 'l':
|
||
|
++use_long;
|
||
|
goto more;
|
||
|
case '*':
|
||
|
++suppress;
|
||
|
goto more;
|
||
|
|
||
|
|
||
|
/* commands */
|
||
|
case '%': /* literal match */
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot be combined \
|
||
|
with %% conversion.");
|
||
|
if (in_readc(in) != '%')
|
||
|
goto done;
|
||
|
in_incc(in);
|
||
|
break;
|
||
|
case '~':
|
||
|
{
|
||
|
int r;
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot be combined \
|
||
|
with %~ conversion.");
|
||
|
/* Match zero or more whitespace characters. */
|
||
|
while ((r = readc()) == ' ' || r == '\t')
|
||
|
incc();
|
||
|
}
|
||
|
break;
|
||
|
case 'R':
|
||
|
{
|
||
|
register int r;
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot be combined \
|
||
|
with %R conversion.");
|
||
|
/* strip trailing whitespace from previous line. */
|
||
|
while ((r = readc()) == ' ' || r == '\t')
|
||
|
incc();
|
||
|
/* skip the newline character. Be generous and accept \r.
|
||
|
Be even more generous and accept blank lines. */
|
||
|
if (r != '\n' && r != '\r')
|
||
|
goto done;
|
||
|
do {
|
||
|
incc();
|
||
|
} while ((r = readc()) == '\n' && r == '\r');
|
||
|
/* Skip leading whitespace from next line. */
|
||
|
/* This could be rewritten to eliminate a call to readc(). */
|
||
|
while ((r = readc()) == ' ' || r == '\t')
|
||
|
incc();
|
||
|
}
|
||
|
/* DELIBERATE FALLTHROUGH */
|
||
|
case 'r': /* ptr. to rest of string */
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot be combined \
|
||
|
with %r conversion.");
|
||
|
if (!suppress) { /* suppression is really stupid in this case,
|
||
|
but we'll support stupidity. */
|
||
|
if (in->flags & PERCENT_R_TARGET_IS_STRING) {
|
||
|
/* cast to char * to throw away CONST */
|
||
|
*va_arg(ap, char **) = (char *) in->s;
|
||
|
} else {
|
||
|
INPUT inparg = va_arg(ap, INPUT);
|
||
|
input_copy(in, inparg);
|
||
|
}
|
||
|
++nmatches;
|
||
|
}
|
||
|
break;
|
||
|
case 'c': /* char */
|
||
|
{
|
||
|
char *out = 0; /* assignment quiets gcc -Wall */
|
||
|
register int r;
|
||
|
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot be combined \
|
||
|
with %c conversion.");
|
||
|
if (!suppress)
|
||
|
out = va_arg(ap, char *);
|
||
|
if (outbuf_size == READ_FROM_NEXT_ARGUMENT)
|
||
|
outbuf_size = va_arg(ap,int);
|
||
|
else if (outbuf_size == 0)
|
||
|
outbuf_size = -1; /* ignore outbuf size by default */
|
||
|
if (maxfieldwidth < 1)
|
||
|
maxfieldwidth = 1; /* Read 1 character by default. */
|
||
|
if ((r = readc()) == '\0')
|
||
|
goto done; /* must match at least 1 character. */
|
||
|
do {
|
||
|
if (outbuf_size-- == 0) {
|
||
|
if (seen_dollar)
|
||
|
goto done;
|
||
|
else if (seen_bang)
|
||
|
return BUFFER_OVERFLOW;
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
if (!suppress)
|
||
|
*out++ = r;
|
||
|
incc();
|
||
|
} while (--maxfieldwidth && (r = readc()) != '\0');
|
||
|
if (!suppress)
|
||
|
++nmatches;
|
||
|
}
|
||
|
break;
|
||
|
case 'd':
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot be combined \
|
||
|
with %d conversion.");
|
||
|
if (use_long) {
|
||
|
long d = 0; /* decimal # we're generating. */
|
||
|
/* Use these 2 definitions to check for overflow. */
|
||
|
const long div = LONG_MAX / 10;
|
||
|
const long mod = LONG_MAX % 10;
|
||
|
|
||
|
|
||
|
/* Save the last return from readc() for reuse. We must use an
|
||
|
explicit temporary variable because the compiler won't know
|
||
|
that readc() always returns the same value without an
|
||
|
intervening incc(). */
|
||
|
register int r;
|
||
|
|
||
|
int negative; /* non-zero if negative #. */
|
||
|
|
||
|
if ((r = readc()) == '-')
|
||
|
negative = -1, incc(), r = readc();
|
||
|
else
|
||
|
negative = 0;
|
||
|
if (!isdigit(r))
|
||
|
goto done;
|
||
|
do {
|
||
|
register i = chartoi(r);
|
||
|
|
||
|
if (d > div || (d == div && i > mod)) {
|
||
|
/* Integer overflow! */
|
||
|
if (seen_dollar)
|
||
|
goto done; /* failure to match */
|
||
|
else if (seen_underscore)
|
||
|
break; /* conversion done */
|
||
|
else
|
||
|
return -1; /* abort */
|
||
|
}
|
||
|
d = d * 10 + i;
|
||
|
incc();
|
||
|
} while (isdigit(r = readc()));
|
||
|
/* s points to the next non-digit now. */
|
||
|
if (!suppress) {
|
||
|
*va_arg(ap, long *) = (negative ? -d : d);
|
||
|
++nmatches;
|
||
|
}
|
||
|
} else {
|
||
|
int d = 0; /* decimal # we're generating. */
|
||
|
/* Use these 2 definitions to check for overflow. */
|
||
|
const int div = INT_MAX / 10;
|
||
|
const int mod = INT_MAX % 10;
|
||
|
|
||
|
|
||
|
/* Save the last return from readc() for reuse. We must use an
|
||
|
explicit temporary variable because the compiler won't know
|
||
|
that readc() always return the same value without an
|
||
|
intervening incc(). */
|
||
|
register int r;
|
||
|
|
||
|
int negative; /* non-zero if negative #. */
|
||
|
|
||
|
if ((r = readc()) == '-')
|
||
|
negative = -1, incc(), r = readc();
|
||
|
else
|
||
|
negative = 0;
|
||
|
if (!isdigit(r))
|
||
|
goto done;
|
||
|
do {
|
||
|
register i = chartoi(r);
|
||
|
|
||
|
if (d > div || (d == div && i > mod)) {
|
||
|
/* Integer overflow! */
|
||
|
if (seen_bang)
|
||
|
goto done; /* failure to match */
|
||
|
else if (seen_underscore)
|
||
|
break; /* conversion done */
|
||
|
else
|
||
|
return -1; /* abort */
|
||
|
}
|
||
|
d = d * 10 + i;
|
||
|
incc();
|
||
|
} while (isdigit(r = readc()));
|
||
|
/* s points to the next non-digit now. */
|
||
|
if (!suppress) {
|
||
|
*va_arg(ap, int *) = (negative ? -d : d);
|
||
|
++nmatches;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
break;
|
||
|
|
||
|
case '(':
|
||
|
{
|
||
|
char *out = 0; /* assignment quiets gcc -Wall */ /* output buffer */
|
||
|
register int r;
|
||
|
charset cs;
|
||
|
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot currently be combined \
|
||
|
with %( conversion.");
|
||
|
if (!maxfieldwidth)
|
||
|
maxfieldwidth = -1;
|
||
|
if (!suppress)
|
||
|
out = va_arg(ap, char *);
|
||
|
if (outbuf_size == READ_FROM_NEXT_ARGUMENT)
|
||
|
outbuf_size = va_arg(ap,int);
|
||
|
build_charset(cs, ')');
|
||
|
|
||
|
/* don't have to match any characters. */
|
||
|
for (; maxfieldwidth-- && match(cs, r = readc()); incc()) {
|
||
|
if (--outbuf_size == 0) {
|
||
|
if (seen_dollar)
|
||
|
goto done;
|
||
|
else if (seen_bang)
|
||
|
return BUFFER_OVERFLOW;
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
if (!suppress)
|
||
|
*out++ = r;
|
||
|
}
|
||
|
/* if we stopped while still quoting, there are problems! */
|
||
|
if (am_quoting)
|
||
|
goto done;
|
||
|
if (!suppress)
|
||
|
*out = '\0', ++nmatches;
|
||
|
}
|
||
|
break;
|
||
|
case '[':
|
||
|
{
|
||
|
char *out = 0; /* assignment quiets gcc -Wall */ /* output buffer */
|
||
|
char **outp = 0; /* assignment quiets gcc -Wall */ /* Pointer to place to stash output. */
|
||
|
register int r;
|
||
|
charset cs;
|
||
|
|
||
|
if (suppress) {
|
||
|
seen_ampersand = 0; /* ignore the seen_ampersand flag */
|
||
|
} else {
|
||
|
if (seen_ampersand) {
|
||
|
outp = va_arg(ap, char **);
|
||
|
} else {
|
||
|
out = va_arg(ap, char *);
|
||
|
}
|
||
|
}
|
||
|
if (outbuf_size == READ_FROM_NEXT_ARGUMENT)
|
||
|
outbuf_size = va_arg(ap,int);
|
||
|
if ((suppress || outbuf_size) && seen_ampersand)
|
||
|
internal_error("qsscanf(): Specifying an output buffer \
|
||
|
size or suppression and the ampersand conversion together is ridiculous. You \
|
||
|
don't know what you're doing.");
|
||
|
build_charset(cs, ']');
|
||
|
|
||
|
/* must match at least 1 character if not quoting. If quoting,
|
||
|
we might see the (quoted) null string. */
|
||
|
/* Treat the quoted null string (also, by the way, a
|
||
|
common case) as a special case. */
|
||
|
if (quote && in_readc(in) == '\''
|
||
|
&& in_readcahead(in, 1) == '\''
|
||
|
&& in_readcahead(in, 2) != '\''
|
||
|
&& !match(cs, in_readcahead(in, 2))) {
|
||
|
/* quoted null string. */
|
||
|
assert(in_readcahead(in,2) == readc());
|
||
|
if (!suppress) {
|
||
|
++nmatches;
|
||
|
if (seen_ampersand)
|
||
|
*outp = stcopyr("", *outp);
|
||
|
else
|
||
|
*out = '\0';
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
if (!match(cs, r = readc()))
|
||
|
goto done; /* did not match any characters */
|
||
|
if (seen_ampersand) {
|
||
|
/* Set out to start of string; we can increment out, but
|
||
|
should leave *outp always at the start of the string. */
|
||
|
if ((out = *outp) == NULL) {
|
||
|
/* passing null pointer to strcpy is legal. */
|
||
|
outbuf_size = 1 + (quote ?
|
||
|
qinpsetspn(cs, in, am_quoting)
|
||
|
: inpsetspn(cs, in));
|
||
|
if (outbuf_size <= 1) /* unbalanced quoting or failed
|
||
|
to match any characters. */
|
||
|
goto done;
|
||
|
out = *outp = stalloc(outbuf_size);
|
||
|
#ifndef NDEBUG
|
||
|
/* should have allocated exactly enough memory for this
|
||
|
conversion. */
|
||
|
seen_ampersand = -1;
|
||
|
#endif
|
||
|
} else {
|
||
|
/* stalloc() guarantees not to honor requests for 0 or
|
||
|
fewer bytes of memory, so outbuf_size > 0. */
|
||
|
outbuf_size = p__bstsize(out);
|
||
|
assert(outbuf_size > 0);
|
||
|
}
|
||
|
}
|
||
|
do {
|
||
|
if (--outbuf_size == 0) {
|
||
|
if (seen_ampersand) {
|
||
|
int oldsize = p__bstsize(*outp);
|
||
|
char *oldstart = *outp;
|
||
|
|
||
|
#ifdef ASSERT2 /* expensive assertion */
|
||
|
assert(oldsize == strlen(oldstart) + 1);
|
||
|
#endif
|
||
|
#ifndef NDEBUG
|
||
|
/* We should never run through this code twice for
|
||
|
the same conversion. */
|
||
|
assert(seen_ampersand > 0);
|
||
|
seen_ampersand = -1;
|
||
|
#endif
|
||
|
/* Don't need room for the trailing null, since
|
||
|
the current outbuf_size allocated space for it.
|
||
|
*/
|
||
|
outbuf_size = quote ?
|
||
|
qinpsetspn(cs, in, am_quoting) :
|
||
|
inpsetspn(cs, in);
|
||
|
if (outbuf_size < 1) /* unbalanced quoting */
|
||
|
goto done;
|
||
|
|
||
|
out = *outp = stalloc(outbuf_size + oldsize);
|
||
|
strcpy(out, oldstart);
|
||
|
out += oldsize - 1;
|
||
|
} else if (seen_dollar)
|
||
|
goto done;
|
||
|
else if (seen_bang)
|
||
|
return BUFFER_OVERFLOW;
|
||
|
else {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (!suppress) {
|
||
|
*out++ = r;
|
||
|
}
|
||
|
incc();
|
||
|
} while (--maxfieldwidth && match(cs, r = readc()));
|
||
|
/* if we stopped while still quoting, there are problems! */
|
||
|
if (am_quoting)
|
||
|
goto done;
|
||
|
#ifndef NDEBUG
|
||
|
if (seen_ampersand == -1) /* if we allocated just enough memory
|
||
|
to fit */
|
||
|
assert(*outp + p__bstsize(*outp) - 1 == out);
|
||
|
#endif
|
||
|
if (!suppress) {
|
||
|
*out = '\0';
|
||
|
++nmatches;
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case 'b': /* just like %s, except '\0' is OK */
|
||
|
case 's': /* Just like %b, except that encountering '\0'
|
||
|
in a string (quoted or not) is a failure to
|
||
|
match. */
|
||
|
/* This distinction needs to be implemented and isn't. */
|
||
|
{
|
||
|
static charset nw_cs; /* not whitespace cs */
|
||
|
static int nw_cs_initialized = 0;
|
||
|
register int r;
|
||
|
char *out = 0; /* assignment quiets gcc -Wall */ /* output buffer */
|
||
|
char **outp = 0; /* assignment quiets gcc -Wall */ /* Pointer to place to stash output. */
|
||
|
int expected_inputlen;
|
||
|
|
||
|
|
||
|
/* Have we already built a charset for %s? If not, build it
|
||
|
now and flag it as having been initialized. */
|
||
|
if (!nw_cs_initialized) {
|
||
|
p_th_mutex_lock(p_th_mutexPFS_VQSCANF_NW_CS);
|
||
|
new_full_charset(nw_cs);
|
||
|
remove_char(nw_cs, '\n');
|
||
|
remove_char(nw_cs, ' ');
|
||
|
remove_char(nw_cs, '\t');
|
||
|
remove_char(nw_cs, '\r');
|
||
|
remove_char(nw_cs, '\v');
|
||
|
remove_char(nw_cs, '\f');
|
||
|
|
||
|
++nw_cs_initialized;
|
||
|
p_th_mutex_unlock(p_th_mutexPFS_VQSCANF_NW_CS);
|
||
|
}
|
||
|
if (suppress) {
|
||
|
seen_ampersand = 0; /* ignore the seen_ampersand flag */
|
||
|
} else {
|
||
|
if (seen_ampersand) {
|
||
|
outp = va_arg(ap, char **);
|
||
|
} else {
|
||
|
out = va_arg(ap, char *);
|
||
|
}
|
||
|
}
|
||
|
if (outbuf_size == READ_FROM_NEXT_ARGUMENT)
|
||
|
outbuf_size = va_arg(ap,int);
|
||
|
if ((suppress || outbuf_size) && seen_ampersand)
|
||
|
internal_error("qsscanf(): Specifying an output buffer \
|
||
|
size or suppression and the ampersand conversion together is ridiculous. You \
|
||
|
don't know what you're doing.");
|
||
|
/* must match at least 1 character if not quoting. If quoting,
|
||
|
we might see the (quoted) null string. */
|
||
|
/* Treat the quoted null string (also, by the way, a
|
||
|
common case) as a special case. */
|
||
|
if (quote && in_readc(in) == '\''
|
||
|
&& in_readcahead(in,1) == '\''
|
||
|
&& in_readcahead(in,2) != '\''
|
||
|
&& !match(nw_cs, in_readcahead(in,2))) {
|
||
|
/* quoted null string. */
|
||
|
assert(in_readcahead(in,2) == readc());
|
||
|
if (!suppress) {
|
||
|
++nmatches;
|
||
|
if (seen_ampersand)
|
||
|
*outp = stcopyr("", *outp);
|
||
|
else
|
||
|
*out = '\0';
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
if (!match(nw_cs, r = readc()))
|
||
|
goto done; /* did not match any characters */
|
||
|
expected_inputlen = quote ?
|
||
|
qinpsetspn(nw_cs, in, am_quoting)
|
||
|
: inpsetspn(nw_cs, in);
|
||
|
if (expected_inputlen <= 0) /* unbalanced quoting or failed
|
||
|
to match any characters. */
|
||
|
goto done;
|
||
|
if (seen_ampersand) {
|
||
|
/* Set out to start of string; we can increment out, but
|
||
|
should leave *outp always at the start of the string. */
|
||
|
/* Stick on the +1 for trailing NUL for safety. */
|
||
|
if (expected_inputlen + 1 > p__bstsize(*outp)) {
|
||
|
stfree(*outp); /* might be NULL. Ok if it is. */
|
||
|
/* passing null pointer to strcpy is legal. */
|
||
|
outbuf_size = 1 + expected_inputlen;
|
||
|
out = *outp = stalloc(outbuf_size);
|
||
|
#ifndef NDEBUG
|
||
|
/* should have allocated exactly enough memory for this
|
||
|
conversion. */
|
||
|
seen_ampersand = -1;
|
||
|
#endif
|
||
|
} else {
|
||
|
outbuf_size = p__bstsize(out = *outp);
|
||
|
/* This assertion is ok, since we tested above. */
|
||
|
assert(outbuf_size > 0);
|
||
|
}
|
||
|
}
|
||
|
do {
|
||
|
if (--outbuf_size == 0) {
|
||
|
if (seen_ampersand) {
|
||
|
internal_error("somehow failed to allocate enough \
|
||
|
memory for the output in a %s or %b conversion.");
|
||
|
} else if (seen_dollar) {
|
||
|
goto done;
|
||
|
} else if (seen_bang) {
|
||
|
return BUFFER_OVERFLOW;
|
||
|
} else {
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (!suppress)
|
||
|
*out++ = r;
|
||
|
incc();
|
||
|
} while (--maxfieldwidth && match(nw_cs, r = readc()));
|
||
|
/* if we stopped while still quoting, there are problems! */
|
||
|
if (am_quoting)
|
||
|
goto done;
|
||
|
#ifndef NDEBUG
|
||
|
if (seen_ampersand == -1) /* if we allocated just enough memory
|
||
|
to fit */
|
||
|
assert(*outp + p__bstsize(*outp) - 1 == out);
|
||
|
#endif
|
||
|
if (seen_ampersand)
|
||
|
p_bst_set_buffer_length_nullterm(*outp, out - *outp);
|
||
|
if (!suppress) {
|
||
|
*out = '\0';
|
||
|
++nmatches;
|
||
|
}
|
||
|
}
|
||
|
break;
|
||
|
case 'S':
|
||
|
{
|
||
|
register int r;
|
||
|
char *out = 0; /* assignment quiets gcc -Wall */ /* output buffer */
|
||
|
char **outp = 0; /* assignment quiets gcc -Wall */ /* Pointer to place to stash output. */
|
||
|
|
||
|
if (suppress) {
|
||
|
seen_ampersand = 0; /* ignore the seen_ampersand flag */
|
||
|
} else {
|
||
|
if (seen_ampersand) {
|
||
|
outp = va_arg(ap, char **);
|
||
|
} else {
|
||
|
out = va_arg(ap, char *);
|
||
|
}
|
||
|
}
|
||
|
if (seen_ampersand)
|
||
|
internal_error("& modifier cannot currently be combined \
|
||
|
with %S conversion.");
|
||
|
if (outbuf_size == READ_FROM_NEXT_ARGUMENT)
|
||
|
outbuf_size = va_arg(ap,int);
|
||
|
if ((suppress || outbuf_size) && seen_ampersand)
|
||
|
internal_error("qsscanf(): Specifying an output buffer \
|
||
|
size or suppression and the ampersand conversion together is ridiculous. You \
|
||
|
don't know what you're doing.");
|
||
|
|
||
|
while (--maxfieldwidth && (r = readc())) {
|
||
|
if (--outbuf_size == 0) {
|
||
|
if (seen_dollar)
|
||
|
goto done;
|
||
|
else if (seen_bang)
|
||
|
return BUFFER_OVERFLOW;
|
||
|
else
|
||
|
break;
|
||
|
}
|
||
|
if (!suppress) {
|
||
|
*out++ = r;
|
||
|
}
|
||
|
incc();
|
||
|
}
|
||
|
/* if we stopped while still quoting, there are problems! */
|
||
|
if (am_quoting)
|
||
|
goto done;
|
||
|
if (!suppress)
|
||
|
*out = '\0', ++nmatches;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
#ifndef NDEBUG
|
||
|
default:
|
||
|
internal_error("malformed format string passed to qsscanf()");
|
||
|
/* NOTREACHED */
|
||
|
#endif
|
||
|
}
|
||
|
++fmt;
|
||
|
} /* end of case '%' */
|
||
|
break;
|
||
|
|
||
|
|
||
|
case '\0': /* no more format specifiers to match. */
|
||
|
goto done;
|
||
|
default: /* literal character match */
|
||
|
if (*fmt++ != in_readc(in))
|
||
|
goto done;
|
||
|
in_incc(in);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
done:
|
||
|
return nmatches;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
#ifndef NESTED_FUNCTIONS
|
||
|
static int
|
||
|
_readc(INPUT in, int quote, int *am_quotingp)
|
||
|
{
|
||
|
if (!quote)
|
||
|
return in_readc(in);
|
||
|
redo:
|
||
|
if (in_readc(in) != '\'')
|
||
|
return in_readc(in);
|
||
|
/* in_readc(in) == '\'' */
|
||
|
if (!(*am_quotingp)) {
|
||
|
++(*am_quotingp);
|
||
|
in_incc(in);
|
||
|
goto redo;
|
||
|
}
|
||
|
/* We *are* quoting, & just saw a '\'' */
|
||
|
if (in_readcahead(in,1) == '\'') /* examine next char. */
|
||
|
return '\''; /* do NOT advance input stream, since we might call
|
||
|
readc() again. */
|
||
|
else {
|
||
|
(*am_quotingp) = 0; /* didn't get two successive 's */
|
||
|
in_incc(in);
|
||
|
return in_readc(in); /* quoting gone; return real char. */
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
static void
|
||
|
_incc(INPUT in, int quote, int *am_quotingp)
|
||
|
{
|
||
|
assert(in_readc(in) != EOF); /* inappropriate call to incc()! */
|
||
|
|
||
|
/* Skip single ', if it's there. */
|
||
|
if (!quote)
|
||
|
in_incc(in); /* easy case :) */
|
||
|
else {
|
||
|
redo:
|
||
|
if (in_readc(in) == '\'') {
|
||
|
if (!(*am_quotingp)) {
|
||
|
++(*am_quotingp);
|
||
|
in_incc(in);
|
||
|
goto redo;
|
||
|
}
|
||
|
/* We're in a quotation (am_quoting == 1) && we're on
|
||
|
top of a quotation mark. Still need to increment
|
||
|
past the 'real' character. */
|
||
|
assert(*am_quotingp);
|
||
|
if (in_readcahead(in,1) == '\'') {
|
||
|
in_incc(in);
|
||
|
in_incc(in);
|
||
|
/* When inside a quotation, the two-character
|
||
|
sequence '' is treated as one character */
|
||
|
} else {
|
||
|
/* We're on top of a quotation mark that must close
|
||
|
the current quoted section. Go past it. */
|
||
|
in_incc(in);
|
||
|
(*am_quotingp) = !(*am_quotingp);
|
||
|
/* Now (just changed this; hope I'm right) step on. The
|
||
|
character we're stepping over is NOT a quotation mark. */
|
||
|
in_incc(in);
|
||
|
}
|
||
|
} else { /* Quoting enabled, but we're not on a single
|
||
|
quote. Just increment. */
|
||
|
in_incc(in);
|
||
|
}
|
||
|
} /* if (!quote) */
|
||
|
}
|
||
|
|
||
|
|
||
|
/* inpsetspn() returns the length of the initial segment of s whose
|
||
|
characters are in set cs. This relies on the trick that '\0' will never be
|
||
|
in a character set. */
|
||
|
static int
|
||
|
inpsetspn(charset cs, INPUT oldin)
|
||
|
{
|
||
|
int retval = 0;
|
||
|
INPUT_ST in_st;
|
||
|
INPUT in = &in_st;
|
||
|
|
||
|
input_copy(oldin, in);
|
||
|
while (in_charset(cs, in_readc(in))) {
|
||
|
++retval;
|
||
|
in_incc(in);
|
||
|
}
|
||
|
return retval;
|
||
|
}
|
||
|
|
||
|
/* qinpsetspn() returns the length of the initial segment of s whose unquoted
|
||
|
characters are in set cs. -1 is returned if the quoting is ill-formed. */
|
||
|
static int
|
||
|
qinpsetspn(charset cs, INPUT oldin, int am_quoting)
|
||
|
{
|
||
|
/* code swiped from qindex() */
|
||
|
int count = 0;
|
||
|
|
||
|
enum { OUTSIDE_QUOTATION, IN_QUOTATION,
|
||
|
SEEN_POSSIBLE_CLOSING_QUOTE } state;
|
||
|
INPUT_ST in_st;
|
||
|
INPUT in = &in_st;
|
||
|
input_copy(oldin, in);
|
||
|
|
||
|
state = am_quoting? IN_QUOTATION : OUTSIDE_QUOTATION;
|
||
|
|
||
|
for (; !in_eof(in); in_incc(in)) {
|
||
|
switch (state) {
|
||
|
case OUTSIDE_QUOTATION:
|
||
|
if (in_readc(in) == '\'')
|
||
|
state = IN_QUOTATION;
|
||
|
else if (in_charset(cs, in_readc(in))) {
|
||
|
++count;
|
||
|
} else {
|
||
|
return count; /* failure to match. */
|
||
|
}
|
||
|
break;
|
||
|
case IN_QUOTATION:
|
||
|
if (in_readc(in) == '\'')
|
||
|
state = SEEN_POSSIBLE_CLOSING_QUOTE;
|
||
|
else
|
||
|
++count;
|
||
|
break;
|
||
|
case SEEN_POSSIBLE_CLOSING_QUOTE:
|
||
|
if (in_readc(in) == '\'') {
|
||
|
++count;
|
||
|
state = IN_QUOTATION;
|
||
|
} else {
|
||
|
state = OUTSIDE_QUOTATION;
|
||
|
if (!in_charset(cs, in_readc(in)))
|
||
|
return count;
|
||
|
++count;
|
||
|
}
|
||
|
break;
|
||
|
default:
|
||
|
internal_error("inpsetspn(): impossible state!");
|
||
|
}
|
||
|
}
|
||
|
if (state == IN_QUOTATION) /* unbalanced quoting */
|
||
|
return -1;
|
||
|
return count;
|
||
|
}
|
||
|
|
||
|
|
||
|
#ifndef NDEBUG
|
||
|
#define check_null(fmtc) if ((fmtc) == '\0') \
|
||
|
internal_error("improperly specified character set given as qsscanf() format")
|
||
|
#else
|
||
|
/* Assume format strings are always properly formed. This is reasonable
|
||
|
behavior, since it's a programming error to submit a malformed format
|
||
|
string. */
|
||
|
#define check_null(fmtc)
|
||
|
#endif
|
||
|
static void
|
||
|
_build_charset(charset *csp, char endc, const char **fmtp)
|
||
|
{
|
||
|
int negation;
|
||
|
if (*++(*fmtp) == '^') {
|
||
|
negation = 1;
|
||
|
new_full_charset(*csp);
|
||
|
#if 0
|
||
|
remove_char(*csp, '\0'); /* don't ever want to match \0 as being
|
||
|
valid. */
|
||
|
#endif
|
||
|
remove_char(*csp, *++(*fmtp));
|
||
|
} else {
|
||
|
negation = 0;
|
||
|
new_empty_charset(*csp);
|
||
|
add_char(*csp, *(*fmtp));
|
||
|
}
|
||
|
check_null(*(*fmtp));
|
||
|
while (*++(*fmtp) != endc) {
|
||
|
check_null(*(*fmtp));
|
||
|
if (negation)
|
||
|
remove_char(*csp, *(*fmtp));
|
||
|
else
|
||
|
add_char(*csp, *(*fmtp));
|
||
|
}
|
||
|
/* (*fmtp) now points to closing bracket or paren. Done! */
|
||
|
}
|
||
|
#undef check_null
|
||
|
|
||
|
#endif
|
||
|
|