stdio: factor out format parsing in scanf

This commit is contained in:
Lephenixnoir 2024-01-14 21:18:30 +01:00
parent 1caaa8ff63
commit 9f6e0c8039
No known key found for this signature in database
GPG key ID: 1BBA026E13FC0495

View file

@ -106,152 +106,220 @@ void __scanf_store_d(long double value, int size, va_list *args)
// %ms and %m[set] are not implemented (with memory allocation while parsing a chain or a set of characters) // %ms and %m[set] are not implemented (with memory allocation while parsing a chain or a set of characters)
struct scanf_format {
/* Maximum field width */
int field_width;
/* Size of the assigned (integer or floating-point) type, in bytes */
uint8_t size;
/* Whether to skip assignment */
bool skip;
/* Set of bytes allowed in a given set %[]. */ /* Set of bytes allowed for bracket sets in %[] */
static uint8_t bracket_set[32]; uint8_t bracket_set[32];
};
/* Allow/disallow the entire set */ /* Allow/disallow the entire set */
static void bracket_set_init(bool allow) static void bracket_set_init(uint8_t *set, bool allow)
{ {
memset(bracket_set, allow ? 0xff : 0x00, sizeof bracket_set); memset(set, allow ? 0xff : 0x00, 32);
} }
/* Allow/disallow a range of characters. Both ends are included. */ /* Allow/disallow a range of characters. Both ends are included. */
static void bracket_set_range(uint8_t start, uint8_t end, bool allow) static void bracket_set_range(
uint8_t *set, uint8_t start, uint8_t end, bool allow)
{ {
for(int u = start; u <= end; u++) { for(int u = start; u <= end; u++) {
int byte = u >> 3; int byte = u >> 3;
int bit = 1 << (u & 7); int bit = 1 << (u & 7);
if(allow) if(allow)
bracket_set[byte] |= bit; set[byte] |= bit;
else else
bracket_set[byte] &= ~bit; set[byte] &= ~bit;
} }
} }
/* Check whether a byte is allowed by the bracket set. */ /* Check whether a byte is allowed by the bracket set. */
static bool bracket_set_test(int c) static bool bracket_set_test(uint8_t *set, int c)
{ {
int byte = (c >> 3); return (c != EOF) && (set[c >> 3] & (1 << (c & 7)));
int bit = 1 << (c & 7);
return (c != EOF) && (bracket_set[byte] & bit);
} }
/* return 0 if Ok or -1 if syntax err in the set format */ /* Parse a bracket set from a format string. Returns true on success. */
static int bracket_set_parse(char const * __restrict__ format, int *pos ) static bool bracket_set_parse(uint8_t *set, char const *format, int *pos)
{ {
int __sor = 0; int last = 0;
int __eor = 0; bool allow = true;
bool __neg = false; bracket_set_init(set, false);
bracket_set_init(false); /* '^' denotes a negated set */
if(format[*pos] == '^') {
(*pos)++; allow = false;
// next will be a "negation" set
if (format[*pos] == '^' ) {
__neg = true;
(*pos)++; (*pos)++;
bracket_set_init(true); bracket_set_init(set, true);
}
/* ']' as the first character adds ']' to the set */
if(format[*pos] == ']' ) {
bracket_set_range(set, ']', ']', allow);
(*pos)++;
}
// the char ']' is part of the set for(; format[*pos] && format[*pos] != ']'; (*pos)++) {
if (format[*pos] == ']' ) { /* '-' as the last character, thus explicitly in the set */
bracket_set_range(']', ']', !__neg); if(format[*pos] == '-' && format[*pos + 1] == ']')
(*pos)++; bracket_set_range(set, '-', '-', allow);
} /* '-' as denoting a range */
} else if(format[*pos] == '-') {
// the char ']' is included in the allowed set
else if (format[*pos] == ']' ) {
__neg = false;
// the char ']' is part of the set
if (format[*pos] == ']' ) {
bracket_set_range(']', ']', !__neg);
(*pos)++; (*pos)++;
bracket_set_range(set, last, format[*pos], allow);
}
/* Any other character */
else {
last = format[*pos];
bracket_set_range(set, last, last, allow);
} }
} }
return (format[*pos] == ']');
}
static int parse_fmt(char const *fmt, int *pos, struct scanf_format *opt)
{
opt->field_width = INT_MAX;
opt->size = sizeof(int);
opt->skip = false;
int width = 0;
char size_letter = 0;
while(1) { while(1) {
// we find a '-' so need to check if we are considering a range or the char '-' only (*pos)++;
if (format[*pos]=='-') {
// the char '-' is included in the allowed set switch(fmt[*pos]) {
if (format[*pos+1]==']') { case '*':
bracket_set_range('-', '-', !__neg); opt->skip = true;
(*pos)++; break;
// we have now finished the reading of the set cause the following char is ']'
return 0; case 'h':
} opt->size = (size_letter=='h') ? sizeof(char) : sizeof(short);
// the char '-' indicates a range of char to be included into the set size_letter = 'h';
else { break;
(*pos)++; case 'l':
__eor = format[*pos]; opt->size = (size_letter=='l') ? sizeof(long long) : sizeof(long);
bracket_set_range( __sor, __eor, !__neg ); size_letter = 'l';
} break;
} case 'L':
// we find the char ']' so it means we reach the end of this set opt->size = sizeof(long double);
else if (format[*pos]==']') return 0; size_letter = 'L';
// if we reach the '\0' we have a syntax problem break;
else if (format[*pos]=='\0') return -1; case 'j':
// we are considering one particular char and prepare for a potential range if we find the char '-' later on opt->size = sizeof(intmax_t);
else { break;
__sor = format[*pos]; case 'z':
bracket_set_range(__sor, __sor, !__neg); opt->size = sizeof(size_t);
break;
case 't':
opt->size = sizeof(ptrdiff_t);
break;
case '0' ... '9':
width = width * 10 + (fmt[*pos] - '0');
opt->field_width = width;
break;
case '[':
(*pos)++; (*pos)++;
return bracket_set_parse(opt->bracket_set, fmt, pos) ? '[' : 0;
case 'd':
case 'i':
case 'o':
case 'u':
case 'x':
case 'X':
case 'p':
case 's':
case 'n':
return fmt[*pos];
case 'a':
case 'A':
case 'e':
case 'E':
case 'f':
case 'F':
case 'g':
case 'G':
/* Adjust interpretation of no size / 'l' size */
if(size_letter == 0)
opt->size = sizeof(float);
if(size_letter == 'l')
opt->size = sizeof(double);
return fmt[*pos];
case 'c':
if(opt->field_width == INT_MAX)
opt->field_width = 1;
return 'c';
default:
return 0;
} }
} }
return 0;
} }
int __scanf( int __scanf(
struct __scanf_input * __restrict__ in, struct __scanf_input * __restrict__ in,
char const * __restrict__ format, char const * __restrict__ format,
va_list *args) va_list *args)
{ {
bool skip = false;
int MOD = sizeof(int);
in->bytes_read = 0; // we haven't started to read char from the input stream in->bytes_read = 0; // we haven't started to read char from the input stream
int validrets = 0; // to be incremented each time we successfully read and store an input as per the format int validrets = 0; // to be incremented each time we successfully read and store an input as per the format
int err = 0; // err control on __strto_xx( ) functions int err = 0; // err control on __strto_xx( ) functions
int user_length = 0; // length defined by user with a %xx modifier
int pos = 0; // current pos in the format string int pos = 0; // current pos in the format string
__scanf_start( in ); __scanf_start( in );
// TODO: No __scanf_end() in any of the "return validrets"!! // TODO: No __scanf_end() in any of the "return validrets"!!
while( format[pos] != 0 ) { for(; format[pos]; pos++) {
user_length = 0; if(format[pos] == ' ') {
MOD = sizeof(int);
skip = false;
if( format[pos] == ' ' ) {
__purge_space(in); __purge_space(in);
continue;
} }
// we will have to manage a given format else if(format[pos] != '%') {
else if( format[pos] == '%' ) { // if the next char of the stream is corresponding, we validate the read and go to the following char
if(format[pos] == __scanf_peek( in )) {
int readmaxlength = INT_MAX; __scanf_in( in );
char size_letter = 0; pos++;
continue;
// main loop }
loopagain: else return validrets; // else we return the number of valid read
}
else if(format[pos + 1] == '%') {
if(__scanf_peek(in) != '%') return validrets;
else __scanf_in( in );
pos++; pos++;
continue;
}
/* Perform a conversion */
else {
struct scanf_format opt;
int spec = parse_fmt(format, &pos, &opt);
if(spec == 0)
return validrets;
switch(format[pos]) { switch(spec) {
// we need to decrypt the corresponding scanf set of character // we need to decrypt the corresponding scanf set of character
case '[': { case '[': {
err = bracket_set_parse( format, &pos );
if (err!=0) return validrets;
int currentlength = 0; int currentlength = 0;
// we need to assign the read char to the corresponding pointer // we need to assign the read char to the corresponding pointer
char *c = skip ? NULL : va_arg(*args, char *); char *c = opt.skip ? NULL : va_arg(*args, char *);
for(int u=0; u<readmaxlength; u++) { for(int u=0; u<opt.field_width; u++) {
int temp = __scanf_peek(in); int temp = __scanf_peek(in);
if(bracket_set_test(temp)) { if(bracket_set_test(opt.bracket_set, temp)) {
__scanf_in(in); __scanf_in(in);
if(c) *c++ = temp; if(c) *c++ = temp;
currentlength++; currentlength++;
@ -263,7 +331,7 @@ int __scanf(
if(!currentlength) if(!currentlength)
return validrets; return validrets;
*c = '\0'; *c = '\0';
validrets += !skip; validrets += !opt.skip;
break; break;
} }
@ -272,48 +340,6 @@ int __scanf(
*va_arg(*args, int *) = in->bytes_read; *va_arg(*args, int *) = in->bytes_read;
break; break;
// we are expecting the char '%' to be in the input stream, if not err and return
case '%': {
if (__scanf_peek(in) != '%') return validrets;
else __scanf_in( in );
break;
}
// the next read, even if valid, will not be stored
case '*':
skip = true;
goto loopagain;
case 'h':
MOD = (size_letter == 'h') ? sizeof(char) : sizeof(short);
size_letter = 'h';
goto loopagain;
case 'l':
MOD = (size_letter == 'l') ? sizeof(long long) : sizeof(long);
/* FP conversions will adjust to sizeof(double) later */
size_letter = 'l';
goto loopagain;
case 'L':
MOD = sizeof(long double);
size_letter = 'L';
goto loopagain;
case 'j':
MOD = sizeof(intmax_t);
goto loopagain;
case 'z':
MOD = sizeof(size_t);
goto loopagain;
case 't':
MOD = sizeof(ptrdiff_t);
goto loopagain;
case '0' ... '9': {
user_length = user_length * 10 + (int) ( format[pos] - '0' );
readmaxlength = user_length;
goto loopagain;
break;
}
case 'd': case 'd':
case 'i': case 'i':
case 'o': case 'o':
@ -328,11 +354,11 @@ int __scanf(
long long int temp; long long int temp;
err = __strto_int(in, base, NULL, &temp, use_unsigned, err = __strto_int(in, base, NULL, &temp, use_unsigned,
readmaxlength); opt.field_width);
if (err == EOF && validrets == 0) return EOF; if (err == EOF && validrets == 0) return EOF;
if (err != 0) return validrets; if (err != 0) return validrets;
if (skip) __scanf_store_i( temp, 0, args ); if(!opt.skip)
else __scanf_store_i( temp, MOD, args ); __scanf_store_i( temp, opt.size, args );
validrets++; validrets++;
break; break;
} }
@ -345,51 +371,42 @@ int __scanf(
case 'F': case 'F':
case 'g': case 'g':
case 'G': { case 'G': {
/* Adjust interpretation of no size / 'l' size */
if(size_letter == 0)
MOD = sizeof(float);
if(size_letter == 'l')
MOD = sizeof(double);
// read a double from the current input stream // read a double from the current input stream
// and store in the corresponding arg as a char by reference // and store in the corresponding arg as a char by reference
long double temp; long double temp;
err = __strto_fp( in, NULL, NULL, &temp, err = __strto_fp( in, NULL, NULL, &temp,
readmaxlength); opt.field_width);
if (err == EOF && validrets == 0) return EOF; if (err == EOF && validrets == 0) return EOF;
if (err != 0) return validrets; if (err != 0) return validrets;
if (skip) __scanf_store_d( temp, 0, args ); if(!opt.skip)
else __scanf_store_d( temp, MOD, args ); __scanf_store_d( temp, opt.size, args );
validrets++; validrets++;
break; break;
} }
case 'p': { case 'p': {
long int temp; long int temp;
if (!skip) { if (!opt.skip) {
void *p = (void *) va_arg( *args, void** ); // get the adress of the target pointer (void**) void *p = (void *) va_arg( *args, void** ); // get the adress of the target pointer (void**)
err = __strto_int( in, 0, p, NULL, true, err = __strto_int( in, 0, p, NULL, true,
readmaxlength); opt.field_width);
} }
else err = __strto_int( in, 0, &temp, NULL, true, else err = __strto_int( in, 0, &temp, NULL, true,
readmaxlength); opt.field_width);
if (err == 0) validrets++; if (err == 0) validrets++;
else return validrets; else return validrets;
skip = false;
break; break;
} }
case 'c': { case 'c': {
if(readmaxlength == INT_MAX) char *c = opt.skip ? NULL : va_arg(*args, char *);
readmaxlength = 1;
char *c = skip ? NULL : va_arg(*args, char *);
for(int u = 0; u < readmaxlength; u++) { for(int u = 0; u < opt.field_width; u++) {
int temp = __scanf_in(in); int temp = __scanf_in(in);
if(temp==EOF) return EOF; if(temp==EOF) return EOF;
else if(c) *c++ = temp; else if(c) *c++ = temp;
} }
validrets += !skip; validrets += !opt.skip;
break; break;
} }
@ -398,8 +415,8 @@ int __scanf(
int curstrlength = 0; int curstrlength = 0;
__purge_space(in); __purge_space(in);
char *c = skip ? NULL : va_arg(*args, char *); char *c = opt.skip ? NULL : va_arg(*args, char *);
for(int u = 0; u < readmaxlength; u++) { for(int u = 0; u < opt.field_width; u++) {
temp = __scanf_peek(in); temp = __scanf_peek(in);
if(temp==EOF && curstrlength==0) return validrets; if(temp==EOF && curstrlength==0) return validrets;
if(isspace(temp) || ((temp==EOF && curstrlength!=0))) { if(isspace(temp) || ((temp==EOF && curstrlength!=0))) {
@ -420,16 +437,6 @@ int __scanf(
} }
} }
} }
// we are looking for a specific character in the input stream
else {
// if the next char of the stream is corresponding, we validate the read and go to the following char
if(format[pos] == __scanf_peek( in )) {
__scanf_in( in );
pos++;
}
else return validrets; // else we return the number of valid read
}
pos++;
} }
__scanf_end( in ); __scanf_end( in );