stdlib: scanf-friendly strto* functions

This commit is contained in:
Lephenixnoir 2022-08-22 19:00:13 +02:00
parent fda0d950ed
commit 26e54af8e0
No known key found for this signature in database
GPG key ID: 1BBA026E13FC0495
14 changed files with 288 additions and 116 deletions

View file

@ -140,6 +140,7 @@ set(SOURCES
src/stdio/puts.c src/stdio/puts.c
src/stdio/remove.c src/stdio/remove.c
src/stdio/rewind.c src/stdio/rewind.c
src/stdio/scanf/scan.c
src/stdio/setbuf.c src/stdio/setbuf.c
src/stdio/setvbuf.c src/stdio/setvbuf.c
src/stdio/snprintf.c src/stdio/snprintf.c

View file

@ -75,7 +75,6 @@ extern int __printf(
va_list *__args); va_list *__args);
/* Format extension API. */ /* Format extension API. */
struct __printf_format { struct __printf_format {
@ -84,12 +83,8 @@ struct __printf_format {
/* How much significant characters of data, meaning varies. */ /* How much significant characters of data, meaning varies. */
int16_t precision; int16_t precision;
/* /* Size of targeted integer type (%o, %x, %i, %d, %u), in bytes */
** Size specifier for integers (%o, %x, %i, %d, %u), is equal to the
** sizeof() of the targeted type. Also used for %lc.
*/
uint8_t size; uint8_t size;
/* (#) Alternative form: base prefixes, decimal point. */ /* (#) Alternative form: base prefixes, decimal point. */
uint8_t alternative :1; uint8_t alternative :1;
/* ( ) Add a blank sign before nonnegative numbers. */ /* ( ) Add a blank sign before nonnegative numbers. */
@ -111,15 +106,14 @@ struct __printf_format {
/* /*
** Type of format functions. ** Type of format functions.
** -> __spec is the specifier letter (eg. "d" in "%d") ** -> __out specifies the output and is used when generating text
** -> __opts are the length, precision, sign, alignment, etc. options ** -> __fmt contains the format options and specifier letter
** -> __args is a pointer to the variable list of arguments to read from ** -> __args is a pointer to the variable list of arguments to read from
*/ */
typedef void __printf_formatter_t( typedef void __printf_formatter_t(
struct __printf_output *__out, struct __printf_output *__out,
struct __printf_format *__opts, struct __printf_format *__fmt,
va_list *__args va_list *__args);
);
/* /*
** Register a new format. ** Register a new format.
@ -127,10 +121,10 @@ typedef void __printf_formatter_t(
** The formatter designated by the specified lowercase or uppercase letter ** The formatter designated by the specified lowercase or uppercase letter
** (eg 'p' or 'P') is registered. This functions allows overriding default ** (eg 'p' or 'P') is registered. This functions allows overriding default
** formatters, but this is very much discouraged. Letters with special meaning ** formatters, but this is very much discouraged. Letters with special meaning
** in the standard cannot be changed. A formatted can be removed of disabled by ** in the standard cannot be changed. A formatter can be removed of disabled by
** registering NULL. ** registering NULL.
** **
** Here are used characters in the C standard: ** Here are the characters used/reserved in the C standard:
** **
** a: Hexadecimal floating-point A: Hexadecimal floating-point ** a: Hexadecimal floating-point A: Hexadecimal floating-point
** b: _ B: _ ** b: _ B: _
@ -138,7 +132,7 @@ typedef void __printf_formatter_t(
** d: Decimal integer D: _ ** d: Decimal integer D: _
** e: Exponent floating-point E: Exponent floating-point ** e: Exponent floating-point E: Exponent floating-point
** f: Floating-point F: Floating-point ** f: Floating-point F: Floating-point
** g: General floating-point G: General: floating-point ** g: General floating-point G: General floating-point
** h: short or char size H: _ ** h: short or char size H: _
** i: Integer I: Locale-aware digits ** i: Integer I: Locale-aware digits
** j: intmax_t size J: _ ** j: intmax_t size J: _

37
src/stdio/scanf/scan.c Normal file
View file

@ -0,0 +1,37 @@
#include <stdio.h>
#include "../stdio_p.h"
#include "../../stdlib/stdlib_p.h"
/*
** Prime the one-byte lookahead buffer with the first byte of the input.
**
** -> __in is the scanf input; exactly one of its str and fp members is set.
**
** For a stream, the byte comes from fgetc(). For a string, the byte is read as
** an unsigned char (the same value range that fgetc() produces) and the end of
** the string is reported as EOF without advancing str. This way __scanf_end()
** has nothing to push back when the string is empty, and str never moves
** before the start of the string.
*/
void __scanf_start(struct __scanf_input *__in)
{
	if(__in->fp) {
		__in->buffer = fgetc(__in->fp);
		return;
	}

	/* Go through unsigned char: with a signed char type, bytes >= 0x80
	   would be negative (invalid for <ctype.h> functions) and 0xFF would
	   be indistinguishable from EOF */
	unsigned char c = (unsigned char)*__in->str;
	if(c == 0) {
		__in->buffer = EOF;
		return;
	}

	__in->str++;
	__in->buffer = c;
}
/*
** Fetch the next byte from the input, bypassing the lookahead buffer.
**
** -> __in is the scanf input; exactly one of its str and fp members is set.
**
** Returns the byte as a non-negative int, or EOF at the end of the input. For
** a string input, str is advanced past the returned byte and is left in place
** on the terminating NUL.
*/
int __scanf_fetch(struct __scanf_input *__in)
{
	if(__in->fp)
		return fgetc(__in->fp);

	/* Read as unsigned char like fgetc() does; with a signed char type a
	   0xFF byte would otherwise be consumed but returned as -1 (EOF), and
	   other bytes >= 0x80 would be negative */
	unsigned char c = (unsigned char)*__in->str;
	if(c == 0)
		return EOF;

	__in->str++;
	return c;
}
/*
** Finish scanning by giving the unconsumed lookahead byte back to the input.
**
** -> __in is the scanf input that was set up by __scanf_start().
**
** Nothing is given back when the buffer holds EOF. Otherwise the byte is
** handed to ungetc() for a stream, or str is moved back by one for a string.
*/
void __scanf_end(struct __scanf_input *__in)
{
	int pending = __in->buffer;

	if(pending != EOF) {
		if(__in->fp)
			ungetc(pending, __in->fp);
		else
			__in->str--;
	}
}

45
src/stdio/stdio_p.h Normal file
View file

@ -0,0 +1,45 @@
#ifndef __STDIO_P_H__
# define __STDIO_P_H__
#include <stdio.h>
/*
** General utilities for scanf(); we expose them here as we use subfunctions of
** strto*() from <stdlib.h> to implement numerical specifiers.
*/
/*
** Input source for scanf-style scanning; exactly one of str and fp must be
** non-NULL. A single-character lookahead buffer is included so that scanning
** functions can test the next character before consuming it; the buffered
** character is given back to the source (ungetc() for streams) at the end.
*/
struct __scanf_input {
/* String source: points to the next byte that __scanf_fetch() will read */
char const * __restrict__ str;
/* Stream source: read with fgetc() */
FILE *fp;
/* Lookahead byte; holds EOF once __scanf_fetch() reaches the end of input */
int buffer;
};
/* Initialize the input by feeding the buffer byte. */
void __scanf_start(struct __scanf_input *__in);
/* Fetch the next byte from the input and return it (don't call directly). */
int __scanf_fetch(struct __scanf_input *__in);
/*
** Consume one byte: return the byte currently held in the lookahead buffer
** and refill the buffer from the input with __scanf_fetch().
*/
static inline int __scanf_in(struct __scanf_input *__in)
{
	int const consumed = __in->buffer;

	__in->buffer = __scanf_fetch(__in);
	return consumed;
}
/*
** Look at the next byte without consuming it; this only reads the lookahead
** buffer and leaves the input untouched.
*/
static inline int __scanf_peek(struct __scanf_input *__in)
{
	int const lookahead = __in->buffer;

	return lookahead;
}
/* Close the input once finished by pushing the buffered byte back to it. */
void __scanf_end(struct __scanf_input *__in);
#endif /* __STDIO_P_H__ */

View file

@ -3,6 +3,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
#include "../stdio/stdio_p.h"
/* /*
** Parse an integer from a string. This is the base function for strtol, ** Parse an integer from a string. This is the base function for strtol,
@ -23,8 +24,7 @@
** expensive. ** expensive.
*/ */
int __strto_int( int __strto_int(
char const * restrict __ptr, struct __scanf_input *__input,
char ** restrict __endptr,
int __base, int __base,
long *__outl, long *__outl,
long long *__outll, long long *__outll,
@ -39,10 +39,9 @@ int __strto_int(
** and outl is set. ** and outl is set.
*/ */
int __strto_fp( int __strto_fp(
char const * restrict __ptr, struct __scanf_input *__input,
char ** restrict __endptr, double *__out,
double *__out, float *__outf,
float *__outf, long double *__outl);
long double *__outl);
#endif /*__STDLIB_P_H__*/ #endif /*__STDLIB_P_H__*/

View file

@ -1,3 +1,4 @@
#include "stdlib_p.h"
#include <stdlib.h> #include <stdlib.h>
#include <stdbool.h> #include <stdbool.h>
@ -37,12 +38,11 @@
** -> In hexadecimal notation, we read as many bits as the mantissa of a long ** -> In hexadecimal notation, we read as many bits as the mantissa of a long
** double, then later multiply by a power of 2. There are no approximations. ** double, then later multiply by a power of 2. There are no approximations.
*/ */
static void parse_digits(char const * restrict *ptr0, bool *valid, static bool parse_digits(struct __scanf_input *input,
SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal) SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal)
{ {
char const *ptr = *ptr0;
bool dot_found = false; bool dot_found = false;
int digits_found = 0; int digits_found=0, c=0;
*digits = 0; *digits = 0;
*exponent = 0; *exponent = 0;
@ -53,13 +53,14 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
int dot_character = '.'; int dot_character = '.';
int exp_character = (hexadecimal ? 'p' : 'e'); int exp_character = (hexadecimal ? 'p' : 'e');
for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr)) for(int i = 0; true; i++) {
|| *ptr == dot_character; i++, ptr++) { c = __scanf_peek(input);
if(!(isdigit(c) ||
(hexadecimal && isxdigit(c)) ||
(c == dot_character && !dot_found))) break;
__scanf_in(input);
/* Allow only one dot in the string, stop at the second one */ if(c == dot_character) {
if(*ptr == dot_character && dot_found) break;
if(*ptr == dot_character) {
dot_found = true; dot_found = true;
continue; continue;
} }
@ -67,12 +68,12 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
/* Count digits only until SIGNIFICAND_DIGITS */ /* Count digits only until SIGNIFICAND_DIGITS */
if(digits_found < max_digits) { if(digits_found < max_digits) {
if(hexadecimal) { if(hexadecimal) {
int v = *ptr - '0'; int v = c - '0';
if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10; if(!isdigit(c)) v = tolower(c) - 'a' + 10;
*digits = (*digits << 4) + v; *digits = (*digits << 4) + v;
} }
else { else {
*digits = (*digits * 10) + (*ptr - '0'); *digits = (*digits * 10) + (c - '0');
} }
} }
else (*exponent)++; else (*exponent)++;
@ -80,7 +81,7 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
if(dot_found) (*exponent)--; if(dot_found) (*exponent)--;
/* But also round at the first discarded one */ /* But also round at the first discarded one */
if(digits_found == max_digits && *ptr >= '5') if(digits_found == max_digits && c >= '5')
(*digits)++; (*digits)++;
digits_found++; digits_found++;
@ -88,46 +89,54 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
/* Require at least one digit to be present; if not, the whole string /* Require at least one digit to be present; if not, the whole string
is considered invalid */ is considered invalid */
if(!digits_found) { if(!digits_found)
*valid = false; return false;
return;
}
/* In hexadecimal, each character is worth 4 bits of exponent */ /* In hexadecimal, each character is worth 4 bits of exponent */
if(hexadecimal) (*exponent) *= 4; if(hexadecimal) (*exponent) *= 4;
/* Parse exponent */ /* Parse exponent */
if(tolower(*ptr) == exp_character) { if(tolower(__scanf_peek(input)) == exp_character) {
char *end; /* Hack: Restore the str pointer if this fails (which we
long e = strtol(ptr + 1, &end, 10); cannot determine with a single lookahead) so that *endptr is
set correctly */
struct __scanf_input backup = *input;
/* If an integer cannot be parsed, ignore the 'e...' part */ __scanf_in(input);
if(end != ptr + 1) { long e = 0;
ptr = end; if(__strto_int(input, 10, &e, NULL, false) == 0)
*exponent += e; *exponent += e;
} else
*input = backup;
} }
*ptr0 = ptr; return true;
*valid = true;
} }
int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out, static bool expect(struct __scanf_input *input, char const *sequence)
float *outf, long double *outl)
{ {
/* Save the value of ptr in endptr, in case format is invalid */ for(int i = 0; sequence[i]; i++) {
if(endptr) *endptr = (char *)ptr; int c = __scanf_in(input);
if(tolower(c) != tolower(sequence[i]))
return false;
}
return true;
}
int __strto_fp(struct __scanf_input *input, double *out, float *outf,
long double *outl)
{
/* Skip initial whitespace */ /* Skip initial whitespace */
while(isspace(*ptr)) ptr++; while(isspace(__scanf_peek(input))) __scanf_in(input);
/* Read optional sign */ /* Read optional sign */
bool negative = false; bool negative = false;
if(*ptr == '-') negative = true; int sign = __scanf_peek(input);
if(*ptr == '-' || *ptr == '+') ptr++; if(sign == '-') negative = true;
if(sign == '-' || sign == '+') __scanf_in(input);
int errno_value = 0; int errno_value = 0;
bool valid = true; bool valid = false;
/* Result variable */ /* Result variable */
if(out) *out = 0.0; if(out) *out = 0.0;
@ -135,47 +144,64 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
if(outl) *outl = 0.0l; if(outl) *outl = 0.0l;
/* NaN possibly with an argument */ /* NaN possibly with an argument */
if(!strncasecmp(ptr, "nan", 3)) { if(tolower(__scanf_peek(input)) == 'n') {
char const *arg = ""; if(!expect(input, "nan"))
ptr += 3; return EINVAL;
if(ptr[0] == '(') {
arg = ptr + 1; /* Get the argument for up to 32 bytes */
do ptr++; char arg[32];
while(ptr[-1] != ')'); int i = 0;
if(__scanf_peek(input) == '(') {
while(i < 31) {
int c = __scanf_in(input);
if(c == ')') break;
arg[i++] = c;
}
arg[i] = 0;
} }
if(out) *out = __builtin_nan(arg); if(out) *out = __builtin_nan(arg);
if(outf) *outf = __builtin_nanf(arg); if(outf) *outf = __builtin_nanf(arg);
if(outl) *outl = __builtin_nanl(arg); if(outl) *outl = __builtin_nanl(arg);
valid = true;
} }
/* Infinity */ else if(tolower(__scanf_peek(input)) == 'i') {
else if(!strncasecmp(ptr, "infinity", 8)) { if(!expect(input, "inf"))
return EINVAL;
if(tolower(__scanf_peek(input)) == 'i' &&
!expect(input, "inity"))
return EINVAL;
if(out) *out = __builtin_inf(); if(out) *out = __builtin_inf();
if(outf) *outf = __builtin_inff(); if(outf) *outf = __builtin_inff();
if(outl) *outl = __builtin_infl(); if(outl) *outl = __builtin_infl();
ptr += 8; valid = true;
}
else if(!strncasecmp(ptr, "inf", 3)) {
if(out) *out = __builtin_inf();
if(outf) *outf = __builtin_inff();
if(outl) *outl = __builtin_infl();
ptr += 3;
} }
else { else {
SIGNIFICAND_TYPE digits = 0; SIGNIFICAND_TYPE digits = 0;
long e = 0; long e = 0;
if(ptr[0] == '0' && tolower(ptr[1]) == 'x') { /* Check for the 0x prefix. Skipping a 0 if we start with 0 but
ptr += 2; not 0x isn't a problem. */
parse_digits(&ptr, &valid, &digits, &e, true); bool hexa = false;
if(__scanf_peek(input) == '0') {
__scanf_in(input);
if(tolower(__scanf_peek(input)) == 'x') {
__scanf_in(input);
hexa = true;
}
/* Count the 0 as a digit */
else valid = true;
}
if(hexa) {
valid |= parse_digits(input, &digits, &e, true);
if(out) *out = (double)digits * exp2(e); if(out) *out = (double)digits * exp2(e);
if(outf) *outf = (float)digits * exp2f(e); if(outf) *outf = (float)digits * exp2f(e);
if(outl) *outl = (long double)digits * exp2l(e); if(outl) *outl = (long double)digits * exp2l(e);
} }
else { else {
parse_digits(&ptr, &valid, &digits, &e, false); valid |= parse_digits(input, &digits, &e, false);
if(out) *out = (double)digits * pow(10, e); if(out) *out = (double)digits * pow(10, e);
if(outf) *outf = (float)digits * powf(10, e); if(outf) *outf = (float)digits * powf(10, e);
if(outl) *outl = (long double)digits * powl(10, e); if(outl) *outl = (long double)digits * powl(10, e);
@ -200,8 +226,5 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
if(outl) *outl = -(*outl); if(outl) *outl = -(*outl);
} }
/* Save the result pointer */ return valid ? errno_value : EINVAL;
if(endptr && valid) *endptr = (char *)ptr;
return errno_value;
} }

View file

@ -4,19 +4,17 @@
#include <errno.h> #include <errno.h>
#include <limits.h> #include <limits.h>
int __strto_int(char const * restrict ptr, char ** restrict endptr, int base, int __strto_int(struct __scanf_input *input, int base, long *outl,
long *outl, long long *outll, bool use_unsigned) long long *outll, bool use_unsigned)
{ {
/* Save the value of ptr in endptr now in case the format is invalid */
if(endptr) *endptr = (char *)ptr;
/* Skip initial whitespace */ /* Skip initial whitespace */
while(isspace(*ptr)) ptr++; while(isspace(__scanf_peek(input))) __scanf_in(input);
/* Accept a sign character */ /* Accept a sign character */
bool negative = false; bool negative = false;
if(*ptr == '-') negative = true; int sign = __scanf_peek(input);
if(*ptr == '-' || *ptr == '+') ptr++; if(sign == '-') negative = true;
if(sign == '-' || sign == '+') __scanf_in(input);
/* Use unsigned variables as only these have defined overflow */ /* Use unsigned variables as only these have defined overflow */
unsigned long xl = 0; unsigned long xl = 0;
@ -26,29 +24,34 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
bool valid = false; bool valid = false;
/* Read prefixes and determine base */ /* Read prefixes and determine base */
if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') { if(__scanf_peek(input) == '0') {
ptr += 2; __scanf_in(input);
base = 16; if((base == 0 || base == 16) &&
tolower(__scanf_peek(input)) == 'x') {
__scanf_in(input);
base = 16;
}
/* If we don't consume the x then count the 0 as a digit */
else valid = true;
if(base == 0)
base = 8;
} }
else if(base == 0 && ptr[0] == '0') { if(base == 0)
ptr++;
base = 8;
}
else if(base == 0) {
base = 10; base = 10;
}
/* Read digits */ /* Read digits */
while(1) { while(1) {
int v = -1; int v = -1;
if(isdigit(*ptr)) v = *ptr - '0'; int c = __scanf_peek(input);
if(islower(*ptr)) v = *ptr - 'a' + 10; if(isdigit(c)) v = c - '0';
if(islower(c)) v = c - 'a' + 10;
if(v == -1 || v >= base) break; if(v == -1 || v >= base) break;
/* The value is valid as long as there is at least one digit */ /* The value is valid as long as there is at least one digit */
valid = true; valid = true;
/* (x = base*x + v) but with overflow checks */ /* (x = base*x + v) but with overflow checks */
/* TODO: strto_int: We might fail to represent [L]LONG_MIN */
if(outl) { if(outl) {
if(__builtin_umull_overflow(xl, base, &xl)) if(__builtin_umull_overflow(xl, base, &xl))
errno_value = ERANGE; errno_value = ERANGE;
@ -62,7 +65,7 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
errno_value = ERANGE; errno_value = ERANGE;
} }
ptr++; __scanf_in(input);
} }
/* Handle sign and range */ /* Handle sign and range */
@ -101,6 +104,6 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
if(outl) *outl = xl; if(outl) *outl = xl;
if(outll) *outll = xll; if(outll) *outll = xll;
if(endptr && valid) *endptr = (char *)ptr;
return errno_value; return valid ? errno_value : EINVAL;
} }

View file

@ -4,7 +4,17 @@
double strtod(char const * restrict ptr, char ** restrict endptr) double strtod(char const * restrict ptr, char ** restrict endptr)
{ {
double d = 0; double d = 0;
int err = __strto_fp(ptr, endptr, &d, NULL, NULL); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_fp(&in, &d, NULL, NULL);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return d; return d;
} }

View file

@ -4,7 +4,17 @@
float strtof(char const * restrict ptr, char ** restrict endptr) float strtof(char const * restrict ptr, char ** restrict endptr)
{ {
float f = 0; float f = 0;
int err = __strto_fp(ptr, endptr, NULL, &f, NULL); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_fp(&in, NULL, &f, NULL);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return f; return f;
} }

View file

@ -4,7 +4,17 @@
long int strtol(char const * restrict ptr, char ** restrict endptr, int base) long int strtol(char const * restrict ptr, char ** restrict endptr, int base)
{ {
long n = 0; long n = 0;
int err = __strto_int(ptr, endptr, base, &n, NULL, false); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, &n, NULL, false);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n; return n;
} }

View file

@ -4,7 +4,17 @@
long double strtold(char const * restrict ptr, char ** restrict endptr) long double strtold(char const * restrict ptr, char ** restrict endptr)
{ {
long double ld = 0; long double ld = 0;
int err = __strto_fp(ptr, endptr, NULL, NULL, &ld); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_fp(&in, NULL, NULL, &ld);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return ld; return ld;
} }

View file

@ -5,7 +5,17 @@ long long int strtoll(char const * restrict ptr, char ** restrict endptr,
int base) int base)
{ {
long long n = 0; long long n = 0;
int err = __strto_int(ptr, endptr, base, NULL, &n, false); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, NULL, &n, false);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n; return n;
} }

View file

@ -5,7 +5,17 @@ unsigned long int strtoul(char const * restrict ptr, char ** restrict endptr,
int base) int base)
{ {
unsigned long n = 0; unsigned long n = 0;
int err = __strto_int(ptr, endptr, base, (long *)&n, NULL, true); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, (long *)&n, NULL, true);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n; return n;
} }

View file

@ -5,7 +5,17 @@ unsigned long long int strtoull(char const * restrict ptr,
char ** restrict endptr, int base) char ** restrict endptr, int base)
{ {
unsigned long long n = 0; unsigned long long n = 0;
int err = __strto_int(ptr, endptr, base, NULL, (long long *)&n, true); if(endptr)
if(err != 0) errno = err; *endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, NULL, (long long *)&n, true);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n; return n;
} }