stdlib: scanf-friendly strto* functions

2025-07-16 17:17:33 +02:00 · 2022-08-22 19:00:13 +02:00 · 2022-08-22 19:00:13 +02:00 · 26e54af8e0
commit 26e54af8e0
parent fda0d950ed
14 changed files with 288 additions and 116 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -140,6 +140,7 @@ set(SOURCES
  src/stdio/puts.c
  src/stdio/remove.c
  src/stdio/rewind.c
  src/stdio/scanf/scan.c
  src/stdio/setbuf.c
  src/stdio/setvbuf.c
  src/stdio/snprintf.c
--- a/include/fxlibc/printf.h
+++ b/include/fxlibc/printf.h
@ -75,7 +75,6 @@ extern int __printf(
 	va_list *__args);
 /* Format extension API. */
 struct __printf_format {
@ -84,12 +83,8 @@ struct __printf_format {
 	/* How many significant characters of data, meaning varies. */
 	int16_t precision;
-	/*
+	/* Size of targeted integer type (%o, %x, %i, %d, %u), in bytes */
 	** Size specifier for integers (%o, %x, %i, %d, %u), is equal to the
 	** sizeof() of the targeted type. Also used for %lc.
 	*/
 	uint8_t size;
 	/* (#) Alternative form: base prefixes, decimal point. */
 	uint8_t alternative :1;
 	/* ( ) Add a blank sign before nonnegative numbers. */
@ -111,15 +106,14 @@ struct __printf_format {
 /*
 ** Type of format functions.
-** -> __spec is the specifier letter (eg. "d" in "%d")
+** -> __out specifies the output and is used when generating text
-** -> __opts are the length, precision, sign, alignment, etc. options
+** -> __fmt contains the format options and specifier letter
 ** -> __args is a pointer to the variable list of arguments to read from
 */
 typedef void __printf_formatter_t(
 	struct __printf_output *__out,
-	struct __printf_format *__opts,
+	struct __printf_format *__fmt,
-	va_list *__args
+	va_list *__args);
 );
 /*
 ** Register a new format.
@ -127,10 +121,10 @@ typedef void __printf_formatter_t(
 ** The formatter designated by the specified lowercase or uppercase letter
 ** (eg 'p' or 'P') is registered. This function allows overriding default
 ** formatters, but this is very much discouraged. Letters with special meaning
-** in the standard cannot be changed. A formatted can be removed of disabled by
+** in the standard cannot be changed. A formatter can be removed or disabled by
 ** registering NULL.
 **
-** Here are used characters in the C standard:
+** Here are the characters used/reserved in the C standard:
 **
 ** a: Hexadecimal floating-point      A: Hexadecimal floating-point
 ** b: _                               B: _
@ -138,7 +132,7 @@ typedef void __printf_formatter_t(
 ** d: Decimal integer                 D: _
 ** e: Exponent floating-point         E: Exponent floating-point
 ** f: Floating-point                  F: Floating-point
-** g: General floating-point          G: General: floating-point
+** g: General floating-point          G: General floating-point
 ** h: short or char size              H: _
 ** i: Integer                         I: Locale-aware digits
 ** j: intmax_t size                   J: _
--- a/src/stdio/scanf/scan.c
+++ b/src/stdio/scanf/scan.c
@ -0,0 +1,37 @@
 #include <stdio.h>
 #include "../stdio_p.h"
 #include "../../stdlib/stdlib_p.h"
 /*
 ** Prime the single-byte lookahead buffer with the first byte of the input.
 **
 ** For stream input the byte comes from fgetc(). For string input the byte is
 ** read as an unsigned char so that the buffer holds the same range of values
 ** as fgetc() would return (bytes >= 0x80 must not turn into negative ints,
 ** which are invalid arguments for the <ctype.h> functions and can collide
 ** with EOF). An empty string yields EOF, exactly like __scanf_fetch() does at
 ** the end of a string; storing 0 instead would make __scanf_end() step the
 ** string pointer back to before the start of the string.
 */
 void __scanf_start(struct __scanf_input *__in)
 {
 	if(__in->fp) {
 		__in->buffer = fgetc(__in->fp);
 		return;
 	}

 	unsigned char first = (unsigned char)*__in->str;
 	if(first == 0) {
 		/* Nothing consumed: leave str on the terminator */
 		__in->buffer = EOF;
 		return;
 	}

 	__in->buffer = first;
 	__in->str++;
 }
 /*
 ** Fetch the next raw byte from the input, bypassing the lookahead buffer.
 **
 ** Returns the byte as a non-negative int, or EOF when the stream reports EOF
 ** or the string terminator is reached (the terminator is never consumed).
 ** String bytes are read as unsigned char to match what fgetc() returns for
 ** streams: with a plain (possibly signed) char, the byte 0xFF would be
 ** returned as -1, i.e. EOF, after the pointer has already advanced, and
 ** other high bytes would be negative values that the <ctype.h> functions
 ** do not accept.
 */
 int __scanf_fetch(struct __scanf_input *__in)
 {
 	if(__in->fp)
 		return fgetc(__in->fp);

 	unsigned char c = (unsigned char)*__in->str;
 	if(c == 0)
 		return EOF;

 	__in->str++;
 	return c;
 }
 /*
 ** Give the buffered lookahead byte back to the input. When the buffer holds
 ** EOF there is nothing to return; otherwise the byte is pushed back onto the
 ** stream with ungetc(), or the string pointer is moved back by one.
 */
 void __scanf_end(struct __scanf_input *__in)
 {
 	if(__in->buffer != EOF) {
 		if(!__in->fp)
 			__in->str--;
 		else
 			ungetc(__in->buffer, __in->fp);
 	}
 }
--- a/src/stdio/stdio_p.h
+++ b/src/stdio/stdio_p.h
@ -0,0 +1,45 @@
 #ifndef __STDIO_P_H__
 # define __STDIO_P_H__
 #include <stdio.h>
 /*
 ** General utilities for scanf(); we expose them here as we use subfunctions of
 ** strto*() from <stdlib.h> to implement numerical specifiers.
 */
 /*
 ** Input for scanf; exactly one of str and fp must be non-NULL. We include a
 ** single-character buffer for convenience for scanning functions to test the
 ** next character, which can be flushed back by ungetc().
 */
 struct __scanf_input {
 	/* String source; __scanf_fetch() moves it past each byte it returns */
 	char const * __restrict__ str;
 	/* Stream source; when non-NULL, bytes are read from it with fgetc() */
 	FILE *fp;
 	/* One byte of lookahead, filled by __scanf_start() and __scanf_in();
 	   holds EOF when __scanf_fetch() found no more input */
 	int buffer;
 };
 /* Initialize the input by feeding the buffer byte. */
 void __scanf_start(struct __scanf_input *__in);
 /* Fetch the next byte from the input and return it (don't call directly). */
 int __scanf_fetch(struct __scanf_input *__in);
 /*
 ** Consume one byte: hand out the byte currently held in the lookahead buffer
 ** and refill the buffer from the input with __scanf_fetch().
 */
 static inline int __scanf_in(struct __scanf_input *__in)
 {
 	int const consumed = __in->buffer;

 	__in->buffer = __scanf_fetch(__in);
 	return consumed;
 }
 /* Look at the buffered byte; neither the buffer nor the input is modified. */
 static inline int __scanf_peek(struct __scanf_input *__in)
 {
 	int const ahead = __in->buffer;

 	return ahead;
 }
 /* Close the input by unsending the buffer once finished. */
 void __scanf_end(struct __scanf_input *__in);
 #endif /* __STDIO_P_H__ */
--- a/src/stdlib/stdlib_p.h
+++ b/src/stdlib/stdlib_p.h
@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include "../stdio/stdio_p.h"
 /*
 ** Parse an integer from a string. This is the base function for strtol,
@ -23,8 +24,7 @@
 ** expensive.
 */
 int __strto_int(
-	char const * restrict __ptr,
+	struct __scanf_input *__input,
 	char ** restrict __endptr,
 	int __base,
 	long *__outl,
 	long long *__outll,
@ -39,10 +39,9 @@ int __strto_int(
 ** and outl is set.
 */
 int __strto_fp(
-   char const * restrict __ptr,
+	struct __scanf_input *__input,
-   char ** restrict __endptr,
+	double *__out,
-   double *__out,
+	float *__outf,
-   float *__outf,
+	long double *__outl);
   long double *__outl);
 #endif /*__STDLIB_P_H__*/
--- a/src/stdlib/strto_fp.c
+++ b/src/stdlib/strto_fp.c
@ -1,3 +1,4 @@
 #include "stdlib_p.h"
 #include <stdlib.h>
 #include <stdbool.h>
@ -37,12 +38,11 @@
 ** -> In hexadecimal notation, we read as many bits as the mantissa of a long
 **    double, then later multiply by a power of 2. There are no approximations.
 */
-static void parse_digits(char const * restrict *ptr0, bool *valid,
+static bool parse_digits(struct __scanf_input *input,
 	SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal)
 {
 	char const *ptr = *ptr0;
 	bool dot_found = false;
-	int digits_found = 0;
+	int digits_found=0, c=0;
 	*digits = 0;
 	*exponent = 0;
@ -53,13 +53,14 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
 	int dot_character = '.';
 	int exp_character = (hexadecimal ? 'p' : 'e');
-	for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr))
+	for(int i = 0; true; i++) {
-		|| *ptr == dot_character; i++, ptr++) {
+		c = __scanf_peek(input);
 		if(!(isdigit(c) ||
 		    (hexadecimal && isxdigit(c)) ||
 		    (c == dot_character && !dot_found))) break;
 		__scanf_in(input);
-		/* Allow only one dot in the string, stop at the second one */
+		if(c == dot_character) {
 		if(*ptr == dot_character && dot_found) break;
 		if(*ptr == dot_character) {
 			dot_found = true;
 			continue;
 		}
@ -67,12 +68,12 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
 		/* Count digits only until SIGNIFICAND_DIGITS */
 		if(digits_found < max_digits) {
 			if(hexadecimal) {
-				int v = *ptr - '0';
+				int v = c - '0';
-				if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10;
+				if(!isdigit(c)) v = tolower(c) - 'a' + 10;
 				*digits = (*digits << 4) + v;
 			}
 			else {
-				*digits = (*digits * 10) + (*ptr - '0');
+				*digits = (*digits * 10) + (c - '0');
 			}
 		}
 		else (*exponent)++;
@ -80,7 +81,7 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
 		if(dot_found) (*exponent)--;
 		/* But also round at the first discarded one */
-		if(digits_found == max_digits && *ptr >= '5')
+		if(digits_found == max_digits && c >= '5')
 			(*digits)++;
 		digits_found++;
@ -88,46 +89,54 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
 	/* Require at least one digit to be present; if not, the whole string
 	   is considered invalid */
-	if(!digits_found) {
+	if(!digits_found)
-		*valid = false;
+		return false;
 		return;
 	}
 	/* In hexadecimal, each character is worth 4 bits of exponent */
 	if(hexadecimal) (*exponent) *= 4;
 	/* Parse exponent */
-	if(tolower(*ptr) == exp_character) {
+	if(tolower(__scanf_peek(input)) == exp_character) {
-		char *end;
+		/* Hack: Restore the str pointer if this fails (which we
-		long e = strtol(ptr + 1, &end, 10);
+		   cannot determine with a single lookahead) so that *endptr is
 		   set correctly */
 		struct __scanf_input backup = *input;
-		/* If an integer cannot be parsed, ignore the 'e...' part */
+		__scanf_in(input);
-		if(end != ptr + 1) {
+		long e = 0;
-			ptr = end;
+		if(__strto_int(input, 10, &e, NULL, false) == 0)
 			*exponent += e;
-		}
+		else
 			*input = backup;
 	}
-	*ptr0 = ptr;
+	return true;
 	*valid = true;
 }
-int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
+static bool expect(struct __scanf_input *input, char const *sequence)
 	float *outf, long double *outl)
 {
-	/* Save the value of ptr in endptr, in case format is invalid */
+	for(int i = 0; sequence[i]; i++) {
-	if(endptr) *endptr = (char *)ptr;
+		int c = __scanf_in(input);
 		if(tolower(c) != tolower(sequence[i]))
 			return false;
 	}
 	return true;
 }
 int __strto_fp(struct __scanf_input *input, double *out, float *outf,
 	long double *outl)
 {
 	/* Skip initial whitespace */
-	while(isspace(*ptr)) ptr++;
+	while(isspace(__scanf_peek(input))) __scanf_in(input);
 	/* Read optional sign */
 	bool negative = false;
-	if(*ptr == '-') negative = true;
+	int sign = __scanf_peek(input);
-	if(*ptr == '-' || *ptr == '+') ptr++;
+	if(sign == '-') negative = true;
 	if(sign == '-' || sign == '+') __scanf_in(input);
 	int errno_value = 0;
-	bool valid = true;
+	bool valid = false;
 	/* Result variable */
 	if(out)  *out = 0.0;
@ -135,47 +144,64 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
 	if(outl) *outl = 0.0l;
 	/* NaN possibly with an argument */
-	if(!strncasecmp(ptr, "nan", 3)) {
+	if(tolower(__scanf_peek(input)) == 'n') {
-		char const *arg = "";
+		if(!expect(input, "nan"))
-		ptr += 3;
+			return EINVAL;
-		if(ptr[0] == '(') {
+
-			arg = ptr + 1;
+		/* Get the argument for up to 32 bytes */
-			do ptr++;
+		char arg[32];
-			while(ptr[-1] != ')');
+		int i = 0;
 		if(__scanf_peek(input) == '(') {
 			while(i < 31) {
 				int c = __scanf_in(input);
 				if(c == ')') break;
 				arg[i++] = c;
 			}
 			arg[i] = 0;
 		}
 		if(out)  *out  = __builtin_nan(arg);
 		if(outf) *outf = __builtin_nanf(arg);
 		if(outl) *outl = __builtin_nanl(arg);
 		valid = true;
 	}
-	/* Infinity */
+	else if(tolower(__scanf_peek(input)) == 'i') {
-	else if(!strncasecmp(ptr, "infinity", 8)) {
+		if(!expect(input, "inf"))
 			return EINVAL;
 		if(tolower(__scanf_peek(input)) == 'i' &&
 		   !expect(input, "inity"))
 			return EINVAL;
 		if(out)  *out  = __builtin_inf();
 		if(outf) *outf = __builtin_inff();
 		if(outl) *outl = __builtin_infl();
-		ptr += 8;
+		valid = true;
 	}
 	else if(!strncasecmp(ptr, "inf", 3)) {
 		if(out)  *out  = __builtin_inf();
 		if(outf) *outf = __builtin_inff();
 		if(outl) *outl = __builtin_infl();
 		ptr += 3;
 	}
 	else {
 		SIGNIFICAND_TYPE digits = 0;
 		long e = 0;
-		if(ptr[0] == '0' && tolower(ptr[1]) == 'x') {
+		/* Check for the 0x prefix. Skipping a 0 if we start with 0 but
-			ptr += 2;
+		   not 0x isn't a problem. */
-			parse_digits(&ptr, &valid, &digits, &e, true);
+		bool hexa = false;
 		if(__scanf_peek(input) == '0') {
 			__scanf_in(input);
 			if(tolower(__scanf_peek(input)) == 'x') {
 				__scanf_in(input);
 				hexa = true;
 			}
 			/* Count the 0 as a digit */
 			else valid = true;
 		}
 		if(hexa) {
 			valid |= parse_digits(input, &digits, &e, true);
 			if(out)  *out  = (double)digits * exp2(e);
 			if(outf) *outf = (float)digits * exp2f(e);
 			if(outl) *outl = (long double)digits * exp2l(e);
 		}
 		else {
-			parse_digits(&ptr, &valid, &digits, &e, false);
+			valid |= parse_digits(input, &digits, &e, false);
 			if(out)  *out  = (double)digits * pow(10, e);
 			if(outf) *outf = (float)digits * powf(10, e);
 			if(outl) *outl = (long double)digits * powl(10, e);
@ -200,8 +226,5 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
 		if(outl) *outl = -(*outl);
 	}
-	/* Save the result pointer */
+	return valid ? errno_value : EINVAL;
 	if(endptr && valid) *endptr = (char *)ptr;
 	return errno_value;
 }
--- a/src/stdlib/strto_int.c
+++ b/src/stdlib/strto_int.c
@ -4,19 +4,17 @@
 #include <errno.h>
 #include <limits.h>
-int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
+int __strto_int(struct __scanf_input *input, int base, long *outl,
-	long *outl, long long *outll, bool use_unsigned)
+	long long *outll, bool use_unsigned)
 {
 	/* Save the value of ptr in endptr now in case the format is invalid */
 	if(endptr) *endptr = (char *)ptr;
 	/* Skip initial whitespace */
-	while(isspace(*ptr)) ptr++;
+	while(isspace(__scanf_peek(input))) __scanf_in(input);
 	/* Accept a sign character */
 	bool negative = false;
-	if(*ptr == '-') negative = true;
+	int sign = __scanf_peek(input);
-	if(*ptr == '-' || *ptr == '+') ptr++;
+	if(sign == '-') negative = true;
 	if(sign == '-' || sign == '+') __scanf_in(input);
 	/* Use unsigned variables as only these have defined overflow */
 	unsigned long xl = 0;
@ -26,29 +24,34 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
 	bool valid = false;
 	/* Read prefixes and determine base */
-	if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') {
+	if(__scanf_peek(input) == '0') {
-		ptr += 2;
+		__scanf_in(input);
-		base = 16;
+		if((base == 0 || base == 16) &&
 		   tolower(__scanf_peek(input)) == 'x') {
 			__scanf_in(input);
 			base = 16;
 		}
 		/* If we don't consume the x then count the 0 as a digit */
 		else valid = true;
 		if(base == 0)
 			base = 8;
 	}
-	else if(base == 0 && ptr[0] == '0') {
+	if(base == 0)
 		ptr++;
 		base = 8;
 	}
 	else if(base == 0) {
 		base = 10;
 	}
 	/* Read digits */
 	while(1) {
 		int v = -1;
-		if(isdigit(*ptr)) v = *ptr - '0';
+		int c = __scanf_peek(input);
-		if(islower(*ptr)) v = *ptr - 'a' + 10;
+		if(isdigit(c)) v = c - '0';
 		if(islower(c)) v = c - 'a' + 10;
 		if(v == -1 || v >= base) break;
 		/* The value is valid as long as there is at least one digit */
 		valid = true;
 		/* (x = base*x + v) but with overflow checks */
 		/* TODO: strto_int: We might fail to represent [L]LONG_MIN */
 		if(outl) {
 			if(__builtin_umull_overflow(xl, base, &xl))
 				errno_value = ERANGE;
@ -62,7 +65,7 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
 				errno_value = ERANGE;
 		}
-		ptr++;
+		__scanf_in(input);
 	}
 	/* Handle sign and range */
@ -101,6 +104,6 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
 	if(outl) *outl = xl;
 	if(outll) *outll = xll;
-	if(endptr && valid) *endptr = (char *)ptr;
+
-	return errno_value;
+	return valid ? errno_value : EINVAL;
 }
--- a/src/stdlib/strtod.c
+++ b/src/stdlib/strtod.c
@ -4,7 +4,17 @@
 double strtod(char const * restrict ptr, char ** restrict endptr)
 {
 	double d = 0;
-	int err = __strto_fp(ptr, endptr, &d, NULL, NULL);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_fp(&in, &d, NULL, NULL);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return d;
 }
--- a/src/stdlib/strtof.c
+++ b/src/stdlib/strtof.c
@ -4,7 +4,17 @@
 float strtof(char const * restrict ptr, char ** restrict endptr)
 {
 	float f = 0;
-	int err = __strto_fp(ptr, endptr, NULL, &f, NULL);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_fp(&in, NULL, &f, NULL);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return f;
 }
--- a/src/stdlib/strtol.c
+++ b/src/stdlib/strtol.c
@ -4,7 +4,17 @@
 long int strtol(char const * restrict ptr, char ** restrict endptr, int base)
 {
 	long n = 0;
-	int err = __strto_int(ptr, endptr, base, &n, NULL, false);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_int(&in, base, &n, NULL, false);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return n;
 }
--- a/src/stdlib/strtold.c
+++ b/src/stdlib/strtold.c
@ -4,7 +4,17 @@
 long double strtold(char const * restrict ptr, char ** restrict endptr)
 {
 	long double ld = 0;
-	int err = __strto_fp(ptr, endptr, NULL, NULL, &ld);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_fp(&in, NULL, NULL, &ld);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return ld;
 }
--- a/src/stdlib/strtoll.c
+++ b/src/stdlib/strtoll.c
@ -5,7 +5,17 @@ long long int strtoll(char const * restrict ptr, char ** restrict endptr,
 	int base)
 {
 	long long n = 0;
-	int err = __strto_int(ptr, endptr, base, NULL, &n, false);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_int(&in, base, NULL, &n, false);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return n;
 }
--- a/src/stdlib/strtoul.c
+++ b/src/stdlib/strtoul.c
@ -5,7 +5,17 @@ unsigned long int strtoul(char const * restrict ptr, char ** restrict endptr,
 	int base)
 {
 	unsigned long n = 0;
-	int err = __strto_int(ptr, endptr, base, (long *)&n, NULL, true);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_int(&in, base, (long *)&n, NULL, true);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return n;
 }
--- a/src/stdlib/strtoull.c
+++ b/src/stdlib/strtoull.c
@ -5,7 +5,17 @@ unsigned long long int strtoull(char const * restrict ptr,
 	char ** restrict endptr, int base)
 {
 	unsigned long long n = 0;
-	int err = __strto_int(ptr, endptr, base, NULL, (long long *)&n, true);
+	if(endptr)
-	if(err != 0) errno = err;
+		*endptr = (char *)ptr;
 	struct __scanf_input in = { .str = ptr, .fp = NULL };
 	__scanf_start(&in);
 	int err = __strto_int(&in, base, NULL, (long long *)&n, true);
 	__scanf_end(&in);
 	if(err != 0)
 		errno = err;
 	if(err != EINVAL && endptr)
 		*endptr = (char *)in.str;
 	return n;
 }