stdlib: scanf-friendly strto* functions

This commit is contained in:
Lephenixnoir 2022-08-22 19:00:13 +02:00
parent fda0d950ed
commit 26e54af8e0
No known key found for this signature in database
GPG key ID: 1BBA026E13FC0495
14 changed files with 288 additions and 116 deletions

View file

@ -140,6 +140,7 @@ set(SOURCES
src/stdio/puts.c
src/stdio/remove.c
src/stdio/rewind.c
src/stdio/scanf/scan.c
src/stdio/setbuf.c
src/stdio/setvbuf.c
src/stdio/snprintf.c

View file

@ -75,7 +75,6 @@ extern int __printf(
va_list *__args);
/* Format extension API. */
struct __printf_format {
@ -84,12 +83,8 @@ struct __printf_format {
/* How many significant characters of data; meaning varies. */
int16_t precision;
/*
** Size specifier for integers (%o, %x, %i, %d, %u), is equal to the
** sizeof() of the targeted type. Also used for %lc.
*/
/* Size of targeted integer type (%o, %x, %i, %d, %u), in bytes */
uint8_t size;
/* (#) Alternative form: base prefixes, decimal point. */
uint8_t alternative :1;
/* ( ) Add a blank sign before nonnegative numbers. */
@ -111,15 +106,14 @@ struct __printf_format {
/*
** Type of format functions.
** -> __spec is the specifier letter (eg. "d" in "%d")
** -> __opts are the length, precision, sign, alignment, etc. options
** -> __out specifies the output and is used when generating text
** -> __fmt contains the format options and specifier letter
** -> __args is a pointer to the variable list of arguments to read from
*/
typedef void __printf_formatter_t(
struct __printf_output *__out,
struct __printf_format *__opts,
va_list *__args
);
struct __printf_format *__fmt,
va_list *__args);
/*
** Register a new format.
@ -127,10 +121,10 @@ typedef void __printf_formatter_t(
** The formatter designated by the specified lowercase or uppercase letter
** (eg 'p' or 'P') is registered. This function allows overriding default
** formatters, but this is very much discouraged. Letters with special meaning
** in the standard cannot be changed. A formatted can be removed of disabled by
** in the standard cannot be changed. A formatter can be removed or disabled by
** registering NULL.
**
** Here are used characters in the C standard:
** Here are the characters used/reserved in the C standard:
**
** a: Hexadecimal floating-point A: Hexadecimal floating-point
** b: _ B: _
@ -138,7 +132,7 @@ typedef void __printf_formatter_t(
** d: Decimal integer D: _
** e: Exponent floating-point E: Exponent floating-point
** f: Floating-point F: Floating-point
** g: General floating-point G: General: floating-point
** g: General floating-point G: General floating-point
** h: short or char size H: _
** i: Integer I: Locale-aware digits
** j: intmax_t size J: _

37
src/stdio/scanf/scan.c Normal file
View file

@ -0,0 +1,37 @@
#include <stdio.h>
#include "../stdio_p.h"
#include "../../stdlib/stdlib_p.h"
/*
** Prime the one-byte lookahead buffer with the first byte of the input.
**
** With a stream input the byte comes from fgetc(). With a string input the
** first byte of the string is buffered and the string pointer is advanced past
** it; if the string is empty, EOF is buffered and the pointer is left alone.
*/
void __scanf_start(struct __scanf_input *__in)
{
	if(__in->fp) {
		__in->buffer = fgetc(__in->fp);
		return;
	}

	/* Read through unsigned char, like fgetc(), so that bytes >= 0x80 never
	   come out negative (0xFF would otherwise be mistaken for EOF) */
	unsigned char c = (unsigned char)*__in->str;

	/* An empty string buffers EOF, the same value __scanf_fetch() returns
	   at the end of a string. Buffering 0 here without advancing would
	   make __scanf_end() step the pointer back before the start of the
	   string. */
	if(c == 0) {
		__in->buffer = EOF;
		return;
	}

	__in->buffer = c;
	__in->str++;
}
/*
** Fetch the next raw byte from the input, bypassing the lookahead buffer.
**
** Returns the byte as a nonnegative int, or EOF when the stream reports it or
** when the string's NUL terminator is reached. The string pointer is advanced
** only when a byte is actually consumed.
*/
int __scanf_fetch(struct __scanf_input *__in)
{
	if(__in->fp)
		return fgetc(__in->fp);

	/* Read through unsigned char, like fgetc(), so that bytes >= 0x80 never
	   come out negative (0xFF would otherwise be mistaken for EOF) */
	unsigned char c = (unsigned char)*__in->str;
	if(c == 0)
		return EOF;

	__in->str++;
	return c;
}
/*
** Finish scanning by giving the buffered lookahead byte back to the input.
**
** Nothing is done when the buffer holds EOF. Otherwise the byte is pushed back
** with ungetc() for a stream input, or the string pointer is moved back by one
** for a string input.
*/
void __scanf_end(struct __scanf_input *__in)
{
	int const pending = __in->buffer;

	if(pending != EOF) {
		if(__in->fp != NULL)
			ungetc(pending, __in->fp);
		else
			__in->str--;
	}
}

45
src/stdio/stdio_p.h Normal file
View file

@ -0,0 +1,45 @@
#ifndef __STDIO_P_H__
# define __STDIO_P_H__
#include <stdio.h>
/*
** General utilities for scanf(); we expose them here as we use subfunctions of
** strto*() from <stdlib.h> to implement numerical specifiers.
*/
/*
** Input for scanf; exactly one of str and fp must be non-NULL. We include a
** single-character buffer for convenience for scanning functions to test the
** next character, which can be flushed back by ungetc().
*/
struct __scanf_input {
/* String input; advanced past each byte as it is consumed */
char const * __restrict__ str;
/* Stream input; bytes are read from it with fgetc() */
FILE *fp;
/* Lookahead: the next byte that has not been consumed yet, or EOF */
int buffer;
};
/* Initialize the input by feeding the buffer byte. */
void __scanf_start(struct __scanf_input *__in);
/* Fetch the next byte from the input and return it (don't call directly). */
int __scanf_fetch(struct __scanf_input *__in);
/*
** Consume one byte: return the byte currently held in the lookahead buffer and
** refill the buffer with the following byte of the input.
*/
static inline int __scanf_in(struct __scanf_input *__in)
{
	int const current = __in->buffer;

	__in->buffer = __scanf_fetch(__in);
	return current;
}
/* Look at the buffered next byte; neither the buffer nor the input moves. */
static inline int __scanf_peek(struct __scanf_input *__in)
{
	int const next = __in->buffer;
	return next;
}
/* Close the input by pushing the buffered byte back once finished. */
void __scanf_end(struct __scanf_input *__in);
#endif /* __STDIO_P_H__ */

View file

@ -3,6 +3,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include "../stdio/stdio_p.h"
/*
** Parse an integer from a string. This is the base function for strtol,
@ -23,8 +24,7 @@
** expensive.
*/
int __strto_int(
char const * restrict __ptr,
char ** restrict __endptr,
struct __scanf_input *__input,
int __base,
long *__outl,
long long *__outll,
@ -39,8 +39,7 @@ int __strto_int(
** and outl is set.
*/
int __strto_fp(
char const * restrict __ptr,
char ** restrict __endptr,
struct __scanf_input *__input,
double *__out,
float *__outf,
long double *__outl);

View file

@ -1,3 +1,4 @@
#include "stdlib_p.h"
#include <stdlib.h>
#include <stdbool.h>
@ -37,12 +38,11 @@
** -> In hexadecimal notation, we read as many bits as the mantissa of a long
** double, then later multiply by a power of 2. There are no approximations.
*/
static void parse_digits(char const * restrict *ptr0, bool *valid,
static bool parse_digits(struct __scanf_input *input,
SIGNIFICAND_TYPE *digits, long *exponent, bool hexadecimal)
{
char const *ptr = *ptr0;
bool dot_found = false;
int digits_found = 0;
int digits_found=0, c=0;
*digits = 0;
*exponent = 0;
@ -53,13 +53,14 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
int dot_character = '.';
int exp_character = (hexadecimal ? 'p' : 'e');
for(int i = 0; isdigit(*ptr) || (hexadecimal && isxdigit(*ptr))
|| *ptr == dot_character; i++, ptr++) {
for(int i = 0; true; i++) {
c = __scanf_peek(input);
if(!(isdigit(c) ||
(hexadecimal && isxdigit(c)) ||
(c == dot_character && !dot_found))) break;
__scanf_in(input);
/* Allow only one dot in the string, stop at the second one */
if(*ptr == dot_character && dot_found) break;
if(*ptr == dot_character) {
if(c == dot_character) {
dot_found = true;
continue;
}
@ -67,12 +68,12 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
/* Count digits only until SIGNIFICAND_DIGITS */
if(digits_found < max_digits) {
if(hexadecimal) {
int v = *ptr - '0';
if(!isdigit(*ptr)) v = tolower(*ptr)-'a'+10;
int v = c - '0';
if(!isdigit(c)) v = tolower(c) - 'a' + 10;
*digits = (*digits << 4) + v;
}
else {
*digits = (*digits * 10) + (*ptr - '0');
*digits = (*digits * 10) + (c - '0');
}
}
else (*exponent)++;
@ -80,7 +81,7 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
if(dot_found) (*exponent)--;
/* But also round at the first discarded one */
if(digits_found == max_digits && *ptr >= '5')
if(digits_found == max_digits && c >= '5')
(*digits)++;
digits_found++;
@ -88,46 +89,54 @@ static void parse_digits(char const * restrict *ptr0, bool *valid,
/* Require at least one digit to be present; if not, the whole string
is considered invalid */
if(!digits_found) {
*valid = false;
return;
}
if(!digits_found)
return false;
/* In hexadecimal, each character is worth 4 bits of exponent */
if(hexadecimal) (*exponent) *= 4;
/* Parse exponent */
if(tolower(*ptr) == exp_character) {
char *end;
long e = strtol(ptr + 1, &end, 10);
if(tolower(__scanf_peek(input)) == exp_character) {
/* Hack: Restore the str pointer if this fails (which we
cannot determine with a single lookahead) so that *endptr is
set correctly */
struct __scanf_input backup = *input;
/* If an integer cannot be parsed, ignore the 'e...' part */
if(end != ptr + 1) {
ptr = end;
__scanf_in(input);
long e = 0;
if(__strto_int(input, 10, &e, NULL, false) == 0)
*exponent += e;
}
else
*input = backup;
}
*ptr0 = ptr;
*valid = true;
return true;
}
int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
float *outf, long double *outl)
static bool expect(struct __scanf_input *input, char const *sequence)
{
/* Save the value of ptr in endptr, in case format is invalid */
if(endptr) *endptr = (char *)ptr;
for(int i = 0; sequence[i]; i++) {
int c = __scanf_in(input);
if(tolower(c) != tolower(sequence[i]))
return false;
}
return true;
}
int __strto_fp(struct __scanf_input *input, double *out, float *outf,
long double *outl)
{
/* Skip initial whitespace */
while(isspace(*ptr)) ptr++;
while(isspace(__scanf_peek(input))) __scanf_in(input);
/* Read optional sign */
bool negative = false;
if(*ptr == '-') negative = true;
if(*ptr == '-' || *ptr == '+') ptr++;
int sign = __scanf_peek(input);
if(sign == '-') negative = true;
if(sign == '-' || sign == '+') __scanf_in(input);
int errno_value = 0;
bool valid = true;
bool valid = false;
/* Result variable */
if(out) *out = 0.0;
@ -135,47 +144,64 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
if(outl) *outl = 0.0l;
/* NaN possibly with an argument */
if(!strncasecmp(ptr, "nan", 3)) {
char const *arg = "";
ptr += 3;
if(ptr[0] == '(') {
arg = ptr + 1;
do ptr++;
while(ptr[-1] != ')');
if(tolower(__scanf_peek(input)) == 'n') {
if(!expect(input, "nan"))
return EINVAL;
/* Get the argument for up to 32 bytes */
char arg[32];
int i = 0;
if(__scanf_peek(input) == '(') {
while(i < 31) {
int c = __scanf_in(input);
if(c == ')') break;
arg[i++] = c;
}
arg[i] = 0;
}
if(out) *out = __builtin_nan(arg);
if(outf) *outf = __builtin_nanf(arg);
if(outl) *outl = __builtin_nanl(arg);
valid = true;
}
/* Infinity */
else if(!strncasecmp(ptr, "infinity", 8)) {
else if(tolower(__scanf_peek(input)) == 'i') {
if(!expect(input, "inf"))
return EINVAL;
if(tolower(__scanf_peek(input)) == 'i' &&
!expect(input, "inity"))
return EINVAL;
if(out) *out = __builtin_inf();
if(outf) *outf = __builtin_inff();
if(outl) *outl = __builtin_infl();
ptr += 8;
}
else if(!strncasecmp(ptr, "inf", 3)) {
if(out) *out = __builtin_inf();
if(outf) *outf = __builtin_inff();
if(outl) *outl = __builtin_infl();
ptr += 3;
valid = true;
}
else {
SIGNIFICAND_TYPE digits = 0;
long e = 0;
if(ptr[0] == '0' && tolower(ptr[1]) == 'x') {
ptr += 2;
parse_digits(&ptr, &valid, &digits, &e, true);
/* Check for the 0x prefix. Skipping a 0 if we start with 0 but
not 0x isn't a problem. */
bool hexa = false;
if(__scanf_peek(input) == '0') {
__scanf_in(input);
if(tolower(__scanf_peek(input)) == 'x') {
__scanf_in(input);
hexa = true;
}
/* Count the 0 as a digit */
else valid = true;
}
if(hexa) {
valid |= parse_digits(input, &digits, &e, true);
if(out) *out = (double)digits * exp2(e);
if(outf) *outf = (float)digits * exp2f(e);
if(outl) *outl = (long double)digits * exp2l(e);
}
else {
parse_digits(&ptr, &valid, &digits, &e, false);
valid |= parse_digits(input, &digits, &e, false);
if(out) *out = (double)digits * pow(10, e);
if(outf) *outf = (float)digits * powf(10, e);
if(outl) *outl = (long double)digits * powl(10, e);
@ -200,8 +226,5 @@ int __strto_fp(char const * restrict ptr, char ** restrict endptr, double *out,
if(outl) *outl = -(*outl);
}
/* Save the result pointer */
if(endptr && valid) *endptr = (char *)ptr;
return errno_value;
return valid ? errno_value : EINVAL;
}

View file

@ -4,19 +4,17 @@
#include <errno.h>
#include <limits.h>
int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
long *outl, long long *outll, bool use_unsigned)
int __strto_int(struct __scanf_input *input, int base, long *outl,
long long *outll, bool use_unsigned)
{
/* Save the value of ptr in endptr now in case the format is invalid */
if(endptr) *endptr = (char *)ptr;
/* Skip initial whitespace */
while(isspace(*ptr)) ptr++;
while(isspace(__scanf_peek(input))) __scanf_in(input);
/* Accept a sign character */
bool negative = false;
if(*ptr == '-') negative = true;
if(*ptr == '-' || *ptr == '+') ptr++;
int sign = __scanf_peek(input);
if(sign == '-') negative = true;
if(sign == '-' || sign == '+') __scanf_in(input);
/* Use unsigned variables as only these have defined overflow */
unsigned long xl = 0;
@ -26,29 +24,34 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
bool valid = false;
/* Read prefixes and determine base */
if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') {
ptr += 2;
if(__scanf_peek(input) == '0') {
__scanf_in(input);
if((base == 0 || base == 16) &&
tolower(__scanf_peek(input)) == 'x') {
__scanf_in(input);
base = 16;
}
else if(base == 0 && ptr[0] == '0') {
ptr++;
/* If we don't consume the x then count the 0 as a digit */
else valid = true;
if(base == 0)
base = 8;
}
else if(base == 0) {
if(base == 0)
base = 10;
}
/* Read digits */
while(1) {
int v = -1;
if(isdigit(*ptr)) v = *ptr - '0';
if(islower(*ptr)) v = *ptr - 'a' + 10;
int c = __scanf_peek(input);
if(isdigit(c)) v = c - '0';
if(islower(c)) v = c - 'a' + 10;
if(v == -1 || v >= base) break;
/* The value is valid as long as there is at least one digit */
valid = true;
/* (x = base*x + v) but with overflow checks */
/* TODO: strto_int: We might fail to represent [L]LONG_MIN */
if(outl) {
if(__builtin_umull_overflow(xl, base, &xl))
errno_value = ERANGE;
@ -62,7 +65,7 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
errno_value = ERANGE;
}
ptr++;
__scanf_in(input);
}
/* Handle sign and range */
@ -101,6 +104,6 @@ int __strto_int(char const * restrict ptr, char ** restrict endptr, int base,
if(outl) *outl = xl;
if(outll) *outll = xll;
if(endptr && valid) *endptr = (char *)ptr;
return errno_value;
return valid ? errno_value : EINVAL;
}

View file

@ -4,7 +4,17 @@
double strtod(char const * restrict ptr, char ** restrict endptr)
{
double d = 0;
int err = __strto_fp(ptr, endptr, &d, NULL, NULL);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_fp(&in, &d, NULL, NULL);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return d;
}

View file

@ -4,7 +4,17 @@
float strtof(char const * restrict ptr, char ** restrict endptr)
{
float f = 0;
int err = __strto_fp(ptr, endptr, NULL, &f, NULL);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_fp(&in, NULL, &f, NULL);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return f;
}

View file

@ -4,7 +4,17 @@
long int strtol(char const * restrict ptr, char ** restrict endptr, int base)
{
long n = 0;
int err = __strto_int(ptr, endptr, base, &n, NULL, false);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, &n, NULL, false);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n;
}

View file

@ -4,7 +4,17 @@
long double strtold(char const * restrict ptr, char ** restrict endptr)
{
long double ld = 0;
int err = __strto_fp(ptr, endptr, NULL, NULL, &ld);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_fp(&in, NULL, NULL, &ld);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return ld;
}

View file

@ -5,7 +5,17 @@ long long int strtoll(char const * restrict ptr, char ** restrict endptr,
int base)
{
long long n = 0;
int err = __strto_int(ptr, endptr, base, NULL, &n, false);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, NULL, &n, false);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n;
}

View file

@ -5,7 +5,17 @@ unsigned long int strtoul(char const * restrict ptr, char ** restrict endptr,
int base)
{
unsigned long n = 0;
int err = __strto_int(ptr, endptr, base, (long *)&n, NULL, true);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, (long *)&n, NULL, true);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n;
}

View file

@ -5,7 +5,17 @@ unsigned long long int strtoull(char const * restrict ptr,
char ** restrict endptr, int base)
{
unsigned long long n = 0;
int err = __strto_int(ptr, endptr, base, NULL, (long long *)&n, true);
if(err != 0) errno = err;
if(endptr)
*endptr = (char *)ptr;
struct __scanf_input in = { .str = ptr, .fp = NULL };
__scanf_start(&in);
int err = __strto_int(&in, base, NULL, (long long *)&n, true);
__scanf_end(&in);
if(err != 0)
errno = err;
if(err != EINVAL && endptr)
*endptr = (char *)in.str;
return n;
}