stdlib: reword strtoull into a generic strto_int

This new function will support strtol, strtoul, and strtoll as well.
This commit is contained in:
Lephenixnoir 2021-05-20 09:23:06 +02:00
parent b1dc3e77de
commit 5042236392
No known key found for this signature in database
GPG key ID: 1BBA026E13FC0495
4 changed files with 133 additions and 56 deletions

View file

@ -106,6 +106,7 @@ set(SOURCES
src/libc/stdlib/llabs.c src/libc/stdlib/llabs.c
src/libc/stdlib/lldiv.c src/libc/stdlib/lldiv.c
src/libc/stdlib/reallocarray.c src/libc/stdlib/reallocarray.c
src/libc/stdlib/strto_int.c
src/libc/stdlib/strtoull.c src/libc/stdlib/strtoull.c
# string # string
src/libc/string/strchr.c src/libc/string/strchr.c

View file

@ -0,0 +1,31 @@
#ifndef __STDLIB_P_H__
# define __STDLIB_P_H__
#include <stddef.h>
#include <stdbool.h>
/* Parse an integer from a string. This is the base function for strtol,
strtoul, strtoll, and strtoull.
This function does not set errno, and instead returns the error code
according to conversion rules. Setting errno is troublesome because it's a
global state that cannot be reverted and thus cannot be tested.
If outl is non-NULL, strto_int produces a long or an unsigned long result
(depending on use_unsigned). Signedness only affects the range of values
that are considered to be ERANGE, and both results are stored in *outl.
Similarly, if outll is non-NULL, strto_int produces a long long or unsigned
long long result.
On platforms where long is 32-bit, 64-bit operations are performed only if
outll is non-NULL. This is because multiplications with overflow can be
expensive.

Parameters:
  __ptr           String to parse. Leading whitespace and one optional sign
                  character ('+' or '-') are skipped before the digits.
  __endptr        If non-NULL, receives the position where parsing stopped;
                  it is left equal to __ptr when no digit could be parsed.
  __base          Conversion base. With 0 the base is deduced from the
                  prefix: "0x" selects 16, a leading "0" selects 8, and
                  anything else selects 10. A "0x" prefix is also accepted
                  when the base is explicitly 16.
  __outl          If non-NULL, receives the long / unsigned long result.
  __outll         If non-NULL, receives the long long / unsigned long long
                  result.
  __use_unsigned  Whether the results are to be interpreted as unsigned.

Returns 0 on success, or the errno code describing the failure (ERANGE when
the value is out of range). */
int strto_int(
char const * restrict __ptr,
char ** restrict __endptr,
int __base,
long *__outl,
long long *__outll,
bool __use_unsigned);
#endif /*__STDLIB_P_H__*/

View file

@ -0,0 +1,96 @@
#include "stdlib_p.h"
#include <stdlib.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
/* Value of character c when read as a digit in bases up to 36 ('0'-'9', then
   letters of either case for 10-35), or -1 if c cannot be a digit. */
static int digit_value(char c)
{
	/* <ctype.h> functions are only defined for unsigned char values (and
	   EOF), so plain char must be converted before being passed on */
	int uc = (unsigned char)c;

	if(isdigit(uc)) return uc - '0';

	uc = tolower(uc);
	if(islower(uc)) return uc - 'a' + 10;

	return -1;
}

/* Parse an integer from a string. Base function for strtol, strtoul, strtoll,
   and strtoull. This function:
   -> Does not set errno, and instead returns the potential error code.
      Setting errno would prevent these functions from calling each other as
      they all have different ranges, resulting in undue ERANGE.
   -> Can parse into both long and long long, depending on what pointer is
      non-NULL.

   ptr:           String to parse. Leading whitespace and one optional sign
                  character are skipped.
   endptr:        If non-NULL, receives a pointer past the last character of
                  the number, or ptr itself if no digit could be parsed.
   base:          Conversion base. 0 deduces the base from the prefix ("0x"
                  for 16, leading "0" for 8, otherwise 10). The "0x" prefix
                  is also accepted when base is 16.
   outl:          If non-NULL, receives the long / unsigned long result.
   outll:         If non-NULL, receives the long long / unsigned long long
                  result.
   use_unsigned:  Selects the unsigned range instead of the signed one.

   Returns 0 on success and ERANGE if the value does not fit the requested
   type. On overflow the stored result is clamped to the nearest limit of the
   type (LONG_MIN/LONG_MAX, ULONG_MAX, and the long long equivalents). A
   negative non-zero value in unsigned mode is reported as ERANGE and stored
   negated in the unsigned type. */
int strto_int(char const * restrict ptr, char ** restrict endptr, int base,
	long *outl, long long *outll, bool use_unsigned)
{
	/* Save the value of ptr in endptr now in case the format is invalid */
	if(endptr) *endptr = (char *)ptr;

	/* Skip initial whitespace */
	while(isspace((unsigned char)*ptr)) ptr++;

	/* Accept a sign character */
	bool negative = (*ptr == '-');
	if(*ptr == '-' || *ptr == '+') ptr++;

	/* Use unsigned variables as only these have defined overflow */
	unsigned long xl = 0;
	unsigned long long xll = 0;
	/* Overflow is tracked per type so that each result is clamped on its
	   own range */
	bool overflow_l = false;
	bool overflow_ll = false;
	int errno_value = 0;
	bool valid = false;

	/* Read prefixes and determine base. The "0x" prefix is only consumed
	   when a hexadecimal digit follows; otherwise the leading "0" is the
	   whole number and parsing must stop at the 'x'. */
	if((base == 0 || base == 16) && ptr[0] == '0'
	   && tolower((unsigned char)ptr[1]) == 'x') {
		int first = digit_value(ptr[2]);
		if(first >= 0 && first < 16) {
			ptr += 2;
			base = 16;
		}
		else if(base == 0) {
			base = 8;
		}
	}
	else if(base == 0 && ptr[0] == '0') {
		/* The leading 0 is left in place: it is a valid octal digit, so
		   that a lone "0" still counts as a successful conversion */
		base = 8;
	}
	else if(base == 0) {
		base = 10;
	}

	/* Read digits */
	while(1) {
		int v = digit_value(*ptr);
		if(v == -1 || v >= base) break;

		/* The value is valid as long as there is at least one digit */
		valid = true;

		/* (x = base*x + v) but with overflow checks */
		if(outl) {
			if(__builtin_umull_overflow(xl, base, &xl))
				overflow_l = true;
			if(__builtin_uaddl_overflow(xl, v, &xl))
				overflow_l = true;
		}
		if(outll) {
			if(__builtin_umulll_overflow(xll, base, &xll))
				overflow_ll = true;
			if(__builtin_uaddll_overflow(xll, v, &xll))
				overflow_ll = true;
		}

		ptr++;
	}

	/* Check the range, clamp, and handle the sign */
	if(outl) {
		if(use_unsigned) {
			if(overflow_l) {
				errno_value = ERANGE;
				xl = ULONG_MAX;
			}
			else if(negative) {
				/* Only -0 can be represented as unsigned */
				if(xl != 0) errno_value = ERANGE;
				xl = -xl;
			}
		}
		else {
			/* The magnitude of LONG_MIN is one more than LONG_MAX */
			unsigned long limit = (unsigned long)LONG_MAX;
			if(negative) limit++;

			if(overflow_l || xl > limit) {
				errno_value = ERANGE;
				xl = limit;
			}
			if(negative) xl = -xl;
		}
		*outl = (long)xl;
	}
	if(outll) {
		if(use_unsigned) {
			if(overflow_ll) {
				errno_value = ERANGE;
				xll = ULLONG_MAX;
			}
			else if(negative) {
				/* Only -0 can be represented as unsigned */
				if(xll != 0) errno_value = ERANGE;
				xll = -xll;
			}
		}
		else {
			/* The magnitude of LLONG_MIN is one more than LLONG_MAX */
			unsigned long long limit = (unsigned long long)LLONG_MAX;
			if(negative) limit++;

			if(overflow_ll || xll > limit) {
				errno_value = ERANGE;
				xll = limit;
			}
			if(negative) xll = -xll;
		}
		*outll = (long long)xll;
	}

	if(endptr && valid) *endptr = (char *)ptr;
	return errno_value;
}

View file

@ -1,62 +1,11 @@
#include <stdlib.h> #include "stdlib_p.h"
#include <stdbool.h>
#include <ctype.h>
#include <errno.h> #include <errno.h>
unsigned long long int strtoull(char const * restrict ptr, unsigned long long int strtoull(char const * restrict ptr,
char ** restrict endptr, int base) char ** restrict endptr, int base)
{ {
/* Save the value of ptr in endptr now in case the format is invalid */ unsigned long long n = 0;
if(endptr) *endptr = (char *)ptr; int err = strto_int(ptr, endptr, base, NULL, (long long *)&n, true);
if(err != 0) errno = err;
/* Skip initial whitespace */ return n;
while(isspace(*ptr)) ptr++;
/* Accept a sign character */
bool negative = false;
if(*ptr == '-') negative = true;
if(*ptr == '-' || *ptr == '+') ptr++;
unsigned long long x = 0;
bool valid = false;
/* Read prefixes and determine base */
if((base == 0 || base == 16) && ptr[0]=='0' && tolower(ptr[1])=='x') {
ptr += 2;
base = 16;
}
else if(base == 0 && ptr[0] == '0') {
ptr++;
base = 8;
}
else if(base == 0) {
base = 10;
}
/* Read digits */
while(1) {
int v = -1;
if(isdigit(*ptr)) v = *ptr - '0';
if(islower(*ptr)) v = *ptr - 'a' + 10;
if(v == -1 || v >= base) break;
/* The value is valid as long as there is at least one digit */
valid = true;
/* (x = base*x + v) but with overflow checks */
if(__builtin_umulll_overflow(x, base, &x))
errno = ERANGE;
if(__builtin_uaddll_overflow(x, v, &x))
errno = ERANGE;
ptr++;
}
if(negative) {
if(x != 0) errno = ERANGE;
x = -x;
}
if(endptr && valid) *endptr = (char *)ptr;
return x;
} }