I wanted to implement sscanf() for ksft_min_kernel_version() and this is a prerequisite for it.
It's also useful on its own so it gets its own submission.
Signed-off-by: Thomas Weißschuh linux@weissschuh.net --- Thomas Weißschuh (2): tools/nolibc: add limits for {u,}intmax_t, ulong and {u,}llong tools/nolibc: implement strtol() and friends
tools/include/nolibc/stdint.h | 19 +++++ tools/include/nolibc/stdlib.h | 109 +++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 59 +++++++++++++++ 3 files changed, 187 insertions(+) --- base-commit: f1652790cd374bcf98efc913ec69ed18d20e7747 change-id: 20240415-nolibc-strtol-af77a1f2c766
Best regards,
They are useful for users and necessary for strtol() and friends.
Signed-off-by: Thomas Weißschuh linux@weissschuh.net --- tools/include/nolibc/stdint.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+)
diff --git a/tools/include/nolibc/stdint.h b/tools/include/nolibc/stdint.h index 6665e272e213..cd79ddd6170e 100644 --- a/tools/include/nolibc/stdint.h +++ b/tools/include/nolibc/stdint.h @@ -96,6 +96,10 @@ typedef uint64_t uintmax_t; #define UINT_FAST32_MAX SIZE_MAX #define UINT_FAST64_MAX UINT64_MAX
+#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + #ifndef INT_MIN #define INT_MIN (-__INT_MAX__ - 1) #endif @@ -110,4 +114,19 @@ typedef uint64_t uintmax_t; #define LONG_MAX __LONG_MAX__ #endif
+#ifndef ULONG_MAX +#define ULONG_MAX ((unsigned long)(__LONG_MAX__) * 2 + 1) +#endif + +#ifndef LLONG_MIN +#define LLONG_MIN (-__LONG_LONG_MAX__ - 1) +#endif +#ifndef LLONG_MAX +#define LLONG_MAX __LONG_LONG_MAX__ +#endif + +#ifndef ULLONG_MAX +#define ULLONG_MAX ((unsigned long long)(__LONG_LONG_MAX__) * 2 + 1) +#endif + #endif /* _NOLIBC_STDINT_H */
The implementation always works on uintmax_t values.
This is inefficient when only 32bit are needed. However for all functions this only happens for strtol() on 32bit platforms.
Signed-off-by: Thomas Weißschuh linux@weissschuh.net --- tools/include/nolibc/stdlib.h | 109 +++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 59 +++++++++++++++ 2 files changed, 168 insertions(+)
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h index 5be9d3c7435a..f66870e25389 100644 --- a/tools/include/nolibc/stdlib.h +++ b/tools/include/nolibc/stdlib.h @@ -438,6 +438,115 @@ char *u64toa(uint64_t in) return itoa_buffer; }
+static __attribute__((unused)) +uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit) +{ + const char signed_ = lower_limit != 0; + unsigned char neg = 0, overflow = 0; + uintmax_t val = 0, limit, old_val; + char c; + + if (base < 0 || base > 35) { + SET_ERRNO(EINVAL); + goto out; + } + + while (isspace(*nptr)) + nptr++; + + if (*nptr == '+') { + nptr++; + } else if (*nptr == '-') { + neg = 1; + nptr++; + } + + if (signed_ && neg) + limit = -(uintmax_t)lower_limit; + else + limit = upper_limit; + + if ((base == 0 || base == 16) && + (strncmp(nptr, "0x", 2) == 0 || strncmp(nptr, "0X", 2) == 0)) { + base = 16; + nptr += 2; + } else if (base == 0 && strncmp(nptr, "0", 1) == 0) { + base = 8; + nptr += 1; + } else if (base == 0) { + base = 10; + } + + while (*nptr) { + c = *nptr; + + if (c >= '0' && c <= '9') + c -= '0'; + else if (c >= 'a' && c <= 'z') + c = c - 'a' + 10; + else if (c >= 'A' && c <= 'Z') + c = c - 'A' + 10; + else + goto out; + + if (c > base) + goto out; + + nptr++; + old_val = val; + val *= base; + val += c; + + if (val > limit || val < old_val) + overflow = 1; + } + +out: + if (overflow) { + SET_ERRNO(ERANGE); + val = limit; + } + if (endptr) + *endptr = (char *)nptr; + return (neg ? -1 : 1) * val; +} + +static __attribute__((unused)) +long strtol(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, LONG_MIN, LONG_MAX); +} + +static __attribute__((unused)) +unsigned long strtoul(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, ULONG_MAX); +} + +static __attribute__((unused)) +long long strtoll(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, LLONG_MIN, LLONG_MAX); +} + +static __attribute__((unused)) +unsigned long long strtoull(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, ULLONG_MAX); +} + +static __attribute__((unused)) +intmax_t strtoimax(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, INTMAX_MIN, INTMAX_MAX); +} + +static __attribute__((unused)) +uintmax_t strtoumax(const char *nptr, char **endptr, int base) +{ + return __strtox(nptr, endptr, base, 0, UINTMAX_MAX); +} + /* make sure to include all global symbols */ #include "nolibc.h"
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index b9c84d42edbe..6161bd57a0c9 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -621,6 +621,51 @@ int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const return 0; }
+#define EXPECT_STRTOX(cond, func, input, base, expected, chars, expected_errno) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strtox(llen, func, input, base, expected, chars, expected_errno); } while (0) + +static __attribute__((unused)) +int expect_strtox(int llen, void *func, const char *input, int base, intmax_t expected, int expected_chars, int expected_errno) +{ + char *endptr; + int actual_errno, actual_chars; + intmax_t r; + + errno = 0; + if (func == strtol) { + r = strtol(input, &endptr, base); + } else if (func == strtoul) { + r = strtoul(input, &endptr, base); + } else { + result(llen, FAIL); + return 1; + } + actual_errno = errno; + actual_chars = endptr - input; + + llen += printf(" %lld = %lld", (long long)expected, (long long)r); + if (r != expected) { + result(llen, FAIL); + return 1; + } + if (expected_chars == -1) { + if (*endptr != '\0') { + result(llen, FAIL); + return 1; + } + } else if (expected_chars != actual_chars) { + result(llen, FAIL); + return 1; + } + if (actual_errno != expected_errno) { + result(llen, FAIL); + return 1; + } + + result(llen, OK); + return 0; +} + /* declare tests based on line numbers. There must be exactly one test per line. */ #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); @@ -1143,6 +1188,20 @@ int run_stdlib(int min, int max) CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break; CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break; CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break; + CASE_TEST(strtol_simple); EXPECT_STRTOX(1, strtol, "35", 10, 35, -1, 0); break; + CASE_TEST(strtol_positive); EXPECT_STRTOX(1, strtol, "+35", 10, 35, -1, 0); break; + CASE_TEST(strtol_negative); EXPECT_STRTOX(1, strtol, "-35", 10, -35, -1, 0); break; + CASE_TEST(strtol_hex_auto); EXPECT_STRTOX(1, strtol, "0xFF", 0, 255, -1, 0); break; + CASE_TEST(strtol_octal_auto); EXPECT_STRTOX(1, strtol, "011", 0, 9, -1, 0); break; + CASE_TEST(strtol_hex_00); EXPECT_STRTOX(1, strtol, "0x00", 16, 0, -1, 0); break; + CASE_TEST(strtol_hex_FF); EXPECT_STRTOX(1, strtol, "FF", 16, 255, -1, 0); break; + CASE_TEST(strtol_hex_ff); EXPECT_STRTOX(1, strtol, "ff", 16, 255, -1, 0); break; + CASE_TEST(strtol_hex_prefix); EXPECT_STRTOX(1, strtol, "0xFF", 16, 255, -1, 0); break; + CASE_TEST(strtol_trailer); EXPECT_STRTOX(1, strtol, "35foo", 10, 35, 2, 0); break; + CASE_TEST(strtol_overflow); EXPECT_STRTOX(1, strtol, "0x8000000000000000", 16, LONG_MAX, -1, ERANGE); break; + CASE_TEST(strtol_underflow); EXPECT_STRTOX(1, strtol, "-0x8000000000000001", 16, LONG_MIN, -1, ERANGE); break; + CASE_TEST(strtoul_negative); EXPECT_STRTOX(1, strtoul, "-0x1", 16, ULONG_MAX, 4, 0); break; + CASE_TEST(strtoul_overflow); EXPECT_STRTOX(1, strtoul, "0x10000000000000000", 16, ULONG_MAX, -1, ERANGE); break;
case __LINE__: return ret; /* must be last */
Hi Thomas,
On Thu, Apr 25, 2024 at 06:09:27PM +0200, Thomas Weißschuh wrote:
The implementation always works on uintmax_t values.
This is inefficient when only 32bit are needed. However for all functions this only happens for strtol() on 32bit platforms.
That's indeed very useful! I think there's two small bugs below where the second one hides the first one:
+static __attribute__((unused)) +uintmax_t __strtox(const char *nptr, char **endptr, int base, intmax_t lower_limit, uintmax_t upper_limit) +{
- const char signed_ = lower_limit != 0;
- unsigned char neg = 0, overflow = 0;
- uintmax_t val = 0, limit, old_val;
- char c;
- if (base < 0 || base > 35) {
^^^^^^^^^ should be 36 otherwise you won't support [0-9a-z].
SET_ERRNO(EINVAL);
goto out;
- }
(...)
if (c > base)
goto out;
This should be "c >= base" otherwise 'z' is accepted in base 35 for example. I think it could be useful to add one more test covering base 36 to make sure all chars pass ?
- if (endptr)
*endptr = (char *)nptr;
- return (neg ? -1 : 1) * val;
I just checked to see what the compiler does on this and quite frequently it emits a multiply while the other approach involving only a negation is always at least as short:
return neg ? -val : val;
E.g. here's the test code:
long fct1(long neg, long val) { return (neg ? -1 : 1) * val; }
long fct2(long neg, long val) { return neg ? -val : val; }
- on x86_64 with gcc-13.2 -Os:
0000000000000000 <fct1>: 0: f7 df neg %edi 2: 48 19 c0 sbb %rax,%rax 5: 48 83 c8 01 or $0x1,%rax 9: 48 0f af c6 imul %rsi,%rax d: c3 ret
000000000000000e <fct2>: e: 48 89 f0 mov %rsi,%rax 11: 85 ff test %edi,%edi 13: 74 03 je 18 <fct2+0xa> 15: 48 f7 d8 neg %rax 18: c3 ret
- on riscv64 with 13.2 -Os:
0000000000000000 <fct1>: 0: c509 beqz a0,a 2: 557d li a0,-1 4: 02b50533 mul a0,a0,a1 8: 8082 ret a: 4505 li a0,1 c: bfe5 j 4
000000000000000e <fct2>: e: c119 beqz a0,14 10: 40b005b3 neg a1,a1 14: 852e mv a0,a1 16: 8082 ret
So IMHO it would be better to go the simpler way even if these are just a few bytes (and possibly ones less mul on some slow archs).
Thanks! Willy
Hi again Thomas,
On Thu, Apr 25, 2024 at 06:09:25PM +0200, Thomas Weißschuh wrote:
I wanted to implement sscanf() for ksft_min_kernel_version() and this is a prerequisite for it.
It's also useful on its own so it gets its own submission.
Signed-off-by: Thomas Weißschuh linux@weissschuh.net
Nice work, thank you. For the whole series, modulo my small comments on 2/2:
Acked-by: Willy Tarreau w@1wt.eu
Cheers, willy
On 2024-04-25 18:30:39+0000, Willy Tarreau wrote:
Hi again Thomas,
On Thu, Apr 25, 2024 at 06:09:25PM +0200, Thomas Weißschuh wrote:
I wanted to implement sscanf() for ksft_min_kernel_version() and this is a prerequisite for it.
It's also useful on its own so it gets its own submission.
Signed-off-by: Thomas Weißschuh linux@weissschuh.net
Nice work, thank you. For the whole series, modulo my small comments on 2/2:
Thanks for those great catches!
I addressed them and pushed the result to nolibc/next.
Acked-by: Willy Tarreau w@1wt.eu
Thomas
linux-kselftest-mirror@lists.linaro.org