Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-112301: Use literal format strings in unicode_fromformat_arg #124203

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 36 additions & 43 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2694,11 +2694,6 @@ unicode_fromformat_write_wcstr(_PyUnicodeWriter *writer, const wchar_t *str,
#define F_SIZE 3
#define F_PTRDIFF 4
#define F_INTMAX 5
static const char * const formats[] = {"%d", "%ld", "%lld", "%zd", "%td", "%jd"};
static const char * const formats_o[] = {"%o", "%lo", "%llo", "%zo", "%to", "%jo"};
static const char * const formats_u[] = {"%u", "%lu", "%llu", "%zu", "%tu", "%ju"};
static const char * const formats_x[] = {"%x", "%lx", "%llx", "%zx", "%tx", "%jx"};
static const char * const formats_X[] = {"%X", "%lX", "%llX", "%zX", "%tX", "%jX"};

static const char*
unicode_fromformat_arg(_PyUnicodeWriter *writer,
Expand Down Expand Up @@ -2840,47 +2835,45 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
case 'd': case 'i':
case 'o': case 'u': case 'x': case 'X':
{
/* used by sprintf */
char buffer[MAX_INTMAX_CHARS];
const char *fmt = NULL;
switch (*f) {
case 'o': fmt = formats_o[sizemod]; break;
case 'u': fmt = formats_u[sizemod]; break;
case 'x': fmt = formats_x[sizemod]; break;
case 'X': fmt = formats_X[sizemod]; break;
default: fmt = formats[sizemod]; break;
}
int issigned = (*f == 'd' || *f == 'i');

// Fill buffer using sprinf, with one of many possible format
// strings, like "%llX" for `long long` in hexadecimal.
// The type/size is in `sizemod`; the format is in `*f`.

// Use macros with nested switches to keep the sprintf format strings
// as compile-time literals, avoiding warnings and maybe allowing
// optimizations.

// `SPRINT` macro does one sprintf
// Example usage: SPRINT("l", "X", unsigned long) expands to
// sprintf(buffer, "%" "l" "X", va_arg(*vargs, unsigned long))
#define SPRINT(SIZE_SPEC, FMT_CHAR, TYPE) \
sprintf(buffer, "%" SIZE_SPEC FMT_CHAR, va_arg(*vargs, TYPE))

// One inner switch to handle all format variants
#define DO_SPRINTS(SIZE_SPEC, SIGNED_TYPE, UNSIGNED_TYPE) \
switch (*f) { \
case 'o': len = SPRINT(SIZE_SPEC, "o", UNSIGNED_TYPE); break; \
case 'u': len = SPRINT(SIZE_SPEC, "u", UNSIGNED_TYPE); break; \
case 'x': len = SPRINT(SIZE_SPEC, "x", UNSIGNED_TYPE); break; \
case 'X': len = SPRINT(SIZE_SPEC, "X", UNSIGNED_TYPE); break; \
default: len = SPRINT(SIZE_SPEC, "d", SIGNED_TYPE); break; \
} \
break;

// Outer switch to handle all the sizes/types
switch (sizemod) {
case F_LONG:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, long)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned long));
break;
case F_LONGLONG:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, long long)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned long long));
break;
case F_SIZE:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, Py_ssize_t)) :
sprintf(buffer, fmt, va_arg(*vargs, size_t));
break;
case F_PTRDIFF:
len = sprintf(buffer, fmt, va_arg(*vargs, ptrdiff_t));
break;
case F_INTMAX:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, intmax_t)) :
sprintf(buffer, fmt, va_arg(*vargs, uintmax_t));
break;
default:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, int)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned int));
break;
case F_LONG: DO_SPRINTS("l", long, unsigned long)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put the break here? Something like:

Suggested change
case F_LONG: DO_SPRINTS("l", long, unsigned long)
case F_LONG: DO_SPRINTS("l", long, unsigned long); break;

case F_LONGLONG: DO_SPRINTS("ll", long long, unsigned long long)
case F_SIZE: DO_SPRINTS("z", Py_ssize_t, size_t)
case F_PTRDIFF: DO_SPRINTS("t", ptrdiff_t, ptrdiff_t)
case F_INTMAX: DO_SPRINTS("j", intmax_t, uintmax_t)
default: DO_SPRINTS("", int, unsigned int)
}
#undef SPRINT
#undef DO_SPRINTS

assert(len >= 0);

int sign = (buffer[0] == '-');
Expand Down
Loading