*** /home/trentm/main/contrib/python/dist/src/Objects/unicodeobject.c Thu Jun 1 00:13:40 2000 --- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/unicodeobject.c Fri Jun 2 10:02:12 2000 *************** *** 4218,4228 **** --- 4218,4231 ---- static int formatfloat(Py_UNICODE *buf, + size_t buflen, int flags, int prec, int type, PyObject *v) { + /* fmt = '%#.' + `prec` + `type` + worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ char fmt[20]; double x; *************** *** 4231,4251 **** return -1; if (prec < 0) prec = 6; - if (prec > 50) - prec = 50; /* Arbitrary limitation */ if (type == 'f' && (fabs(x) / 1e25) >= 1e25) type = 'g'; sprintf(fmt, "%%%s.%d%c", (flags & F_ALT) ? "#" : "", prec, type); return usprintf(buf, fmt, x); } static int formatint(Py_UNICODE *buf, int flags, int prec, int type, PyObject *v) { char fmt[20]; long x; --- 4234,4267 ---- return -1; if (prec < 0) prec = 6; if (type == 'f' && (fabs(x) / 1e25) >= 1e25) type = 'g'; sprintf(fmt, "%%%s.%d%c", (flags & F_ALT) ? "#" : "", prec, type); + /* worst case length calc to ensure no buffer overrun: + 'e', 'g', ...: fmt = %#.<prec>g, buf = '-' + [0-9]*prec + '.' + 'e+' + exp + (longest exp for any double rep.), len = 1 + prec + 1 + 2 + 5 = 9 + prec + 'f': |x| < 1e50 here (larger values were switched to 'g' above), so + buf = '-' + [0-9]*50 + '.' + [0-9]*prec, len = 1 + 50 + 1 + prec = 52 + prec + If prec=0 the effective precision is 1 (the leading digit is always + given), so allow one more: 10+prec, or 53+prec for 'f'. */ + if (buflen <= (size_t)(type == 'f' ? 53 : 10) + (size_t)prec) { + PyErr_SetString(PyExc_OverflowError, + "formatted float is too long (precision too long?)"); + return -1; + } return usprintf(buf, fmt, x); } static int formatint(Py_UNICODE *buf, + size_t buflen, int flags, int prec, int type, PyObject *v) { + /* fmt = '%#.' + `prec` + 'l' + `type` + worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/ char fmt[20]; long x; *************** *** 4255,4267 **** --- 4271,4292 ---- if (prec < 0) prec = 1; sprintf(fmt, "%%%s.%dl%c", (flags & F_ALT) ? 
"#" : "", prec, type); + /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal)) + worst case buf = '0x' + [0-9]*prec, where prec >= 11 */ + if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) { + PyErr_SetString(PyExc_OverflowError, + "formatted integer is too long (precision too long?)"); + return -1; + } return usprintf(buf, fmt, x); } static int formatchar(Py_UNICODE *buf, + size_t buflen, PyObject *v) { + /* presume that the buffer is at least 2 characters long */ if (PyUnicode_Check(v)) buf[0] = PyUnicode_AS_UNICODE(v)[0]; *************** *** 4280,4285 **** --- 4305,4320 ---- return 1; } + /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) + + FORMATBUFLEN is the length of the buffer in which the floats, ints, & + chars are formatted. XXX This is a magic number. Each formatting + routine does bounds checking to ensure no overflow, but a better + solution may be to malloc a buffer of appropriate size for each + format. For now, the current solution is sufficient. + */ + #define FORMATBUFLEN (size_t)120 + PyObject *PyUnicode_Format(PyObject *format, PyObject *args) { *************** *** 4339,4348 **** Py_UNICODE fill; PyObject *v = NULL; PyObject *temp = NULL; ! Py_UNICODE *buf; Py_UNICODE sign; int len; ! Py_UNICODE tmpbuf[120]; /* For format{float,int,char}() */ fmt++; if (*fmt == '(') { --- 4374,4383 ---- Py_UNICODE fill; PyObject *v = NULL; PyObject *temp = NULL; ! Py_UNICODE *pbuf; Py_UNICODE sign; int len; ! Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */ fmt++; if (*fmt == '(') { *************** *** 4492,4499 **** switch (c) { case '%': ! buf = tmpbuf; ! buf[0] = '%'; len = 1; break; --- 4527,4535 ---- switch (c) { case '%': ! pbuf = formatbuf; ! /* presume that buffer length is at least 1 */ ! pbuf[0] = '%'; len = 1; break; *************** *** 4529,4535 **** if (temp == NULL) goto onError; } ! 
buf = PyUnicode_AS_UNICODE(temp); len = PyUnicode_GET_SIZE(temp); if (prec >= 0 && len > prec) len = prec; --- 4565,4571 ---- if (temp == NULL) goto onError; } ! pbuf = PyUnicode_AS_UNICODE(temp); len = PyUnicode_GET_SIZE(temp); if (prec >= 0 && len > prec) len = prec; *************** *** 4543,4550 **** case 'X': if (c == 'i') c = 'd'; ! buf = tmpbuf; ! len = formatint(buf, flags, prec, c, v); if (len < 0) goto onError; sign = (c == 'd'); --- 4579,4587 ---- case 'X': if (c == 'i') c = 'd'; ! pbuf = formatbuf; ! len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), ! flags, prec, c, v); if (len < 0) goto onError; sign = (c == 'd'); *************** *** 4552,4560 **** fill = '0'; if ((flags&F_ALT) && (c == 'x' || c == 'X') && ! buf[0] == '0' && buf[1] == c) { ! *res++ = *buf++; ! *res++ = *buf++; rescnt -= 2; len -= 2; width -= 2; --- 4589,4597 ---- fill = '0'; if ((flags&F_ALT) && (c == 'x' || c == 'X') && ! pbuf[0] == '0' && pbuf[1] == c) { ! *res++ = *pbuf++; ! *res++ = *pbuf++; rescnt -= 2; len -= 2; width -= 2; *************** *** 4569,4576 **** case 'f': case 'g': case 'G': ! buf = tmpbuf; ! len = formatfloat(buf, flags, prec, c, v); if (len < 0) goto onError; sign = 1; --- 4606,4614 ---- case 'f': case 'g': case 'G': ! pbuf = formatbuf; ! len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), ! flags, prec, c, v); if (len < 0) goto onError; sign = 1; *************** *** 4579,4586 **** break; case 'c': ! buf = tmpbuf; ! len = formatchar(buf, v); if (len < 0) goto onError; break; --- 4617,4624 ---- break; case 'c': ! pbuf = formatbuf; ! len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v); if (len < 0) goto onError; break; *************** *** 4592,4599 **** goto onError; } if (sign) { ! if (*buf == '-' || *buf == '+') { ! sign = *buf++; len--; } else if (flags & F_SIGN) --- 4630,4637 ---- goto onError; } if (sign) { ! if (*pbuf == '-' || *pbuf == '+') { ! 
sign = *pbuf++; len--; } else if (flags & F_SIGN) *************** *** 4629,4635 **** } if (sign && fill == ' ') *res++ = sign; ! memcpy(res, buf, len * sizeof(Py_UNICODE)); res += len; rescnt -= len; while (--width >= len) { --- 4667,4673 ---- } if (sign && fill == ' ') *res++ = sign; ! memcpy(res, pbuf, len * sizeof(Py_UNICODE)); res += len; rescnt -= len; while (--width >= len) { *** /home/trentm/main/contrib/python/dist/src/Objects/stringobject.c Fri Jun 2 08:45:50 2000 --- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/stringobject.c Fri Jun 2 09:10:18 2000 *************** *** 124,131 **** PyString_FromString(str) const char *str; { ! register unsigned int size = strlen(str); register PyStringObject *op; #ifndef DONT_SHARE_SHORT_STRINGS if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCS --- 124,136 ---- PyString_FromString(str) const char *str; { ! register size_t size = strlen(str); register PyStringObject *op; + if (size > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "string is too long for a Python string"); + return NULL; + } #ifndef DONT_SHARE_SHORT_STRINGS if (size == 0 && (op = nullstring) != NULL) { #ifdef COUNT_ALLOCS *************** *** 237,245 **** string_repr(op) register PyStringObject *op; { ! /* XXX overflow? */ ! int newsize = 2 + 4 * op->ob_size * sizeof(char); ! PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize); if (v == NULL) { return NULL; } --- 242,254 ---- string_repr(op) register PyStringObject *op; { ! size_t newsize = 2 + 4 * (size_t)op->ob_size; /* size_t math: no signed int overflow */ ! PyObject *v; ! if (op->ob_size > (INT_MAX - 2) / 4) { /* 2 + 4*ob_size must fit in an int */ ! PyErr_SetString(PyExc_OverflowError, "string is too large to make repr"); ! return NULL; /* do not fall through to the allocation with the error set */ ! } ! v = PyString_FromStringAndSize((char *)NULL, newsize); if (v == NULL) { return NULL; } *************** *** 2317,2352 **** #define F_ZERO (1<<4) static int ! 
formatfloat(buf, flags, prec, type, v) char *buf; int flags; int prec; int type; PyObject *v; { char fmt[20]; double x; if (!PyArg_Parse(v, "d;float argument required", &x)) return -1; if (prec < 0) prec = 6; - if (prec > 50) - prec = 50; /* Arbitrary limitation */ if (type == 'f' && fabs(x)/1e25 >= 1e25) type = 'g'; sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type); sprintf(buf, fmt, x); return strlen(buf); } static int ! formatint(buf, flags, prec, type, v) char *buf; int flags; int prec; int type; PyObject *v; { char fmt[20]; long x; if (!PyArg_Parse(v, "l;int argument required", &x)) --- 2326,2377 ---- #define F_ZERO (1<<4) static int ! formatfloat(buf, buflen, flags, prec, type, v) char *buf; + size_t buflen; int flags; int prec; int type; PyObject *v; { + /* fmt = '%#.' + `prec` + `type` + worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ char fmt[20]; double x; if (!PyArg_Parse(v, "d;float argument required", &x)) return -1; if (prec < 0) prec = 6; if (type == 'f' && fabs(x)/1e25 >= 1e25) type = 'g'; sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type); + /* worst case length calc to ensure no buffer overrun: + 'e', 'g', ...: fmt = %#.<prec>g, buf = '-' + [0-9]*prec + '.' + 'e+' + exp + (longest exp for any double rep.), len = 1 + prec + 1 + 2 + 5 = 9 + prec + 'f': |x| < 1e50 here (larger values were switched to 'g' above), so + buf = '-' + [0-9]*50 + '.' + [0-9]*prec, len = 1 + 50 + 1 + prec = 52 + prec + If prec=0 the effective precision is 1 (the leading digit is always + given), so allow one more: 10+prec, or 53+prec for 'f'. */ + if (buflen <= (size_t)(type == 'f' ? 53 : 10) + (size_t)prec) { + PyErr_SetString(PyExc_OverflowError, + "formatted float is too long (precision too long?)"); + return -1; + } sprintf(buf, fmt, x); return strlen(buf); } static int ! formatint(buf, buflen, flags, prec, type, v) char *buf; + size_t buflen; int flags; int prec; int type; PyObject *v; { + /* fmt = '%#.' 
+ `prec` + 'l' + `type` + worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/ char fmt[20]; long x; if (!PyArg_Parse(v, "l;int argument required", &x)) *************** *** 2354,2368 **** if (prec < 0) prec = 1; sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type); sprintf(buf, fmt, x); return strlen(buf); } static int ! formatchar(buf, v) char *buf; PyObject *v; { if (PyString_Check(v)) { if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) return -1; --- 2379,2402 ---- if (prec < 0) prec = 1; sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type); + /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal)) + worst case buf = '0x' + [0-9]*prec, where prec >= 11 */ + if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) { + PyErr_SetString(PyExc_OverflowError, + "formatted integer is too long (precision too long?)"); + return -1; + } sprintf(buf, fmt, x); return strlen(buf); } static int ! formatchar(buf, buflen, v) char *buf; + size_t buflen; PyObject *v; { + /* presume that the buffer is at least 2 characters long */ if (PyString_Check(v)) { if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) return -1; *************** *** 2376,2382 **** } ! /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */ PyObject * PyString_Format(format, args) --- 2410,2424 ---- } ! /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) ! ! FORMATBUFLEN is the length of the buffer in which the floats, ints, & ! chars are formatted. XXX This is a magic number. Each formatting ! routine does bounds checking to ensure no overflow, but a better ! solution may be to malloc a buffer of appropriate size for each ! format. For now, the current solution is sufficient. ! */ ! #define FORMATBUFLEN (size_t)120 PyObject * PyString_Format(format, args) *************** *** 2433,2442 **** int fill; PyObject *v = NULL; PyObject *temp = NULL; ! char *buf; int sign; int len; ! 
char tmpbuf[120]; /* For format{float,int,char}() */ char *fmt_start = fmt; fmt++; --- 2475,2484 ---- int fill; PyObject *v = NULL; PyObject *temp = NULL; ! char *pbuf; int sign; int len; ! char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */ char *fmt_start = fmt; fmt++; *************** *** 2584,2590 **** fill = ' '; switch (c) { case '%': ! buf = "%"; len = 1; break; case 's': --- 2626,2632 ---- fill = ' '; switch (c) { case '%': ! pbuf = "%"; len = 1; break; case 's': *************** *** 2604,2610 **** "%s argument has non-string str()"); goto error; } ! buf = PyString_AsString(temp); len = PyString_Size(temp); if (prec >= 0 && len > prec) len = prec; --- 2646,2652 ---- "%s argument has non-string str()"); goto error; } ! pbuf = PyString_AsString(temp); len = PyString_Size(temp); if (prec >= 0 && len > prec) len = prec; *************** *** 2617,2624 **** case 'X': if (c == 'i') c = 'd'; ! buf = tmpbuf; ! len = formatint(buf, flags, prec, c, v); if (len < 0) goto error; sign = (c == 'd'); --- 2659,2666 ---- case 'X': if (c == 'i') c = 'd'; ! pbuf = formatbuf; ! len = formatint(pbuf, sizeof(formatbuf), flags, prec, c, v); if (len < 0) goto error; sign = (c == 'd'); *************** *** 2626,2634 **** fill = '0'; if ((flags&F_ALT) && (c == 'x' || c == 'X') && ! buf[0] == '0' && buf[1] == c) { ! *res++ = *buf++; ! *res++ = *buf++; rescnt -= 2; len -= 2; width -= 2; --- 2668,2676 ---- fill = '0'; if ((flags&F_ALT) && (c == 'x' || c == 'X') && ! pbuf[0] == '0' && pbuf[1] == c) { ! *res++ = *pbuf++; ! *res++ = *pbuf++; rescnt -= 2; len -= 2; width -= 2; *************** *** 2642,2649 **** case 'f': case 'g': case 'G': ! buf = tmpbuf; ! len = formatfloat(buf, flags, prec, c, v); if (len < 0) goto error; sign = 1; --- 2684,2691 ---- case 'f': case 'g': case 'G': ! pbuf = formatbuf; ! len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v); if (len < 0) goto error; sign = 1; *************** *** 2651,2658 **** fill = '0'; break; case 'c': ! buf = tmpbuf; ! 
len = formatchar(buf, v); if (len < 0) goto error; break; --- 2693,2700 ---- fill = '0'; break; case 'c': ! pbuf = formatbuf; ! len = formatchar(pbuf, sizeof(formatbuf), v); if (len < 0) goto error; break; *************** *** 2663,2670 **** goto error; } if (sign) { ! if (*buf == '-' || *buf == '+') { ! sign = *buf++; len--; } else if (flags & F_SIGN) --- 2705,2712 ---- goto error; } if (sign) { ! if (*pbuf == '-' || *pbuf == '+') { ! sign = *pbuf++; len--; } else if (flags & F_SIGN) *************** *** 2700,2706 **** } if (sign && fill == ' ') *res++ = sign; ! memcpy(res, buf, len); res += len; rescnt -= len; while (--width >= len) { --- 2742,2748 ---- } if (sign && fill == ' ') *res++ = sign; ! memcpy(res, pbuf, len); res += len; rescnt -= len; while (--width >= len) { *** /home/trentm/main/contrib/python/dist/src/Lib/test/test_format.py Fri Jun 2 10:06:56 2000 --- /home/trentm/main/Apps/Perlium/Python/dist/src/Lib/test/test_format.py Fri Jun 2 10:04:32 2000 *************** *** 0 **** --- 1,52 ---- + from test_support import verbose + import string, sys + + # test string formatting operator (I am not sure if this is being tested + # elsewhere but, surely, some of the given cases are *not* tested because + # they crash python) + # test on unicode strings as well + + def testformat(formatstr, args, output=None): + if verbose: + if output: + print "%s %% %s =? %s ..." %\ + (repr(formatstr), repr(args), repr(output)), + else: + print "%s %% %s works? ..." 
% (repr(formatstr), repr(args)), + try: + result = formatstr % args + except OverflowError: + if verbose: + print 'overflow (this is fine)' + else: + if output and result != output: + if verbose: + print 'no' + print "%s %% %s == %s != %s" %\ + (repr(formatstr), repr(args), repr(result), repr(output)) + else: + if verbose: + print 'yes' + + def testboth(formatstr, *args): + testformat(formatstr, *args) + testformat(unicode(formatstr), *args) + + + testboth("%.1d", (1,), "1") + testboth("%.*d", (sys.maxint,1)) # expect overflow + testboth("%.100d", (1,), '0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001') + testboth("%#.117x", (1,), '0x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001') + testboth("%#.118x", (1,), '0x0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001') + + testboth("%f", (1.0,), "1.000000") + # these are trying to test the limits of the internal magic-number-length + # formatting buffer, if that number changes then these tests are less + # effective + testboth("%#.*g", (109, -1.e+49/3.)) + testboth("%#.*g", (110, -1.e+49/3.)) + testboth("%#.*g", (110, -1.e+100/3.)) + + # test some ridiculously large precision, expect overflow + testboth('%12.*f', (123456, 1.0)) + *** /home/trentm/main/contrib/python/dist/src/Lib/test/output/test_format Fri Jun 2 10:06:56 2000 --- /home/trentm/main/Apps/Perlium/Python/dist/src/Lib/test/output/test_format Wed May 31 23:54:16 2000 *************** *** 0 **** --- 1 ---- + test_format