Index: Doc/library/struct.rst =================================================================== --- Doc/library/struct.rst (revision 87813) +++ Doc/library/struct.rst (working copy) @@ -76,10 +76,17 @@ Format Strings -------------- +.. productionlist:: + format_string: `byte_order_specifier`? `format_string_body` + format_string_body: (`count`? `type_string`)* + type_string: `character_code` | `structure` + count: `decimalinteger` + Format strings are the mechanism used to specify the expected layout when packing and unpacking data. They are built up from :ref:`format-characters`, -which specify the type of data being packed/unpacked. In addition, there are -special characters for controlling the :ref:`struct-alignment`. +which specify the type of data being packed/unpacked. There are special +characters for controlling the :ref:`struct-alignment`. In addition, +nested structures may be specified (see :ref:`structure-layout`). .. _struct-alignment: @@ -87,6 +94,9 @@ Byte Order, Size, and Alignment ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +.. productionlist:: + byte_order_specifier: "!" | "@" | "=" | ">" | "<" | "^" + By default, C types are represented in the machine's native format and byte order, and properly aligned by skipping pad bytes if necessary (according to the rules used by the C compiler). @@ -145,11 +155,34 @@ count of zero. See :ref:`struct-examples`. +.. _structure-layout: + +Structure Layout +^^^^^^^^^^^^^^^^ + +.. productionlist:: + structure: "T" "{" `format_string_body` "}" + +The structure layout format string provides a way to specify layouts which +may be arbitrarily nested (with limits, see note). Packing a structure layout +requires a nested tuple with the same shape as the structure layout being +packed. Similarly, unpacking produces a nested tuple with the same shape +as the structure layout being unpacked. + +.. note:: + The allowable nesting depth of structures is limited. It is guaranteed + that at least 63 nesting levels are allowed. + .. 
_format-characters: Format Characters ^^^^^^^^^^^^^^^^^ +.. productionlist:: + character_code: "x" | "c" | "b" | "B" | "?" | "h" | "H" | "i" | "I" | "l" + : | "L" | "q" | "Q" | "f" | "d" | "s" | "p" | "P" | "t" | "g" | "u" + : | "w" | "O" | "Z" + Format characters have the following meaning; the conversion between C and Python values should be obvious given their types. The 'Standard size' column refers to the size of the packed value in bytes when using standard size; that @@ -323,7 +356,30 @@ This only works when native size and alignment are in effect; standard size and alignment does not enforce any alignment. +In addition to primitive character codes, nested structure layouts can be +packed:: + + >>> pack('c T { iii T { h } }', b'*', (1, 2, 3, (4,))) + b'*\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x04\x00' +and unpacked:: + + >>> unpack('T{ c T{ c } c}', b'rgb') + ((b'r', (b'g',), b'b'),) + +Empty structures are allowed, but note that they must be packed with empty +tuples:: + + >>> pack('T{}', ()) + b'' + >>> pack('T{T{}}', ((),)) + b'' + >>> pack('T{}') + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + struct.error: pack requires exactly 1 arguments + + .. 
seealso:: Module :mod:`array` Index: Lib/test/test_struct.py =================================================================== --- Lib/test/test_struct.py (revision 87813) +++ Lib/test/test_struct.py (working copy) @@ -29,7 +29,37 @@ else: return string_reverse(value) +def sizeof(fmt): + return struct.calcsize(fmt) + +def alignof(code): + return sizeof('b' + code) - sizeof(code) + class StructTest(unittest.TestCase): + def assertStructEquivalence(self, fmt1, fmt2, tup1, tup2): + flat = struct.Struct(fmt1) + flat_bytes = flat.pack(*tup1) + nested = struct.Struct(fmt2) + nested_bytes = nested.pack(*tup2) + self.assertEqual(flat_bytes, nested_bytes) + self.assertEqual(tup1, flat.unpack(flat_bytes)) + self.assertEqual(tup2, nested.unpack(nested_bytes)) + + def assertBadPack(self, fmt, tup): + self.assertRaises(struct.error, struct.pack, fmt, *tup) + + def assertBadUnpack(self, fmt, byte_str): + self.assertRaises(struct.error, struct.unpack, fmt, byte_str) + + def assertBadFormat(self, fmt): + self.assertRaises(struct.error, struct.calcsize, fmt) + + def assertEqualSizes(self, fmt1, fmt2): + self.assertEqual(sizeof(fmt1), sizeof(fmt2)) + + def assertUnequalSizes(self, fmt1, fmt2): + self.assertNotEqual(sizeof(fmt1), sizeof(fmt2)) + def test_isbigendian(self): self.assertEqual((struct.pack('=i', 1)[0] == 0), ISBIGENDIAN) @@ -556,6 +586,161 @@ s = struct.Struct('i') s.__init__('ii') + def test_struct_equivalence(self): + def empty_tuples(depth): + tup = () + for i in range(0, depth): + tup = (tup,) + return tup + + self.assertStructEquivalence('', 'T{}', (), empty_tuples(1)) + self.assertStructEquivalence('', 'T{' * 61 + '}' * 61, + (), empty_tuples(61)) + self.assertStructEquivalence('ii ', 'T{ii} ', (1, 2), ((1, 2),)) + self.assertStructEquivalence('iii', 'T{ii} i', (1, 2, 3), ((1, 2),3)) + self.assertStructEquivalence('HB2B', '\t\tT{\n\tH\n\n\nB 2B}', + (1, 2, 3, 4), ((1, 2, 3, 4),)) + self.assertStructEquivalence('i2i', 'T{ i 2i }', + (1, 2, 3), ((1, 2, 3),)) + 
self.assertStructEquivalence('iiiHHiBiHBiHi', + '\t T{\n\nii T {i HH }\ni T { BiH B i } } T\t{ H }i\t', + (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13), + ((1, 2, (3, 4, 5), 6, (7, 8, 9, 10, 11)), (12,), 13)) + self.assertStructEquivalence('ii', 'T { i T { i } }', + (187, 42), ((187, (42,)),)) + self.assertStructEquivalence('iHBB', + """i + T{ + H + B + B + } + """, + (1, 2, 3, 4), + (1, (2, 3, 4))) + self.assertStructEquivalence('iHBBBB', + """i + T{ + H + T {} + B + T { + } + B + T { BB } + } + """, + (1, 2, 3, 4, 5, 6), + (1, (2, (), 3, (), 4, (5, 6)))) + self.assertStructEquivalence('2T{bb}', 'T{bb}T{bb}', + ((1, 2), (3, 4)), ((1, 2), (3, 4))) + self.assertStructEquivalence('2i3i', '5i', + (1, 2, 3, 4, 5), (1, 2, 3, 4, 5)) + + def test_bad_pack(self): + self.assertBadPack('T{}', ()) + self.assertBadPack('T{ T{} }', ()) + self.assertBadPack('T{T{T{}}}', (1,)) + self.assertBadPack('T{i}', ((),)) + self.assertBadPack('T{i}', (1,)) + self.assertBadPack('h T { b }', ((2,), 1)) + self.assertBadPack('T { h } T { h }', (1, 2)) + self.assertBadPack('T { T { h } }', ((1,),)) + self.assertBadPack('T { h }', (('fizzbuzz',),)) + self.assertBadPack('T { i T { i } }', (('foo', (42,)),)) + self.assertBadPack('bb', ((1,), (2,))) + + def test_bad_unpack(self): + self.assertBadUnpack('T{i}', b'fizzbuzz') + self.assertBadUnpack('i T{ h T{b} }', b'\x00' * 100) + self.assertBadUnpack('T{ b T{T{T{T{bbb}}} 10s }}', b'\x00') + + def test_bad_structs(self): + self.assertBadFormat('T{') + self.assertBadFormat('\nT\t\t{') + self.assertBadFormat('T') + self.assertBadFormat('T { T { }') + self.assertBadFormat('T\n{\nT{\n}\n\n') + self.assertBadFormat('((()))') + self.assertBadFormat('T<{}') + self.assertBadFormat('12&&&afa7sasdf7asdfl><>aaa') + self.assertBadFormat('{}') + self.assertBadFormat('T{}TTTTT') + self.assertBadFormat('1+2*200') + self.assertBadFormat('def foo(): return 187') + self.assertBadFormat('t{b}') + self.assertBadFormat('<<<<<<<<<<<') + 
self.assertBadFormat('.......................') + self.assertBadFormat('i') + self.assertBadFormat('>ii>i') + self.assertBadFormat('i }') + self.assertBadFormat(' T { i }') + self.assertBadFormat('}}}}') + self.assertBadFormat('}this is bogus!!!!') + + + def test_struct_alignment(self): + largest_code_struct = 'T{%s}' % ''.join(integer_codes) + largest_code = max(integer_codes, key=alignof) + largest_code_size = sizeof(largest_code) + largest_code_alignment = alignof(largest_code) + all_codes_size = sizeof(''.join(integer_codes)) + + for code in integer_codes: + code_size = sizeof(code) + struct_str = code + largest_code_struct + padding_size = max(code_size, largest_code_alignment) + self.assertEqual(sizeof(struct_str), + padding_size + all_codes_size) + + def test_no_padding(self): + all_codes_size = struct.calcsize('=' + ''.join(integer_codes)) + + for code in integer_codes: + for order in ['<', '>', '=']: + code_size = sizeof(order + code) + struct_str = '%s%sT{%s}' % (order, code, ''.join(integer_codes)) + self.assertEqual(sizeof(struct_str), + code_size + all_codes_size) + + def test_equal_sizes(self): + self.assertEqualSizes('b', 'T{b}') + self.assertEqualSizes('bT{bbb}bbT{T{b}}', 'T{bbbbbbb}') + self.assertEqualSizes('=hibihibi', '=hiT{bi}hiT{bi}') + self.assertEqualSizes('T{T{T{T{}}}}', '') + self.assertEqualSizes('121T{}', '') + self.assertEqualSizes('=bhi', '=T{bT{hT{i}}}') + self.assertEqualSizes('=T{T{T{bb}}}hhT{T{T{b}}}', '=bbhhb') + self.assertEqualSizes('xxxxT{T{x}}xxxx', 'xxxxxxxxx') + self.assertEqualSizes('bl', 'll') + self.assertEqualSizes('bi', 'ii') + self.assertEqualSizes('bh', 'hh') + self.assertEqualSizes('100b', 'b' * 100) + self.assertEqualSizes('13T{bT{hi}}', 'T{bT{hi}}' * 13) + self.assertEqualSizes('20T{ 13T{b} }', '260b') + self.assertEqualSizes('20T{ 13T{4T{b}} 10T{60T{b}} }', '13040b') + + def test_unequal_sizes(self): + self.assertUnequalSizes('', 'x') + self.assertUnequalSizes('bi', '=bi') + self.assertUnequalSizes('iT{b}', 
'bT{i}') + self.assertUnequalSizes('20T{ 13T{4T{b}} 10T{60T{b}} }', '130b') + + def test_del(self): + s = struct.Struct('T{ T { h T{i} T{d} } b}') + del s + with self.assertRaises(NameError) as exc: + s.pack( ((1,), 2)) + + def test_max_nesting_depth(self): + self.assertBadFormat('T{' * 65 + '}' * 65) + self.assertBadFormat('T{' * 187 + '}' * 187) + self.assertBadFormat('T{' * 500 + '}' * 500) + self.assertEqualSizes('T{' * 64 + '}' * 64, '') + self.assertEqualSizes('T{' * 64 + '}' * 64 + 'T{' * 64 + '}' * 64, '') + def test_main(): run_unittest(StructTest) Index: Modules/_struct.c =================================================================== --- Modules/_struct.c (revision 87813) +++ Modules/_struct.c (working copy) @@ -22,19 +22,67 @@ const struct _formatdef *); } formatdef; -typedef struct _formatcode { + +/* A left child / right sibling tree used to represent the structure + of the format codes. Child nodes are added to represent nested + struct formats. */ + +typedef struct _formattree { + Py_ssize_t s_size; + Py_ssize_t s_len; + Py_ssize_t s_count; + Py_ssize_t s_offset; + Py_ssize_t s_alignment; const struct _formatdef *fmtdef; - Py_ssize_t offset; - Py_ssize_t size; -} formatcode; + struct _formattree *child; + struct _formattree *sibling; +} formattree; + +#define FormatTree_HasChildren(ft) ((ft)->fmtdef == 0) +#define FormatTree_AppendChild(ft, new_child) \ + do { \ + if ((ft)->child == NULL) { \ + (ft)->child = (new_child); \ + } else { \ + formattree *_t = (ft)->child; \ + for ( ; _t->sibling != NULL; _t = _t->sibling); \ + _t->sibling = (new_child); \ + } \ + } while (0) +#define FormatTree_RemoveChild(ft, rmchild) \ + do { \ + if ((ft)->child && (ft)->child == rmchild) { \ + (ft)->child = rmchild->sibling; \ + } else { \ + formattree *t = (ft)->child; \ + for ( ; t->sibling != NULL \ + && t->sibling != rmchild; t = t->sibling); \ + if (t->sibling == rmchild) \ + t->sibling = rmchild->sibling; \ + } \ + } while (0) + +#define 
FormatTree_IsPadding(ft) (((ft)->fmtdef != 0) && \ + ((ft)->fmtdef->format == 'x')) + +#define MAX_STRUCT_DEPTH 64 + +/* Holds the state of the struct string parser. */ + +typedef struct _parser_state { + const formatdef *byte_fmt; + const char *fmt; + Py_ssize_t error_cnt; + Py_ssize_t struct_depth; +} parser_state; + + /* Struct object interface */ typedef struct { PyObject_HEAD - Py_ssize_t s_size; - Py_ssize_t s_len; - formatcode *s_codes; + formattree *s_tree; PyObject *s_format; PyObject *weakreflist; /* List of weak references */ } PyStructObject; @@ -83,14 +131,78 @@ #endif #define STRINGIFY(x) #x +#define MAX(x, y) ((x) < (y) ? (y) : (x)) #ifdef __powerc #pragma options align=reset #endif +/* Prototypes for the parser. These are needed because the parser is mutually + recursive. */ + +static void +parse_format_string(parser_state *state, formattree *tree); + +static void +parse_format_string_body(parser_state *state, formattree *tree); + +static void +parse_struct(parser_state *state, formattree *tree, Py_ssize_t count); + +static void +parse_character_code(parser_state *state, formattree *tree, Py_ssize_t count); + + +static formattree * +formattree_new(void) +{ + formattree * new_tree = PyMem_MALLOC(sizeof(formattree)); + if (new_tree == NULL) { + PyErr_NoMemory(); + return NULL; + } + + new_tree->s_size = 0; + new_tree->s_len = 0; + new_tree->s_count = 0; + new_tree->s_offset = 0; + new_tree->s_alignment = 0; + new_tree->fmtdef = NULL; + new_tree->child = NULL; + new_tree->sibling = NULL; + + return new_tree; +} + + +static void +formattree_free_r(formattree *tree) +{ + formattree *child = tree->child; + formattree *del_child = NULL; + while (child != NULL) { + if (FormatTree_HasChildren(child)) { + formattree_free_r(child); + } + del_child = child; + child = child->sibling; + PyMem_FREE(del_child); + } +} + +static void +formattree_free(formattree *tree) +{ + formattree_free_r(tree); + PyMem_FREE(tree); +} + + /* Helper for integer format codes: 
converts an arbitrary Python object to a PyLongObject if possible, otherwise fails. Caller should decref. */ +/* Helper to get a PyLongObject by hook or by crook. Caller should decref. */ + static PyObject * get_pylong(PyObject *v) { @@ -1143,155 +1255,288 @@ } -/* Align a size according to a format code. Return -1 on overflow. */ +/* Align a number to a given multiple. */ static Py_ssize_t -align(Py_ssize_t size, char c, const formatdef *e) +align_to(Py_ssize_t number, Py_ssize_t multiple) { Py_ssize_t extra; - if (e->format == c) { - if (e->alignment && size > 0) { - extra = (e->alignment - 1) - (size - 1) % (e->alignment); - if (extra > PY_SSIZE_T_MAX - size) - return -1; - size += extra; - } + if (multiple && number > 0) { + extra = (multiple - 1) - (number - 1) % multiple; + if (extra > PY_SSIZE_T_MAX - number) + return -1; + number += extra; } - return size; + return number; } +static void +whitespace(parser_state *state) +{ + while ( (*state->fmt != '\0') && isspace(Py_CHARMASK(*state->fmt))) + state->fmt++; +} -/* calculate the size of a format string */ +static int +match(parser_state *state, char c) +{ + if (*state->fmt == c) { + state->fmt++; + return 0; + } else { + return -1; + } +} static int -prepare_s(PyStructObject *self) +is_primitive(char c) { - const formatdef *f; - const formatdef *e; - formatcode *codes; + static char *primitive_codes = "xcbB?hHiIlLqQfdspP"; + const char *begin; + for (begin = primitive_codes; *begin; ++begin) { + if (c == *begin) + return 1; + } + return isdigit(c); +} - const char *s; - const char *fmt; +static int +is_byte_order_marker(char c) +{ + static char *byte_order_codes = "<>!=@"; + const char *begin; + for (begin = byte_order_codes; *begin; ++begin) { + if (c == *begin) + return 1; + } + return 0; +} + +static void +parse_error(parser_state *state, const char *error_msg) +{ + PyErr_SetString(StructError, error_msg); + state->error_cnt += 1; +} + +static void +parse_add_child(parser_state *state, formattree *tree, 
formattree *child) +{ + FormatTree_AppendChild(tree, child); + /* Padding does not contribute to the length. */ + tree->s_len += child->s_count * !FormatTree_IsPadding(child); + tree->s_alignment = MAX(tree->s_alignment, child->s_alignment); +} + +static Py_ssize_t +parse_count(parser_state *state) +{ char c; - Py_ssize_t size, len, num, itemsize; + Py_ssize_t num; - fmt = PyBytes_AS_STRING(self->s_format); + c = *state->fmt; + num = 0; + while (isdigit(c)) { + /* overflow-safe version of + if (num*10 + (c - '0') > PY_SSIZE_T_MAX) { ... } */ + if (num >= PY_SSIZE_T_MAX / 10 && ( + num > PY_SSIZE_T_MAX / 10 || + (c - '0') > PY_SSIZE_T_MAX % 10)) + goto overflow; + num = num * 10 + (c - '0'); + c = *++state->fmt; + } - f = whichtable((char **)&fmt); + return num; +overflow: + parse_error(state, "total struct size too long"); + return -1; +} - s = fmt; - size = 0; - len = 0; - while ((c = *s++) != '\0') { - if (isspace(Py_CHARMASK(c))) - continue; - if ('0' <= c && c <= '9') { - num = c - '0'; - while ('0' <= (c = *s++) && c <= '9') { - /* overflow-safe version of - if (num*10 + (c - '0') > PY_SSIZE_T_MAX) { ... } */ - if (num >= PY_SSIZE_T_MAX / 10 && ( - num > PY_SSIZE_T_MAX / 10 || - (c - '0') > PY_SSIZE_T_MAX % 10)) - goto overflow; - num = num*10 + (c - '0'); +static void +parse_character_code(parser_state *state, formattree *tree, Py_ssize_t count) +{ + const char c = *state->fmt++; + formattree *new_tree; + const formatdef *e; + + assert(is_primitive(c)); + + e = getentry(c, state->byte_fmt); + if (e != NULL) { + new_tree = formattree_new(); + if (new_tree != NULL) { + new_tree->fmtdef = e; + if (c == 'p' || c == 's') { + new_tree->s_count = 1; + new_tree->s_size = count; + new_tree->s_alignment = e->alignment; + new_tree->s_len = 1; + } else if (c == 'x') { + /* NOTE: Padding nodes are kept so that we can compute + offsets later. We remove these nodes from the + tree after computing the offsets. 
*/ + new_tree->s_count = count; + new_tree->s_size = e->size; + new_tree->s_alignment = 0; + new_tree->s_len = 0; + } else { + new_tree->s_count = count; + new_tree->s_size = e->size; + new_tree->s_alignment = e->alignment; + new_tree->s_len = count; } - if (c == '\0') { - PyErr_SetString(StructError, - "repeat count given without format specifier"); - return -1; - } + parse_add_child(state, tree, new_tree); } - else - num = 1; + } +} - e = getentry(c, f); - if (e == NULL) - return -1; +/* Parses 'T{...}'; nesting deeper than MAX_STRUCT_DEPTH is a parse error. */ - switch (c) { - case 's': /* fall through */ - case 'p': len++; break; - case 'x': break; - default: len += num; break; - } +static void +parse_struct(parser_state *state, formattree *tree, Py_ssize_t count) +{ + formattree *new_tree; - itemsize = e->size; - size = align(size, c, e); - if (size == -1) - goto overflow; + if (match(state, 'T') == 0) { + whitespace(state); - /* if (size + num * itemsize > PY_SSIZE_T_MAX) { ... } */ - if (num > (PY_SSIZE_T_MAX - size) / itemsize) - goto overflow; - size += num * itemsize; - } + if (match(state, '{') == 0) { + state->struct_depth++; - /* check for overflow */ - if ((len + 1) > (PY_SSIZE_T_MAX / sizeof(formatcode))) { - PyErr_NoMemory(); - return -1; - } + if (state->struct_depth > MAX_STRUCT_DEPTH) { + parse_error(state, + "the maximum struct nesting depth has been exceeded"); + return; + } - self->s_size = size; - self->s_len = len; - codes = PyMem_MALLOC((len + 1) * sizeof(formatcode)); - if (codes == NULL) { - PyErr_NoMemory(); - return -1; + new_tree = formattree_new(); + + if (new_tree != NULL) { + whitespace(state); + + if (*state->fmt != '}') + parse_format_string_body(state, new_tree); + + if (!state->error_cnt) { + new_tree->s_count = count; + parse_add_child(state, tree, new_tree); + + whitespace(state); + if (match(state, '}') != 0) { + parse_error(state, + "missing '}' in struct string"); + } + } else { + formattree_free(new_tree); + } + } + + state->struct_depth--; + } else { + 
parse_error(state, + "missing '{' after 'T' in struct string"); + } } - /* Free any s_codes value left over from a previous initialization. */ - if (self->s_codes != NULL) - PyMem_FREE(self->s_codes); - self->s_codes = codes; +} - s = fmt; - size = 0; - while ((c = *s++) != '\0') { - if (isspace(Py_CHARMASK(c))) - continue; - if ('0' <= c && c <= '9') { - num = c - '0'; - while ('0' <= (c = *s++) && c <= '9') - num = num*10 + (c - '0'); - if (c == '\0') +static void +parse_format_string_body(parser_state *state, formattree *tree) +{ + whitespace(state); + + while (*state->fmt == 'T' || is_primitive(*state->fmt)) { + Py_ssize_t count = 1; + + if (isdigit(*state->fmt)) { + count = parse_count(state); + if (count == -1) break; } + + if (*state->fmt == 'T') + parse_struct(state, tree, count); + else if (is_primitive(*state->fmt)) + parse_character_code(state, tree, count); else - num = 1; + parse_error(state, "unexpected character in struct string"); - e = getentry(c, f); + if (state->error_cnt > 0) + break; - size = align(size, c, e); - if (c == 's' || c == 'p') { - codes->offset = size; - codes->size = num; - codes->fmtdef = e; - codes++; - size += num; - } else if (c == 'x') { - size += num; + whitespace(state); + } +} + +static void +parse_format_string(parser_state *state, formattree *root) +{ + whitespace(state); + + if (is_byte_order_marker(*state->fmt)) { + state->byte_fmt = whichtable((char**)&state->fmt); + } + + parse_format_string_body(state, root); + if (*state->fmt != '\0' && !state->error_cnt) + parse_error(state, "unexpected character in struct string"); +} + +static int +compute_offsets(formattree *tree, Py_ssize_t *offset) +{ + formattree *child = tree->child; + + while (child != NULL) { + formattree *next_child = child->sibling; + *offset = child->s_offset = align_to(*offset, child->s_alignment); + Py_ssize_t count; + + if (FormatTree_HasChildren(child)) { + count = child->s_count - 1; + if (compute_offsets(child, offset) < 0) + return -1; } else { - 
while (--num >= 0) { - codes->offset = size; - codes->size = e->size; - codes->fmtdef = e; - codes++; - size += e->size; - } + count = child->s_count; } + + if (count && (child->s_size > (PY_SSIZE_T_MAX - *offset) / count)) { + PyErr_SetString(StructError, "total struct size too long"); + return -1; + } + *offset += count * child->s_size; + + if (FormatTree_IsPadding(child)) { + FormatTree_RemoveChild(tree, child); + PyMem_FREE(child); + } + child = next_child; } - codes->fmtdef = NULL; - codes->offset = size; - codes->size = 0; + tree->s_size = *offset - tree->s_offset; + return 0; +} - overflow: - PyErr_SetString(StructError, - "total struct size too long"); - return -1; +static int +prepare_s(PyStructObject *self) +{ + parser_state state = { + native_table, + PyBytes_AS_STRING(self->s_format), + 0, + 0, + }; + Py_ssize_t offset = 0; + + parse_format_string(&state, self->s_tree); + if (state.error_cnt == 0) { + self->s_tree->s_offset = offset; + return compute_offsets(self->s_tree, &offset); + } else { + return -1; + } } static PyObject * @@ -1306,9 +1551,7 @@ PyStructObject *s = (PyStructObject*)self; Py_INCREF(Py_None); s->s_format = Py_None; - s->s_codes = NULL; - s->s_size = -1; - s->s_len = -1; + s->s_tree = formattree_new(); } return self; } @@ -1357,39 +1600,50 @@ { if (s->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *)s); - if (s->s_codes != NULL) { - PyMem_FREE(s->s_codes); + if (s->s_tree != NULL) { + formattree_free(s->s_tree); } Py_XDECREF(s->s_format); Py_TYPE(s)->tp_free((PyObject *)s); } +/* Unpacks the given buffer in accordance with the given formattree. The + * formattree should not have any children and should have a single format + * code defined. + * + * A Tuple object is returned upon success. NULL is returned upon failure. 
+ */ + static PyObject * -s_unpack_internal(PyStructObject *soself, char *startfrom) { - formatcode *code; +s_unpack_primitive(formattree *tree, const char *buf) +{ Py_ssize_t i = 0; - PyObject *result = PyTuple_New(soself->s_len); + PyObject *result; + PyObject *v; + const formatdef *e; + const char *res; + + assert(!FormatTree_HasChildren(tree)); + + result = PyTuple_New(1); if (result == NULL) return NULL; - for (code = soself->s_codes; code->fmtdef != NULL; code++) { - PyObject *v; - const formatdef *e = code->fmtdef; - const char *res = startfrom + code->offset; - if (e->format == 's') { - v = PyBytes_FromStringAndSize(res, code->size); - } else if (e->format == 'p') { - Py_ssize_t n = *(unsigned char*)res; - if (n >= code->size) - n = code->size - 1; - v = PyBytes_FromStringAndSize(res + 1, n); - } else { - v = e->unpack(res, e); - } - if (v == NULL) - goto fail; - PyTuple_SET_ITEM(result, i++, v); + e = tree->fmtdef; + res = buf + tree->s_offset; + if (e->format == 's') { + v = PyBytes_FromStringAndSize(res, tree->s_size); + } else if (e->format == 'p') { + Py_ssize_t n = *(unsigned char*)res; + if (n >= tree->s_size) + n = tree->s_size - 1; + v = PyBytes_FromStringAndSize(res + 1, n); + } else { + v = e->unpack(res, e); } + if (v == NULL) + goto fail; + PyTuple_SET_ITEM(result, i++, v); return result; fail: @@ -1397,7 +1651,71 @@ return NULL; } +/* Unpacks the given buffer in accordance with the given formattree. The + * formattree specifies how the given character buffer should be unpacked. + * + * A Tuple object is returned upon success. This Tuple object may have nested + * Tuple objects, if the formattree dictates as such. NULL is returned upon + * failure. 
+ */ +static PyObject * +s_unpack_struct(formattree *tree, const char *buf) +{ + formattree *child; + PyObject *tup = PyTuple_New(0); + if (tup == NULL) + return NULL; + + for (child = tree->child; child != NULL; child = child->sibling) { + PyObject *sub_tup = NULL; + PyObject *new_tup = NULL; + const char *child_buf; + Py_ssize_t count; + + for (count = 0; count < child->s_count; ++count) { + child_buf = buf + count * child->s_size; + + if (FormatTree_HasChildren(child)) { + PyObject *wrap_tup = NULL; + sub_tup = s_unpack_struct(child, child_buf); + if (sub_tup == NULL) + goto fail; + wrap_tup = PyTuple_New(1); + if (wrap_tup == NULL) { + Py_DECREF(sub_tup); + goto fail; + } + PyTuple_SetItem(wrap_tup, 0, sub_tup); + new_tup = PySequence_Concat(tup, wrap_tup); + Py_DECREF(wrap_tup); + } else { + sub_tup = s_unpack_primitive(child, child_buf); + if (sub_tup == NULL) + goto fail; + new_tup = PySequence_Concat(tup, sub_tup); + Py_DECREF(sub_tup); + } + + Py_DECREF(tup); + if (new_tup == NULL) + goto fail; + tup = new_tup; + } + } + + return tup; +fail: + Py_DECREF(tup); + return NULL; +} + +static PyObject * +s_unpack_internal(PyStructObject *soself, char *startfrom) +{ + return s_unpack_struct(soself->s_tree, startfrom); +} + PyDoc_STRVAR(s_unpack__doc__, "S.unpack(buffer) -> (v1, v2, ...)\n\ \n\ @@ -1413,13 +1731,13 @@ PyStructObject *soself = (PyStructObject *)self; assert(PyStruct_Check(self)); - assert(soself->s_codes != NULL); + assert(soself->s_tree != NULL); if (PyObject_GetBuffer(input, &vbuf, PyBUF_SIMPLE) < 0) return NULL; - if (vbuf.len != soself->s_size) { + if (vbuf.len != soself->s_tree->s_size) { PyErr_Format(StructError, "unpack requires a bytes object of length %zd", - soself->s_size); + soself->s_tree->s_size); PyBuffer_Release(&vbuf); return NULL; } @@ -1447,7 +1765,7 @@ PyStructObject *soself = (PyStructObject *)self; assert(PyStruct_Check(self)); - assert(soself->s_codes != NULL); + assert(soself->s_tree != NULL); if 
(!PyArg_ParseTupleAndKeywords(args, kwds, "O|n:unpack_from", kwlist, @@ -1457,10 +1775,10 @@ return NULL; if (offset < 0) offset += vbuf.len; - if (offset < 0 || vbuf.len - offset < soself->s_size) { + if (offset < 0 || vbuf.len - offset < soself->s_tree->s_size) { PyErr_Format(StructError, "unpack_from requires a buffer of at least %zd bytes", - soself->s_size); + soself->s_tree->s_size); PyBuffer_Release(&vbuf); return NULL; } @@ -1469,91 +1787,141 @@ return result; } +/* Packs a single primitive value into the given buffer. + * + * Takes a leaf formattree node, the value to pack, and a character buffer for + * writing the packed bytes. 0 is returned on success, -1 is returned if + * there is an error. + */ -/* - * Guts of the pack function. +static int +s_pack_primitive(formattree *tree, PyObject *v, char *buf) +{ + Py_ssize_t n; + const formatdef *e; + char *res; + + e = tree->fmtdef; + res = buf + tree->s_offset; + if (e->format == 's') { + int isstring; + void *p; + isstring = PyBytes_Check(v); + if (!isstring && !PyByteArray_Check(v)) { + PyErr_SetString(StructError, + "argument for 's' must be a bytes object"); + return -1; + } + if (isstring) { + n = PyBytes_GET_SIZE(v); + p = PyBytes_AS_STRING(v); + } + else { + n = PyByteArray_GET_SIZE(v); + p = PyByteArray_AS_STRING(v); + } + if (n > tree->s_size) + n = tree->s_size; + if (n > 0) + memcpy(res, p, n); + } else if (e->format == 'p') { + int isstring; + void *p; + isstring = PyBytes_Check(v); + if (!isstring && !PyByteArray_Check(v)) { + PyErr_SetString(StructError, + "argument for 'p' must be a bytes object"); + return -1; + } + if (isstring) { + n = PyBytes_GET_SIZE(v); + p = PyBytes_AS_STRING(v); + } + else { + n = PyByteArray_GET_SIZE(v); + p = PyByteArray_AS_STRING(v); + } + if (n > (tree->s_size - 1)) + n = tree->s_size - 1; + if (n > 0) + memcpy(res + 1, p, n); + if (n > 255) + n = 255; + *res = Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned char); + } else { + if (e->pack(res, v, e) < 0) { + if 
(PyLong_Check(v) && PyErr_ExceptionMatches(PyExc_OverflowError)) + PyErr_SetString(StructError, + "long too large to convert to int"); + return -1; + } + } + + /* Success */ + return 0; +} + + +/* Packs a potentially nested tuple of arguments to the given buffer. * - * Takes a struct object, a tuple of arguments, and offset in that tuple of - * argument for where to start processing the arguments for packing, and a - * character buffer for writing the packed string. The caller must insure - * that the buffer may contain the required length for packing the arguments. - * 0 is returned on success, 1 is returned if there is an error. - * + * Takes a struct object, a tuple of arguments, and a character buffer for + * writing the packed string. If there are nested structures within the given + * structure, then the nested structures are recursively packed. 0 is returned + * on success, -1 is returned if there is an error. */ + static int -s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char* buf) +s_pack_struct(formattree *tree, PyObject *args, char *buf) { - formatcode *code; - /* XXX(nnorwitz): why does i need to be a local? can we use - the offset parameter or do we need the wider width? 
*/ - Py_ssize_t i; + Py_ssize_t arg_i = 0; + formattree *child; - memset(buf, '\0', soself->s_size); - i = offset; - for (code = soself->s_codes; code->fmtdef != NULL; code++) { - Py_ssize_t n; - PyObject *v = PyTuple_GET_ITEM(args, i++); - const formatdef *e = code->fmtdef; - char *res = buf + code->offset; - if (e->format == 's') { - int isstring; - void *p; - isstring = PyBytes_Check(v); - if (!isstring && !PyByteArray_Check(v)) { - PyErr_SetString(StructError, - "argument for 's' must be a bytes object"); - return -1; + if (PyTuple_GET_SIZE(args) != tree->s_len) + { + PyErr_Format(StructError, + "pack requires exactly %zd arguments", tree->s_len); + return -1; + } + + for (child = tree->child; child != NULL; child = child->sibling) { + int ret = 0; + Py_ssize_t count; + + for (count = 0; count < child->s_count; ++count) { + PyObject *arg = PyTuple_GET_ITEM(args, arg_i); + char *child_buf = buf + count * child->s_size; + + if (FormatTree_HasChildren(child)) { + if (!PyTuple_Check(arg)) { + PyErr_SetString(StructError, + "argument for 'T{...}' must be a tuple"); + return -1; + } + ret = s_pack_struct(child, arg, child_buf); + } else { + ret = s_pack_primitive(child, arg, child_buf); } - if (isstring) { - n = PyBytes_GET_SIZE(v); - p = PyBytes_AS_STRING(v); - } - else { - n = PyByteArray_GET_SIZE(v); - p = PyByteArray_AS_STRING(v); - } - if (n > code->size) - n = code->size; - if (n > 0) - memcpy(res, p, n); - } else if (e->format == 'p') { - int isstring; - void *p; - isstring = PyBytes_Check(v); - if (!isstring && !PyByteArray_Check(v)) { - PyErr_SetString(StructError, - "argument for 'p' must be a bytes object"); + + if (ret < 0) { return -1; } - if (isstring) { - n = PyBytes_GET_SIZE(v); - p = PyBytes_AS_STRING(v); - } - else { - n = PyByteArray_GET_SIZE(v); - p = PyByteArray_AS_STRING(v); - } - if (n > (code->size - 1)) - n = code->size - 1; - if (n > 0) - memcpy(res + 1, p, n); - if (n > 255) - n = 255; - *res = Py_SAFE_DOWNCAST(n, Py_ssize_t, unsigned 
char); - } else { - if (e->pack(res, v, e) < 0) { - if (PyLong_Check(v) && PyErr_ExceptionMatches(PyExc_OverflowError)) - PyErr_SetString(StructError, - "long too large to convert to int"); - return -1; - } + + arg_i += 1; } } - /* Success */ return 0; } +static int +s_pack_internal(PyStructObject *soself, PyObject *args, int offset, char *buf) +{ + memset(buf, '\0', soself->s_tree->s_size); + PyObject *new_args = PyTuple_GetSlice(args, offset, PyTuple_Size(args)); + int ret = s_pack_struct(soself->s_tree, new_args, buf); + Py_DECREF(new_args); + return ret; +} PyDoc_STRVAR(s_pack__doc__, "S.pack(v1, v2, ...) -> bytes\n\ @@ -1571,16 +1939,9 @@ /* Validate arguments. */ soself = (PyStructObject *)self; assert(PyStruct_Check(self)); - assert(soself->s_codes != NULL); - if (PyTuple_GET_SIZE(args) != soself->s_len) - { - PyErr_Format(StructError, - "pack requires exactly %zd arguments", soself->s_len); - return NULL; - } /* Allocate a new string */ - result = PyBytes_FromStringAndSize((char *)NULL, soself->s_size); + result = PyBytes_FromStringAndSize((char *)NULL, soself->s_tree->s_size); if (result == NULL) return NULL; @@ -1611,14 +1972,6 @@ /* Validate arguments. +1 is for the first arg as buffer. 
*/ soself = (PyStructObject *)self; assert(PyStruct_Check(self)); - assert(soself->s_codes != NULL); - if (PyTuple_GET_SIZE(args) != (soself->s_len + 2)) - { - PyErr_Format(StructError, - "pack_into requires exactly %zd arguments", - (soself->s_len + 2)); - return NULL; - } /* Extract a writable memory buffer from the first argument */ if ( PyObject_AsWriteBuffer(PyTuple_GET_ITEM(args, 0), @@ -1637,10 +1990,10 @@ offset += buffer_len; /* Check boundaries */ - if (offset < 0 || (buffer_len - offset) < soself->s_size) { + if (offset < 0 || (buffer_len - offset) < soself->s_tree->s_size) { PyErr_Format(StructError, "pack_into requires a buffer of at least %zd bytes", - soself->s_size); + soself->s_tree->s_size); return NULL; } @@ -1662,7 +2015,7 @@ static PyObject * s_get_size(PyStructObject *self, void *unused) { - return PyLong_FromSsize_t(self->s_size); + return PyLong_FromSsize_t(self->s_tree->s_size); } /* List of functions */ @@ -1789,7 +2142,7 @@ PyObject *s_object = cache_struct(fmt); if (s_object == NULL) return NULL; - n = ((PyStructObject *)s_object)->s_size; + n = ((PyStructObject *)s_object)->s_tree->s_size; Py_DECREF(s_object); return PyLong_FromSsize_t(n); } @@ -1943,8 +2296,9 @@ >: big-endian, std. size & alignment\n\ !: same as >\n\ \n\ -The remaining chars indicate types of args and must match exactly;\n\ -these can be preceded by a decimal repeat count:\n\ +The primitive format chars are used to indicate types of primitive arg\n\ +values and must match exactly; these can be preceded by a decimal repeat\n\ +count:\n\ x: pad byte (no data); c:char; b:signed byte; B:unsigned byte;\n\ ?: _Bool (requires C99; if not available, char is used instead)\n\ h:short; H:unsigned short; i:int; I:unsigned int;\n\ @@ -1957,6 +2311,10 @@ q:long long; Q:unsigned long long\n\ Whitespace between formats is ignored.\n\ \n\ +Additionally, there may be nested structures. These are specified by\n\ +surrounding a format string with 'T{' ... '}'. 
They may be arbitrarily\n\ +nested.\n\ +\n\ The variable struct.error is an exception raised on errors.\n");