diff -r 724473a18ada Doc/howto/clinic.rst --- a/Doc/howto/clinic.rst Thu Apr 16 17:24:52 2015 -0400 +++ b/Doc/howto/clinic.rst Sun Apr 19 00:19:22 2015 -0700 @@ -803,8 +803,8 @@ Please note, not every possible combination of arguments will work. Often these arguments are implemented internally by specific ``PyArg_ParseTuple`` *format units*, with specific behavior. For example, currently you cannot -call ``str`` and pass in ``zeroes=True`` without also specifying an ``encoding``; -although it's perfectly reasonable to think this would work, these semantics don't +call ``str`` and pass in ``zeroes=True`` without also specifying an ``encoding``. +Although it's perfectly reasonable to think this would work, these semantics don't map to any existing format unit. So Argument Clinic doesn't support it. (Or, at least, not yet.) @@ -816,13 +816,13 @@ ``'B'`` ``unsigned_char(bitwise=True)`` ``'b'`` ``unsigned_char`` ``'c'`` ``char`` -``'C'`` ``int(types='str')`` +``'C'`` ``int(types={'str'})`` ``'d'`` ``double`` ``'D'`` ``Py_complex`` +``'es'`` ``str(encoding='name_of_encoding')`` ``'es#'`` ``str(encoding='name_of_encoding', length=True, zeroes=True)`` -``'es'`` ``str(encoding='name_of_encoding')`` -``'et#'`` ``str(encoding='name_of_encoding', types='bytes bytearray str', length=True)`` -``'et'`` ``str(encoding='name_of_encoding', types='bytes bytearray str')`` +``'et'`` ``str(encoding='name_of_encoding', types={'bytes', 'bytearray', 'str'})`` +``'et#'`` ``str(encoding='name_of_encoding', length=True, types={'bytes', 'bytearray', 'str'}, zeroes=True)`` ``'f'`` ``float`` ``'h'`` ``short`` ``'H'`` ``unsigned_short(bitwise=True)`` @@ -832,27 +832,27 @@ ``'K'`` ``unsigned_PY_LONG_LONG(bitwise=True)`` ``'L'`` ``PY_LONG_LONG`` ``'n'`` ``Py_ssize_t`` +``'O'`` ``object`` ``'O!'`` ``object(subclass_of='&PySomething_Type')`` ``'O&'`` ``object(converter='name_of_c_function')`` -``'O'`` ``object`` ``'p'`` ``bool`` -``'s#'`` ``str(length=True)`` ``'S'`` ``PyBytesObject`` ``'s'`` ``str`` 
-``'s*'`` ``Py_buffer(types='str bytes bytearray buffer')`` +``'s#'`` ``str(length=True)`` +``'s*'`` ``Py_buffer(types={'buffer', 'str'})`` +``'U'`` ``unicode`` +``'u'`` ``Py_UNICODE`` ``'u#'`` ``Py_UNICODE(length=True)`` -``'u'`` ``Py_UNICODE`` -``'U'`` ``unicode`` -``'w*'`` ``Py_buffer(types='bytearray rwbuffer')`` -``'y#'`` ``str(types='bytes', length=True)`` +``'w*'`` ``Py_buffer(types={'rwbuffer'})`` ``'Y'`` ``PyByteArrayObject`` -``'y'`` ``str(types='bytes')`` +``'y'`` ``str(types={'bytes'})`` +``'y#'`` ``str(types={'robuffer'}, length=True)`` ``'y*'`` ``Py_buffer`` +``'Z'`` ``Py_UNICODE(nullable=True)`` ``'Z#'`` ``Py_UNICODE(nullable=True, length=True)`` +``'z'`` ``str(nullable=True)`` ``'z#'`` ``str(nullable=True, length=True)`` -``'Z'`` ``Py_UNICODE(nullable=True)`` -``'z'`` ``str(nullable=True)`` -``'z*'`` ``Py_buffer(types='str bytes bytearray buffer', nullable=True)`` +``'z*'`` ``Py_buffer(types={'buffer', 'str'}, nullable=True)`` ========= ================================================================================= As an example, here's our sample ``pickle.Pickler.dump`` using the proper diff -r 724473a18ada Modules/_dbmmodule.c --- a/Modules/_dbmmodule.c Thu Apr 16 17:24:52 2015 -0400 +++ b/Modules/_dbmmodule.c Sun Apr 19 00:19:22 2015 -0700 @@ -272,7 +272,7 @@ self: dbmobject - key: str(types={'str', 'robuffer'}, length=True) + key: str(types={'str', 'robuffer'}, length=True, zeroes=True) default: object = None / @@ -282,7 +282,7 @@ static PyObject * dbm_dbm_get_impl(dbmobject *dp, const char *key, Py_ssize_clean_t key_length, PyObject *default_value) -/*[clinic end generated code: output=4f5c0e523eaf1251 input=f81478bc211895ef]*/ +/*[clinic end generated code: output=4f5c0e523eaf1251 input=9402c0af8582dc69]*/ { datum dbm_key, val; diff -r 724473a18ada Tools/clinic/clinic.py --- a/Tools/clinic/clinic.py Thu Apr 16 17:24:52 2015 -0400 +++ b/Tools/clinic/clinic.py Sun Apr 19 00:19:22 2015 -0700 @@ -2197,6 +2197,8 @@ # The C type to use for this 
variable. # 'type' should be a Python string specifying the type, e.g. "int". # If this is a pointer type, the type string should end with ' *'. + # It's explicitly supported that you can set a new default value for + # this at class scope in a subclass. type = None # The Python default value for this parameter, as a Python value. @@ -2640,62 +2642,113 @@ # 'robuffer': any object supporting the buffer interface, but must not be writeable # -@add_legacy_c_converter('s#', types={"str", "robuffer"}, length=True) -@add_legacy_c_converter('y', types={"robuffer"}) -@add_legacy_c_converter('y#', types={"robuffer"}, length=True) -@add_legacy_c_converter('z', nullable=True) -@add_legacy_c_converter('z#', types={"str", "robuffer"}, nullable=True, length=True) -# add_legacy_c_converter not supported for es, es#, et, et# -# because of their extra encoding argument +# str_converter_argument_map is a flat dict keyed by a tuple: +# format_unit = str_converter_argument_map[ +# (types, encoding, length, nullable)] +# types is a frozenset; the rest are bools. +# (zeroes is accepted but is not part of the key.) +# +# if a key isn't present in the map, that combination of arguments +# is invalid. 
+ +str_converter_argument_map = {} + class str_converter(CConverter): type = 'const char *' default_type = (str, Null, NoneType) format_unit = 's' - def converter_init(self, *, encoding=None, types={"str"}, - length=False, nullable=False, zeroes=False): - - self.length = bool(length) - - is_b_or_ba = types == {"bytes", "bytearray"} - is_str = types == {"str"} - is_robuffer = types == {"robuffer"} - is_str_or_robuffer = types == {"str", "robuffer"} - - format_unit = None - - if encoding: - self.encoding = encoding - - if is_str and not length and not zeroes and not nullable: - format_unit = 'es' - elif is_str and length and zeroes and nullable: - format_unit = 'es#' - elif is_b_or_ba and not length and not zeroes and not nullable: - format_unit = 'et' - elif is_b_or_ba and length and zeroes and nullable: - format_unit = 'et#' - - else: - if zeroes: - fail("str_converter: illegal combination of arguments (zeroes is only legal with an encoding)") - - if is_str and not length and not nullable: - format_unit = 's' - elif is_str and not length and nullable: - format_unit = 'z' - elif is_robuffer and not length and not nullable: - format_unit = 'y' - elif is_robuffer and length and not nullable: - format_unit = 'y#' - elif is_str_or_robuffer and length and not nullable: - format_unit = 's#' - elif is_str_or_robuffer and length and nullable: - format_unit = 'z#' - + def converter_init(self, *, types={"str"}, + encoding=None, length=False, nullable=False, zeroes=False): + + key = (frozenset(types), bool(encoding), bool(length), bool(nullable)) + format_unit = str_converter_argument_map.get(key) if not format_unit: fail("str_converter: illegal combination of arguments") + self.format_unit = format_unit + self.length = bool(length) + if encoding: + if self.default not in (Null, None, unspecified): + fail("str_converter: Argument Clinic doesn't support default values for encoded strings") + self.encoding = encoding + self.type = 'char *' + # sorry, clinic can't support 
preallocated buffers + # for es# and et# + self.c_default = "NULL" + + def cleanup(self): + if self.encoding: + name = ensure_legal_c_identifier(self.name) + return "".join(["if (", name, ")\n PyMem_FREE(", name, ");\n"]) + +# +# This is the fourth or fifth rewrite of registering all the +# crazy string converter format units. Previous approaches hid +# bugs--generally mismatches between the semantics of the format +# unit and the arguments necessary to represent those semantics +# properly. Hopefully with this approach we'll get it 100% right. +# +# The r() function (short for "register") both registers the +# mapping from arguments to format unit *and* registers the +# legacy C converter for that format unit. +# +def r(format_unit, *, types, encoding, length, nullable, zeroes): + d = str_converter_argument_map + + if not encoding and format_unit != 's': + # add the legacy c converters here too. + # + # note: add_legacy_c_converter can't work for + # es, es#, et, or et# + # because of their extra encoding argument + # also don't add the converter for 's' because + # the metaclass for CConverter adds it for us. 
+ kwargs = {} + if types != {'str'}: + kwargs['types'] = types + if length: + kwargs['length'] = True + if nullable: + kwargs['nullable'] = True + if zeroes: + kwargs['zeroes'] = True + added_f = functools.partial(str_converter, **kwargs) + legacy_converters[format_unit] = added_f + + d[frozenset(types), bool(encoding), bool(length), bool(nullable)] = format_unit + +r('es', types={'str'}, + encoding=True , length=False, nullable=False, zeroes=False) + +r('es#', types={'str'}, + encoding=True , length=True , nullable=False, zeroes=True ) + +r('et', types={'bytes', 'bytearray', 'str'}, + encoding=True , length=False, nullable=False, zeroes=False) + +r('et#', types={'bytes', 'bytearray', 'str'}, + encoding=True , length=True , nullable=False, zeroes=True ) + +r('s', types={'str'}, + encoding=False, length=False, nullable=False, zeroes=False) + +r('s#', types={'robuffer', 'str'}, + encoding=False, length=True , nullable=False, zeroes=True ) + +r('y', types={'robuffer'}, + encoding=False, length=False, nullable=False, zeroes=False) + +r('y#', types={'robuffer'}, + encoding=False, length=True , nullable=False, zeroes=True ) + +r('z', types={'str'}, + encoding=False, length=False, nullable=True , zeroes=False) + +r('z#', types={'robuffer', 'str'}, + encoding=False, length=True , nullable=True , zeroes=True ) + +del r class PyBytesObject_converter(CConverter): @@ -2728,8 +2781,8 @@ self.length = True self.format_unit = format_unit -@add_legacy_c_converter('s*', types={'str', 'buffer'}) -@add_legacy_c_converter('z*', types={'str', 'buffer'}, nullable=True) +@add_legacy_c_converter('s*', types={'buffer', 'str'}) +@add_legacy_c_converter('z*', types={'buffer', 'str'}, nullable=True) @add_legacy_c_converter('w*', types={'rwbuffer'}) class Py_buffer_converter(CConverter): type = 'Py_buffer' @@ -2738,6 +2791,8 @@ c_ignored_default = "{NULL, NULL}" def converter_init(self, *, types={'buffer'}, nullable=False): + # didn't bother with zeroes=True here, + # all four legacy format 
units allow NULs if self.default not in (unspecified, None): fail("The only legal default value for Py_buffer is None.") self.c_default = self.c_ignored_default