This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in the Python Developer's Guide.

Author abael
Recipients abael
Date 2012-08-01.09:33:51
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <1343813632.92.0.115452555082.issue15522@psf.upfronthosting.co.za>
In-reply-to
Content
Python-2.7.3/Objects/stringobject.c (SHA256SUM ad7795c75e2a25247e4dea4cc5327c225c4da03b7c7d57226c817ba6d12a316c)
static PyObject *string_join(PyStringObject *self, PyObject *orig);

OLD IMPLEMENTATION LOGIC (pseudo-code):
        char *sep = PyString_AS_STRING(self);
        const Py_ssize_t seplen = PyString_GET_SIZE(self);

        seq = PySequence_Fast(orig, "");
        seqlen = PySequence_Size(seq);

        if (seqlen == 0)
            return PyString_FromString("");
        else if (seqlen == 1) return the first (and only) item as-is;
        else{
            for (i = 0; i < seqlen; i++) {
                const size_t old_sz = sz;
                item = PySequence_Fast_GET_ITEM(seq, i);
                if (!PyString_Check(item)){
                   if ( Py_USING_UNICODE and PyUnicode_Check(item))
                        return PyUnicode_Join((PyObject *)self, seq);
                   else  PyErr_Format(...);
                }
                sz += PyString_GET_SIZE(item);

                if (i != 0)
                    sz += seplen;
            }
        }

        /* Allocate result space. */
        res = PyString_FromStringAndSize((char*)NULL, sz);

        /* Catenate everything. */
        p = PyString_AS_STRING(res);
        for (i = 0; i < seqlen; ++i) {
            size_t n;
            item = PySequence_Fast_GET_ITEM(seq, i);
            n = PyString_GET_SIZE(item);
            Py_MEMCPY(p, PyString_AS_STRING(item), n);
            p += n;
            if (i < seqlen - 1) {
                Py_MEMCPY(p, sep, seplen);
                p += seplen;
            }
        }




Abael's IMPLEMENTATION LOGIC (pseudo-code):
        char *sep = PyString_AS_STRING(self);
        const Py_ssize_t seplen = PyString_GET_SIZE(self);

        seq = PySequence_Fast(orig, "");
        seqlen = PySequence_Size(seq);

        if (seqlen == 0)
            return PyString_FromString("");
        if (seqlen == 1)
            return the first (and only) item as-is;

        if (seqlen <0)return NULL

         /**** PREFETCH start: get the size of the first item, since here we can assume seqlen >= 2 ****/
        register size_t sz=0;
        register size_t old_sz=0;
        PyObject *res = NULL;

        item = PySequence_Fast_GET_ITEM(seq, 0);
        if (!PyString_Check(item)){
           if ( Py_USING_UNICODE and PyUnicode_Check(item))
                return PyUnicode_Join((PyObject *)self, seq);
           else  PyErr_Format(...);
        }

        sz += PyString_GET_SIZE(item);
        if (sz < old_sz || sz > PY_SSIZE_T_MAX) PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
         /**** PREFETCH end: get the size of the first item, since here we can assume seqlen >= 2 ****/

        register Py_ssize_t i;
        for (i=1; i < seqlen; i++) { /**** so here the loop can start from 1 ****/
            const size_t old_sz = sz;
            item = PySequence_Fast_GET_ITEM(seq, i);
            if (!PyString_Check(item)){
               if ( Py_USING_UNICODE and PyUnicode_Check(item))
                    return PyUnicode_Join((PyObject *)self, seq);
               else  PyErr_Format(...);
            }
            sz += PyString_GET_SIZE(item);
            sz += seplen; /**** now we don't need to test (i != 0) every loop ****/
        }

        /* Allocate result space. */
        res = PyString_FromStringAndSize((char*)NULL, sz);

        /* Catenate everything. */
        /**** PREFETCH start: memcpy the first item up front, since here we can assume seqlen >= 2 ****/
        register char *p = PyString_AS_STRING(res);
        item = PySequence_Fast_GET_ITEM(seq, 0);
        sz = PyString_GET_SIZE(item);
        Py_MEMCPY(p, PyString_AS_STRING(item),sz);
        p += sz;
        /**** PREFETCH end: memcpy the first item up front, since here we can assume seqlen >= 2 ****/

        for (i=1; i<seqlen; ++i){ /**** this loop also starts from 1 ****/
            item = PySequence_Fast_GET_ITEM(seq, i);
            sz = PyString_GET_SIZE(item);
            Py_MEMCPY(p, sep, seplen); /**** avoids the (i < seqlen - 1) test that the old implementation performs on every iteration ****/
            p += seplen;
            Py_MEMCPY(p, PyString_AS_STRING(item),sz);
            p += sz;
        }
        return res;
History
Date User Action Args
2012-08-01 09:33:52 abael set recipients: + abael
2012-08-01 09:33:52 abael set messageid: <1343813632.92.0.115452555082.issue15522@psf.upfronthosting.co.za>
2012-08-01 09:33:52 abael link issue15522 messages
2012-08-01 09:33:51 abael create