Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(1846)

Unified Diff: Objects/unicodeobject.c

Issue 14419: Faster ascii decoding (Closed)
Patch Set: Created 1 year, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
--- a/Objects/unicodeobject.c Thu Mar 22 22:40:44 2012 -0400
+++ b/Objects/unicodeobject.c Tue Mar 27 01:49:43 2012 +0300
@@ -6874,7 +6874,6 @@
Py_ssize_t endinpos;
Py_ssize_t outpos;
const char *e;
- int has_error;
const unsigned char *p = (const unsigned char *)s;
const unsigned char *end = p + size;
const unsigned char *aligned_end = (const unsigned char *) ((size_t) end & ~LONG_PTR_MASK);
@@ -6890,8 +6889,38 @@
if (size == 1 && (unsigned char)s[0] < 128)
return get_latin1_char((unsigned char)s[0]);
- has_error = 0;
- while (p < end && !has_error) {
+ v = PyUnicode_New(size, 127);
+ if (v == NULL)
+ goto onError;
+
+#if SIZEOF_LONG <= SIZEOF_VOID_P
+ if (!((size_t) p & LONG_PTR_MASK)) {
+ /* Fast path, see below in PyUnicode_DecodeUTF8Stateful for
+ an explanation. */
+ /* Help register allocation */
+ register const unsigned char *_p = p;
+ register unsigned char * q = PyUnicode_1BYTE_DATA(v);
+ while (_p < aligned_end) {
+ unsigned long value = *(const unsigned long *) _p;
+ if (value & ASCII_CHAR_MASK)
+ goto hasError;
+ *((unsigned long *)q) = value;
+ _p += SIZEOF_LONG;
+ q += SIZEOF_LONG;
+ }
+ p = _p;
+ while (p < end) {
+ if (*p & 0x80)
+ goto hasError;
+ *q = *p;
+ ++p;
+ ++q;
+ }
+ return v;
+ }
+#endif
+
+ while (p < end) {
/* Fast path, see below in PyUnicode_DecodeUTF8Stateful for
an explanation. */
if (!((size_t) p & LONG_PTR_MASK)) {
@@ -6899,36 +6928,24 @@
register const unsigned char *_p = p;
while (_p < aligned_end) {
unsigned long value = *(unsigned long *) _p;
- if (value & ASCII_CHAR_MASK) {
- has_error = 1;
- break;
- }
+ if (value & ASCII_CHAR_MASK)
+ goto hasError;
_p += SIZEOF_LONG;
}
if (_p == end)
break;
- if (has_error)
- break;
p = _p;
}
- if (*p & 0x80) {
- has_error = 1;
- break;
- }
- else {
- ++p;
- }
- }
- if (!has_error)
- return unicode_fromascii((const unsigned char *)s, size);
-
- v = PyUnicode_New(size, 127);
- if (v == NULL)
- goto onError;
- if (size == 0)
- return v;
- kind = PyUnicode_KIND(v);
- data = PyUnicode_DATA(v);
+ if (*p & 0x80)
+ goto hasError;
+ ++p;
+ }
+ memcpy(PyUnicode_1BYTE_DATA(v), s, size);
+ return v;
+
+hasError:
+ kind = PyUnicode_1BYTE_KIND;
+ data = PyUnicode_1BYTE_DATA(v);
outpos = 0;
e = s + size;
while (s < e) {
« no previous file with comments | « no previous file | no next file » | no next file with comments »

RSS Feeds: Recent Issues | This issue
This is Rietveld cbc36f91f3f7