Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(6)

Side by Side Diff: Objects/unicodeobject.c

Issue 10639: reindent.py converts newlines to platform default
Patch Set: Created 8 years, 8 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « Objects/typeobject.c ('k') | Parser/asdl_c.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 2
3 Unicode implementation based on original code by Fredrik Lundh, 3 Unicode implementation based on original code by Fredrik Lundh,
4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the 4 modified by Marc-Andre Lemburg <mal@lemburg.com> according to the
5 Unicode Integration Proposal (see file Misc/unicode.txt). 5 Unicode Integration Proposal (see file Misc/unicode.txt).
6 6
7 Major speed upgrades to the method implementations at the Reykjavik 7 Major speed upgrades to the method implementations at the Reykjavik
8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke. 8 NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
9 9
10 Copyright (c) Corporation for National Research Initiatives. 10 Copyright (c) Corporation for National Research Initiatives.
(...skipping 1488 matching lines...) Expand 10 before | Expand all | Expand 10 after
1499 1499
1500 /* Shortcuts for common default encodings */ 1500 /* Shortcuts for common default encodings */
1501 if (normalize_encoding(encoding, lower, sizeof(lower))) { 1501 if (normalize_encoding(encoding, lower, sizeof(lower))) {
1502 if ((strcmp(lower, "utf-8") == 0) || 1502 if ((strcmp(lower, "utf-8") == 0) ||
1503 (strcmp(lower, "utf8") == 0)) 1503 (strcmp(lower, "utf8") == 0))
1504 return PyUnicode_DecodeUTF8(s, size, errors); 1504 return PyUnicode_DecodeUTF8(s, size, errors);
1505 else if ((strcmp(lower, "latin-1") == 0) || 1505 else if ((strcmp(lower, "latin-1") == 0) ||
1506 (strcmp(lower, "latin1") == 0) || 1506 (strcmp(lower, "latin1") == 0) ||
1507 (strcmp(lower, "iso-8859-1") == 0)) 1507 (strcmp(lower, "iso-8859-1") == 0))
1508 return PyUnicode_DecodeLatin1(s, size, errors); 1508 return PyUnicode_DecodeLatin1(s, size, errors);
1509 #ifdef HAVE_MBCS 1509 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1510 else if (strcmp(lower, "mbcs") == 0) 1510 else if (strcmp(lower, "mbcs") == 0)
1511 return PyUnicode_DecodeMBCS(s, size, errors); 1511 return PyUnicode_DecodeMBCS(s, size, errors);
1512 #endif 1512 #endif
1513 else if (strcmp(lower, "ascii") == 0) 1513 else if (strcmp(lower, "ascii") == 0)
1514 return PyUnicode_DecodeASCII(s, size, errors); 1514 return PyUnicode_DecodeASCII(s, size, errors);
1515 else if (strcmp(lower, "utf-16") == 0) 1515 else if (strcmp(lower, "utf-16") == 0)
1516 return PyUnicode_DecodeUTF16(s, size, errors, 0); 1516 return PyUnicode_DecodeUTF16(s, size, errors, 0);
1517 else if (strcmp(lower, "utf-32") == 0) 1517 else if (strcmp(lower, "utf-32") == 0)
1518 return PyUnicode_DecodeUTF32(s, size, errors, 0); 1518 return PyUnicode_DecodeUTF32(s, size, errors, 0);
1519 } 1519 }
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
1637 goto onError; 1637 goto onError;
1638 return v; 1638 return v;
1639 1639
1640 onError: 1640 onError:
1641 return NULL; 1641 return NULL;
1642 } 1642 }
1643 1643
1644 PyObject * 1644 PyObject *
1645 PyUnicode_EncodeFSDefault(PyObject *unicode) 1645 PyUnicode_EncodeFSDefault(PyObject *unicode)
1646 { 1646 {
1647 #ifdef HAVE_MBCS 1647 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1648 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), 1648 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
1649 PyUnicode_GET_SIZE(unicode), 1649 PyUnicode_GET_SIZE(unicode),
1650 NULL); 1650 NULL);
1651 #elif defined(__APPLE__) 1651 #elif defined(__APPLE__)
1652 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), 1652 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
1653 PyUnicode_GET_SIZE(unicode), 1653 PyUnicode_GET_SIZE(unicode),
1654 "surrogateescape"); 1654 "surrogateescape");
1655 #else 1655 #else
1656 PyInterpreterState *interp = PyThreadState_GET()->interp; 1656 PyInterpreterState *interp = PyThreadState_GET()->interp;
1657 /* Bootstrap check: if the filesystem codec is implemented in Python, we 1657 /* Bootstrap check: if the filesystem codec is implemented in Python, we
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
1739 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), 1739 return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
1740 PyUnicode_GET_SIZE(unicode), 1740 PyUnicode_GET_SIZE(unicode),
1741 errors); 1741 errors);
1742 } 1742 }
1743 else if ((strcmp(lower, "latin-1") == 0) || 1743 else if ((strcmp(lower, "latin-1") == 0) ||
1744 (strcmp(lower, "latin1") == 0) || 1744 (strcmp(lower, "latin1") == 0) ||
1745 (strcmp(lower, "iso-8859-1") == 0)) 1745 (strcmp(lower, "iso-8859-1") == 0))
1746 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode), 1746 return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
1747 PyUnicode_GET_SIZE(unicode), 1747 PyUnicode_GET_SIZE(unicode),
1748 errors); 1748 errors);
1749 #ifdef HAVE_MBCS 1749 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1750 else if (strcmp(lower, "mbcs") == 0) 1750 else if (strcmp(lower, "mbcs") == 0)
1751 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), 1751 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
1752 PyUnicode_GET_SIZE(unicode), 1752 PyUnicode_GET_SIZE(unicode),
1753 errors); 1753 errors);
1754 #endif 1754 #endif
1755 else if (strcmp(lower, "ascii") == 0) 1755 else if (strcmp(lower, "ascii") == 0)
1756 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), 1756 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
1757 PyUnicode_GET_SIZE(unicode), 1757 PyUnicode_GET_SIZE(unicode),
1758 errors); 1758 errors);
1759 } 1759 }
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
1841 1841
1842 PyObject* 1842 PyObject*
1843 PyUnicode_DecodeFSDefault(const char *s) { 1843 PyUnicode_DecodeFSDefault(const char *s) {
1844 Py_ssize_t size = (Py_ssize_t)strlen(s); 1844 Py_ssize_t size = (Py_ssize_t)strlen(s);
1845 return PyUnicode_DecodeFSDefaultAndSize(s, size); 1845 return PyUnicode_DecodeFSDefaultAndSize(s, size);
1846 } 1846 }
1847 1847
1848 PyObject* 1848 PyObject*
1849 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) 1849 PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
1850 { 1850 {
1851 #ifdef HAVE_MBCS 1851 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1852 return PyUnicode_DecodeMBCS(s, size, NULL); 1852 return PyUnicode_DecodeMBCS(s, size, NULL);
1853 #elif defined(__APPLE__) 1853 #elif defined(__APPLE__)
1854 return PyUnicode_DecodeUTF8(s, size, "surrogateescape"); 1854 return PyUnicode_DecodeUTF8(s, size, "surrogateescape");
1855 #else 1855 #else
1856 PyInterpreterState *interp = PyThreadState_GET()->interp; 1856 PyInterpreterState *interp = PyThreadState_GET()->interp;
1857 /* Bootstrap check: if the filesystem codec is implemented in Python, we 1857 /* Bootstrap check: if the filesystem codec is implemented in Python, we
1858 cannot use it to encode and decode filenames before it is loaded. Load 1858 cannot use it to encode and decode filenames before it is loaded. Load
1859 the Python codec requires to encode at least its own filename. Use the C 1859 the Python codec requires to encode at least its own filename. Use the C
1860 version of the locale codec until the codec registry is initialized and 1860 version of the locale codec until the codec registry is initialized and
1861 the Python codec is loaded. 1861 the Python codec is loaded.
(...skipping 3073 matching lines...) Expand 10 before | Expand all | Expand 10 after
4935 { 4935 {
4936 if (!PyUnicode_Check(unicode)) { 4936 if (!PyUnicode_Check(unicode)) {
4937 PyErr_BadArgument(); 4937 PyErr_BadArgument();
4938 return NULL; 4938 return NULL;
4939 } 4939 }
4940 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode), 4940 return PyUnicode_EncodeASCII(PyUnicode_AS_UNICODE(unicode),
4941 PyUnicode_GET_SIZE(unicode), 4941 PyUnicode_GET_SIZE(unicode),
4942 NULL); 4942 NULL);
4943 } 4943 }
4944 4944
4945 #ifdef HAVE_MBCS 4945 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
4946 4946
4947 /* --- MBCS codecs for Windows -------------------------------------------- */ 4947 /* --- MBCS codecs for Windows -------------------------------------------- */
4948 4948
4949 #if SIZEOF_INT < SIZEOF_SIZE_T 4949 #if SIZEOF_INT < SIZEOF_SIZE_T
4950 #define NEED_RETRY 4950 #define NEED_RETRY
4951 #endif 4951 #endif
4952 4952
4953 /* XXX This code is limited to "true" double-byte encodings, as 4953 /* XXX This code is limited to "true" double-byte encodings, as
4954 a) it assumes an incomplete character consists of a single byte, and 4954 a) it assumes an incomplete character consists of a single byte, and
4955 b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte 4955 b) IsDBCSLeadByte (probably) does not work for non-DBCS multi-byte
(...skipping 266 matching lines...) Expand 10 before | Expand all | Expand 10 after
5222 PyErr_BadArgument(); 5222 PyErr_BadArgument();
5223 return NULL; 5223 return NULL;
5224 } 5224 }
5225 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode), 5225 return PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(unicode),
5226 PyUnicode_GET_SIZE(unicode), 5226 PyUnicode_GET_SIZE(unicode),
5227 NULL); 5227 NULL);
5228 } 5228 }
5229 5229
5230 #undef NEED_RETRY 5230 #undef NEED_RETRY
5231 5231
5232 #endif /* HAVE_MBCS */ 5232 #endif /* MS_WINDOWS */
5233 5233
5234 /* --- Character Mapping Codec -------------------------------------------- */ 5234 /* --- Character Mapping Codec -------------------------------------------- */
5235 5235
5236 PyObject * 5236 PyObject *
5237 PyUnicode_DecodeCharmap(const char *s, 5237 PyUnicode_DecodeCharmap(const char *s,
5238 Py_ssize_t size, 5238 Py_ssize_t size,
5239 PyObject *mapping, 5239 PyObject *mapping,
5240 const char *errors) 5240 const char *errors)
5241 { 5241 {
5242 const char *starts = s; 5242 const char *starts = s;
(...skipping 4511 matching lines...) Expand 10 before | Expand all | Expand 10 after
9754 if (prec >= 0 && len > prec) 9754 if (prec >= 0 && len > prec)
9755 len = prec; 9755 len = prec;
9756 break; 9756 break;
9757 9757
9758 case 'i': 9758 case 'i':
9759 case 'd': 9759 case 'd':
9760 case 'u': 9760 case 'u':
9761 case 'o': 9761 case 'o':
9762 case 'x': 9762 case 'x':
9763 case 'X': 9763 case 'X':
9764 if (c == 'i')
9765 c = 'd';
9764 isnumok = 0; 9766 isnumok = 0;
9765 if (PyNumber_Check(v)) { 9767 if (PyNumber_Check(v)) {
9766 PyObject *iobj=NULL; 9768 PyObject *iobj=NULL;
9767 9769
9768 if (PyLong_Check(v)) { 9770 if (PyLong_Check(v)) {
9769 iobj = v; 9771 iobj = v;
9770 Py_INCREF(iobj); 9772 Py_INCREF(iobj);
9771 } 9773 }
9772 else { 9774 else {
9773 iobj = PyNumber_Long(v); 9775 iobj = PyNumber_Long(v);
9774 } 9776 }
9775 if (iobj!=NULL) { 9777 if (iobj!=NULL) {
9776 if (PyLong_Check(iobj)) { 9778 if (PyLong_Check(iobj)) {
9777 isnumok = 1; 9779 isnumok = 1;
9778 temp = formatlong(iobj, flags, prec, (c == 'i'? 'd': c)); 9780 temp = formatlong(iobj, flags, prec, c);
9779 Py_DECREF(iobj); 9781 Py_DECREF(iobj);
9780 if (!temp) 9782 if (!temp)
9781 goto onError; 9783 goto onError;
9782 pbuf = PyUnicode_AS_UNICODE(temp); 9784 pbuf = PyUnicode_AS_UNICODE(temp);
9783 len = PyUnicode_GET_SIZE(temp); 9785 len = PyUnicode_GET_SIZE(temp);
9784 sign = 1; 9786 sign = 1;
9785 } 9787 }
9786 else { 9788 else {
9787 Py_DECREF(iobj); 9789 Py_DECREF(iobj);
9788 } 9790 }
(...skipping 708 matching lines...) Expand 10 before | Expand all | Expand 10 after
10497 PyMODINIT_FUNC 10499 PyMODINIT_FUNC
10498 PyInit__string(void) 10500 PyInit__string(void)
10499 { 10501 {
10500 return PyModule_Create(&_string_module); 10502 return PyModule_Create(&_string_module);
10501 } 10503 }
10502 10504
10503 10505
10504 #ifdef __cplusplus 10506 #ifdef __cplusplus
10505 } 10507 }
10506 #endif 10508 #endif
OLD | NEW
« no previous file with comments | « Objects/typeobject.c ('k') | Parser/asdl_c.py » ('j') | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld 894c83f36cb7+