diff -r a88310d86455 Modules/cjkcodecs/_codecs_cn.c --- a/Modules/cjkcodecs/_codecs_cn.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/_codecs_cn.c Mon Apr 15 23:53:06 2013 +0200 @@ -47,7 +47,8 @@ ENCODER(gb2312) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + REQUIRE_OUTBUF(1); + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -55,15 +56,15 @@ ENCODER(gb2312) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); TRYMAP_ENC(gbcommon, code, c); else return 1; if (code & 0x8000) /* MSB set: GBK */ return 1; - OUTBYTE1((code >> 8) | 0x80) - OUTBYTE2((code & 0xFF) | 0x80) + OUTBYTE1((code >> 8) | 0x80); + OUTBYTE2((code & 0xFF) | 0x80); NEXT(1, 2); } @@ -103,7 +104,7 @@ ENCODER(gbk) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -111,16 +112,16 @@ ENCODER(gbk) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); GBK_ENCODE(c, code) else return 1; - OUTBYTE1((code >> 8) | 0x80) + OUTBYTE1((code >> 8) | 0x80); if (code & 0x8000) - OUTBYTE2((code & 0xFF)) /* MSB set: GBK */ + OUTBYTE2((code & 0xFF)); /* MSB set: GBK */ else - OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ + OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */ NEXT(1, 2); } @@ -161,7 +162,7 @@ ENCODER(gb18030) DBCHAR code; if (c < 0x80) { - WRITEBYTE1(c) + WRITEBYTE1(c); NEXT(1, 1); continue; } @@ -170,28 +171,28 @@ ENCODER(gb18030) Py_UCS4 tc = c - 0x10000; assert (c <= 0x10FFFF); - REQUIRE_OUTBUF(4) + REQUIRE_OUTBUF(4); - OUTBYTE4((unsigned char)(tc % 10) + 0x30) + OUTBYTE4((unsigned char)(tc % 10) + 0x30); tc /= 10; - OUTBYTE3((unsigned char)(tc % 126) + 0x81) + OUTBYTE3((unsigned char)(tc % 126) + 0x81); tc /= 126; - OUTBYTE2((unsigned char)(tc % 10) + 0x30) + OUTBYTE2((unsigned char)(tc % 10) + 0x30); tc /= 10; - OUTBYTE1((unsigned char)(tc + 0x90)) + OUTBYTE1((unsigned char)(tc + 0x90)); NEXT(1, 4); continue; } - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); GBK_ENCODE(c, code) else TRYMAP_ENC(gb18030ext, code, c); else { const struct _gb18030_to_unibmp_ranges *utrrange; - REQUIRE_OUTBUF(4) + REQUIRE_OUTBUF(4); for (utrrange = gb18030_to_unibmp_ranges; utrrange->first != 0; @@ -203,13 +204,13 @@ ENCODER(gb18030) tc = c - utrrange->first + utrrange->base; - OUTBYTE4((unsigned char)(tc % 10) + 0x30) + OUTBYTE4((unsigned char)(tc % 10) + 0x30); tc /= 10; - OUTBYTE3((unsigned char)(tc % 126) + 0x81) + OUTBYTE3((unsigned char)(tc % 126) + 0x81); tc /= 126; - OUTBYTE2((unsigned char)(tc % 10) + 0x30) + OUTBYTE2((unsigned char)(tc % 10) + 0x30); tc /= 10; - OUTBYTE1((unsigned char)tc + 0x81) + OUTBYTE1((unsigned char)tc + 0x81); NEXT(1, 4); break; @@ -220,11 +221,11 @@ ENCODER(gb18030) continue; } - OUTBYTE1((code >> 8) | 0x80) + OUTBYTE1((code >> 8) | 0x80); if (code & 0x8000) - OUTBYTE2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ + OUTBYTE2((code & 0xFF)); /* MSB set: GBK or GB18030ext */ else - OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ + OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: GB2312 */ NEXT(1, 2); } @@ -307,7 +308,7 @@ ENCODER_INIT(hz) ENCODER_RESET(hz) { if (state->i != 0) { - WRITEBYTE2('~', '}') + WRITEBYTE2('~', '}'); state->i = 0; NEXT_OUT(2); } @@ -322,11 +323,11 @@ ENCODER(hz) if (c < 0x80) { if (state->i == 0) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); } else { - WRITEBYTE3('~', '}', (unsigned char)c) + WRITEBYTE3('~', '}', (unsigned char)c); NEXT(1, 3); state->i = 0; } @@ -343,12 +344,12 @@ ENCODER(hz) return 1; if (state->i == 0) { - WRITEBYTE4('~', '{', code >> 8, code & 0xff) + WRITEBYTE4('~', '{', code >> 8, code & 0xff); NEXT(1, 4); state->i = 1; } else { - WRITEBYTE2(code >> 8, code & 0xff) + WRITEBYTE2(code >> 8, code & 0xff); NEXT(1, 2); } } diff -r a88310d86455 Modules/cjkcodecs/_codecs_hk.c --- a/Modules/cjkcodecs/_codecs_hk.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/_codecs_hk.c Mon Apr 15 23:53:06 2013 +0200 @@ -44,14 +44,13 @@ ENCODER(big5hkscs) Py_ssize_t insize; if (c < 0x80) { - REQUIRE_OUTBUF(1) - **outbuf = (unsigned char)c; + WRITEBYTE1(c); NEXT(1, 1); continue; } insize = 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); if (c < 0x10000) { TRYMAP_ENC(big5hkscs_bmp, code, c) { @@ -93,8 +92,8 @@ ENCODER(big5hkscs) else return insize; - OUTBYTE1(code >> 8) - OUTBYTE2(code & 0xFF) + OUTBYTE1(code >> 8); + OUTBYTE2(code & 0xFF); NEXT(insize, 2); } diff -r a88310d86455 Modules/cjkcodecs/_codecs_iso2022.c --- a/Modules/cjkcodecs/_codecs_iso2022.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/_codecs_iso2022.c Mon Apr 15 23:53:06 2013 +0200 @@ -141,12 +141,12 @@ ENCODER_INIT(iso2022) ENCODER_RESET(iso2022) { if (STATE_GETFLAG(F_SHIFTED)) { - WRITEBYTE1(SI) + WRITEBYTE1(SI); NEXT_OUT(1); STATE_CLEARFLAG(F_SHIFTED) } if (STATE_G0 != CHARSET_ASCII) { - WRITEBYTE3(ESC, '(', 'B') + WRITEBYTE3(ESC, '(', 'B'); NEXT_OUT(3); STATE_SETG0(CHARSET_ASCII) } @@ -163,16 +163,16 @@ ENCODER(iso2022) if (c < 0x80) { if (STATE_G0 != CHARSET_ASCII) { - WRITEBYTE3(ESC, '(', 'B') + WRITEBYTE3(ESC, '(', 'B'); STATE_SETG0(CHARSET_ASCII) NEXT_OUT(3); } if (STATE_GETFLAG(F_SHIFTED)) { - WRITEBYTE1(SI) + WRITEBYTE1(SI); STATE_CLEARFLAG(F_SHIFTED) NEXT_OUT(1); } - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -210,24 +210,23 @@ ENCODER(iso2022) switch (dsg->plane) { case 0: /* G0 */ if (STATE_GETFLAG(F_SHIFTED)) { - WRITEBYTE1(SI) + WRITEBYTE1(SI); STATE_CLEARFLAG(F_SHIFTED) NEXT_OUT(1); } if (STATE_G0 != dsg->mark) { if (dsg->width == 1) { - WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)); STATE_SETG0(dsg->mark) NEXT_OUT(3); } else if (dsg->mark == CHARSET_JISX0208) { - WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)); STATE_SETG0(dsg->mark) NEXT_OUT(3); } else { - WRITEBYTE4(ESC, '$', '(', - ESCMARK(dsg->mark)) + WRITEBYTE4(ESC, '$', '(', ESCMARK(dsg->mark)); STATE_SETG0(dsg->mark) NEXT_OUT(4); } @@ -236,19 +235,18 @@ ENCODER(iso2022) case 1: /* G1 */ if (STATE_G1 != dsg->mark) { if (dsg->width == 1) { - WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)) + WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)); STATE_SETG1(dsg->mark) NEXT_OUT(3); } else { - WRITEBYTE4(ESC, '$', ')', - ESCMARK(dsg->mark)) + WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark)); STATE_SETG1(dsg->mark) NEXT_OUT(4); } } if (!STATE_GETFLAG(F_SHIFTED)) { - WRITEBYTE1(SO) + WRITEBYTE1(SO); STATE_SETFLAG(F_SHIFTED) NEXT_OUT(1); } @@ -259,11 +257,11 @@ ENCODER(iso2022) } if (dsg->width == 1) { - WRITEBYTE1((unsigned char)encoded) + WRITEBYTE1((unsigned char)encoded); NEXT_OUT(1); } else { - WRITEBYTE2(encoded >> 8, encoded & 0xff) + WRITEBYTE2(encoded >> 8, encoded & 0xff); NEXT_OUT(2); } NEXT_INCHAR(insize); diff -r a88310d86455 Modules/cjkcodecs/_codecs_jp.c --- a/Modules/cjkcodecs/_codecs_jp.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/_codecs_jp.c Mon Apr 15 23:53:06 2013 +0200 @@ -25,33 +25,33 @@ ENCODER(cp932) unsigned char c1, c2; if (c <= 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } else if (c >= 0xff61 && c <= 0xff9f) { - WRITEBYTE1(c - 0xfec0) + WRITEBYTE1(c - 0xfec0); NEXT(1, 1); continue; } else if (c >= 0xf8f0 && c <= 0xf8f3) { /* Windows compatibility */ - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1); if (c == 0xf8f0) - OUTBYTE1(0xa0) + OUTBYTE1(0xa0); else - OUTBYTE1(c - 0xfef1 + 0xfd) + OUTBYTE1(c - 0xfef1 + 0xfd); NEXT(1, 1); continue; } if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); TRYMAP_ENC(cp932ext, code, c) { - OUTBYTE1(code >> 8) - OUTBYTE2(code & 0xff) + OUTBYTE1(code >> 8); + OUTBYTE2(code & 0xff); } else TRYMAP_ENC(jisxcommon, code, c) { if (code & 0x8000) /* MSB set: JIS X 0212 */ @@ -62,15 +62,15 @@ ENCODER(cp932) c2 = code & 0xff; c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); c1 = (c1 - 0x21) >> 1; - OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) - OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1); + OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); } else if (c >= 0xe000 && c < 0xe758) { /* User-defined area */ c1 = (Py_UCS4)(c - 0xe000) / 188; c2 = (Py_UCS4)(c - 0xe000) % 188; - OUTBYTE1(c1 + 0xf0) - OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + OUTBYTE1(c1 + 0xf0); + OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); } else return 1; @@ -152,7 +152,7 @@ ENCODER(euc_jis_2004) Py_ssize_t insize; if (c < 0x80) { - WRITEBYTE1(c) + WRITEBYTE1(c); NEXT(1, 1); continue; } @@ -196,7 +196,7 @@ ENCODER(euc_jis_2004) else TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ - WRITEBYTE2(0x8e, c - 0xfec0) + WRITEBYTE2(0x8e, c - 0xfec0); NEXT(1, 2); continue; } @@ -219,11 +219,11 @@ ENCODER(euc_jis_2004) if (code & 0x8000) { /* Codeset 2 */ - WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80) + WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80); NEXT(insize, 3); } else { /* Codeset 1 */ - WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) + WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80); NEXT(insize, 2); } } @@ -318,7 +318,7 @@ ENCODER(euc_jp) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -329,7 +329,7 @@ ENCODER(euc_jp) TRYMAP_ENC(jisxcommon, code, c); else if (c >= 0xff61 && c <= 0xff9f) { /* JIS X 0201 half-width katakana */ - WRITEBYTE2(0x8e, c - 0xfec0) + WRITEBYTE2(0x8e, c - 0xfec0); NEXT(1, 2); continue; } @@ -351,11 +351,11 @@ ENCODER(euc_jp) if (code & 0x8000) { /* JIS X 0212 */ - WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80) + WRITEBYTE3(0x8f, code >> 8, (code & 0xFF) | 0x80); NEXT(1, 3); } else { /* JIS X 0208 */ - WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80) + WRITEBYTE2((code >> 8) | 0x80, (code & 0xFF) | 0x80); NEXT(1, 2); } } @@ -448,14 +448,14 @@ ENCODER(shift_jis) code = NOCHAR; if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { - REQUIRE_OUTBUF(1) + REQUIRE_OUTBUF(1); - OUTBYTE1((unsigned char)code) + OUTBYTE1((unsigned char)code); NEXT(1, 1); continue; } - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); if (code == NOCHAR) { TRYMAP_ENC(jisxcommon, code, c); @@ -474,8 +474,8 @@ ENCODER(shift_jis) c2 = code & 0xff; c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21); c1 = (c1 - 0x21) >> 1; - OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1) - OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41) + OUTBYTE1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1); + OUTBYTE2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41); NEXT(1, 2); } @@ -546,12 +546,12 @@ ENCODER(shift_jis_2004) JISX0201_ENCODE(c, code) if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) { - WRITEBYTE1((unsigned char)code) + WRITEBYTE1((unsigned char)code); NEXT(1, 1); continue; } - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); insize = 1; if (code == NOCHAR) { @@ -619,8 +619,8 @@ ENCODER(shift_jis_2004) if (c1 & 1) c2 += 0x5e; c1 >>= 1; - OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)) - OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41)) + OUTBYTE1(c1 + (c1 < 0x1f ? 0x81 : 0xc1)); + OUTBYTE2(c2 + (c2 < 0x3f ? 0x40 : 0x41)); NEXT(insize, 2); } diff -r a88310d86455 Modules/cjkcodecs/_codecs_kr.c --- a/Modules/cjkcodecs/_codecs_kr.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/_codecs_kr.c Mon Apr 15 23:53:06 2013 +0200 @@ -38,7 +38,7 @@ ENCODER(euc_kr) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -46,39 +46,39 @@ ENCODER(euc_kr) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); TRYMAP_ENC(cp949, code, c); else return 1; if ((code & 0x8000) == 0) { /* KS X 1001 coded character */ - OUTBYTE1((code >> 8) | 0x80) - OUTBYTE2((code & 0xFF) | 0x80) + OUTBYTE1((code >> 8) | 0x80); + OUTBYTE2((code & 0xFF) | 0x80); NEXT(1, 2); } else { /* Mapping is found in CP949 extension, * but we encode it in KS X 1001:1998 Annex 3, * make-up sequence for EUC-KR. */ - REQUIRE_OUTBUF(8) + REQUIRE_OUTBUF(8); /* syllable composition precedence */ - OUTBYTE1(EUCKR_JAMO_FIRSTBYTE) - OUTBYTE2(EUCKR_JAMO_FILLER) + OUTBYTE1(EUCKR_JAMO_FIRSTBYTE); + OUTBYTE2(EUCKR_JAMO_FILLER); /* All codepoints in CP949 extension are in unicode * Hangul Syllable area. */ assert(0xac00 <= c && c <= 0xd7a3); c -= 0xac00; - OUTBYTE3(EUCKR_JAMO_FIRSTBYTE) - OUTBYTE4(u2cgk_choseong[c / 588]) + OUTBYTE3(EUCKR_JAMO_FIRSTBYTE); + OUTBYTE4(u2cgk_choseong[c / 588]); NEXT_OUT(4); - OUTBYTE1(EUCKR_JAMO_FIRSTBYTE) - OUTBYTE2(u2cgk_jungseong[(c / 28) % 21]) - OUTBYTE3(EUCKR_JAMO_FIRSTBYTE) - OUTBYTE4(u2cgk_jongseong[c % 28]) + OUTBYTE1(EUCKR_JAMO_FIRSTBYTE); + OUTBYTE2(u2cgk_jungseong[(c / 28) % 21]); + OUTBYTE3(EUCKR_JAMO_FIRSTBYTE); + OUTBYTE4(u2cgk_jongseong[c % 28]); NEXT(1, 4); } } @@ -171,7 +171,7 @@ ENCODER(cp949) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -179,15 +179,15 @@ ENCODER(cp949) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); TRYMAP_ENC(cp949, code, c); else return 1; - OUTBYTE1((code >> 8) | 0x80) + OUTBYTE1((code >> 8) | 0x80); if (code & 0x8000) - OUTBYTE2(code & 0xFF) /* MSB set: CP949 */ + OUTBYTE2(code & 0xFF); /* MSB set: CP949 */ else - OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ + OUTBYTE2((code & 0xFF) | 0x80); /* MSB unset: ks x 1001 */ NEXT(1, 2); } @@ -255,7 +255,7 @@ ENCODER(johab) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -263,7 +263,7 @@ ENCODER(johab) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); if (c >= 0xac00 && c <= 0xd7a3) { c -= 0xac00; @@ -287,8 +287,8 @@ ENCODER(johab) t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : (c1 - 0x21 + 0x197)); t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); - OUTBYTE1(t1 >> 1) - OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) + OUTBYTE1(t1 >> 1); + OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43); NEXT(1, 2); continue; } @@ -298,8 +298,8 @@ ENCODER(johab) else return 1; - OUTBYTE1(code >> 8) - OUTBYTE2(code & 0xff) + OUTBYTE1(code >> 8); + OUTBYTE2(code & 0xff); NEXT(1, 2); } diff -r a88310d86455 Modules/cjkcodecs/_codecs_tw.c --- a/Modules/cjkcodecs/_codecs_tw.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/_codecs_tw.c Mon Apr 15 23:53:06 2013 +0200 @@ -18,8 +18,7 @@ ENCODER(big5) DBCHAR code; if (c < 0x80) { - REQUIRE_OUTBUF(1) - **outbuf = (unsigned char)c; + WRITEBYTE1(c); NEXT(1, 1); continue; } @@ -27,13 +26,13 @@ ENCODER(big5) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); TRYMAP_ENC(big5, code, c); else return 1; - OUTBYTE1(code >> 8) - OUTBYTE2(code & 0xFF) + OUTBYTE1(code >> 8); + OUTBYTE2(code & 0xFF); NEXT(1, 2); } @@ -73,7 +72,7 @@ ENCODER(cp950) DBCHAR code; if (c < 0x80) { - WRITEBYTE1((unsigned char)c) + WRITEBYTE1((unsigned char)c); NEXT(1, 1); continue; } @@ -81,13 +80,13 @@ ENCODER(cp950) if (c > 0xFFFF) return 1; - REQUIRE_OUTBUF(2) + REQUIRE_OUTBUF(2); TRYMAP_ENC(cp950ext, code, c); else TRYMAP_ENC(big5, code, c); else return 1; - OUTBYTE1(code >> 8) - OUTBYTE2(code & 0xFF) + OUTBYTE1(code >> 8); + OUTBYTE2(code & 0xFF); NEXT(1, 2); } diff -r a88310d86455 Modules/cjkcodecs/cjkcodecs.h --- a/Modules/cjkcodecs/cjkcodecs.h Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/cjkcodecs.h Mon Apr 15 23:53:06 2013 +0200 @@ -74,11 +74,11 @@ static const struct dbcs_map *mapping_li MultibyteCodec_State *state, const void *config, \ int kind, void *data, \ Py_ssize_t *inpos, Py_ssize_t inlen, \ - unsigned char **outbuf, Py_ssize_t outleft, int flags) + _PyBytesWriter *writer, int flags) #define ENCODER_RESET(encoding) \ static Py_ssize_t encoding##_encode_reset( \ MultibyteCodec_State *state, const void *config, \ - unsigned char **outbuf, Py_ssize_t outleft) + _PyBytesWriter *writer) #define DECODER_INIT(encoding) \ static int encoding##_decode_init( \ @@ -103,21 +103,22 @@ static const struct dbcs_map *mapping_li } while (0) #define NEXT_OUT(o) \ do { \ - (*outbuf) += (o); \ - (outleft) -= (o); \ + writer->pos += o; \ } while (0) #define NEXT(i, o) \ do { \ - NEXT_INCHAR(i); \ - NEXT_OUT(o); \ + NEXT_INCHAR(i); \ + NEXT_OUT(o); \ } while (0) #define REQUIRE_INBUF(n) \ if (inleft < (n)) \ return MBERR_TOOFEW; -#define REQUIRE_OUTBUF(n) \ - if (outleft < (n)) \ - return MBERR_TOOSMALL; +#define REQUIRE_OUTBUF(n) \ + do { \ + if (_PyBytesWriter_Prepare(writer, (n)) < 0) \ + return MBERR_EXCEPTION; \ + } while (0) #define INBYTE1 ((*inbuf)[0]) #define INBYTE2 ((*inbuf)[1]) @@ -130,7 +131,7 @@ static const struct dbcs_map *mapping_li #define OUTCHAR(c) \ do { \ if (_PyUnicodeWriter_WriteChar(writer, (c)) < 0) \ - return MBERR_TOOSMALL; \ + return MBERR_EXCEPTION; \ } while (0) #define OUTCHAR2(c1, c2) \ @@ -138,35 +139,43 @@ static const struct dbcs_map *mapping_li Py_UCS4 _c1 = (c1); \ Py_UCS4 _c2 = (c2); \ if (_PyUnicodeWriter_Prepare(writer, 2, Py_MAX(_c1, c2)) < 0) \ - return MBERR_TOOSMALL; \ + return MBERR_EXCEPTION; \ PyUnicode_WRITE(writer->kind, writer->data, writer->pos, _c1); \ PyUnicode_WRITE(writer->kind, writer->data, writer->pos + 1, _c2); \ writer->pos += 2; \ } while (0) -#define OUTBYTE1(c) ((*outbuf)[0]) = (c); -#define OUTBYTE2(c) ((*outbuf)[1]) = (c); -#define OUTBYTE3(c) ((*outbuf)[2]) = (c); -#define OUTBYTE4(c) ((*outbuf)[3]) = (c); +#define OUTBYTE1(c) writer->str[writer->pos] = (c) +#define OUTBYTE2(c) writer->str[writer->pos+1] = (c) +#define OUTBYTE3(c) writer->str[writer->pos+2] = (c) +#define OUTBYTE4(c) writer->str[writer->pos+3] = (c) -#define WRITEBYTE1(c1) \ - REQUIRE_OUTBUF(1) \ - (*outbuf)[0] = (c1); -#define WRITEBYTE2(c1, c2) \ - REQUIRE_OUTBUF(2) \ - (*outbuf)[0] = (c1); \ - (*outbuf)[1] = (c2); -#define WRITEBYTE3(c1, c2, c3) \ - REQUIRE_OUTBUF(3) \ - (*outbuf)[0] = (c1); \ - (*outbuf)[1] = (c2); \ - (*outbuf)[2] = (c3); +#define WRITEBYTE1(c1) \ + do { \ + REQUIRE_OUTBUF(1); \ + writer->str[writer->pos] = (c1); \ + } while (0) +#define WRITEBYTE2(c1, c2) \ + do { \ + REQUIRE_OUTBUF(2); \ + writer->str[writer->pos] = (c1); \ + writer->str[writer->pos+1] = (c2); \ + } while (0) +#define WRITEBYTE3(c1, c2, c3) \ + do { \ + REQUIRE_OUTBUF(3); \ + writer->str[writer->pos] = (c1); \ + writer->str[writer->pos+1] = (c2); \ + writer->str[writer->pos+2] = (c3); \ + } while (0) #define WRITEBYTE4(c1, c2, c3, c4) \ - REQUIRE_OUTBUF(4) \ - (*outbuf)[0] = (c1); \ - (*outbuf)[1] = (c2); \ - (*outbuf)[2] = (c3); \ - (*outbuf)[3] = (c4); + do { \ + REQUIRE_OUTBUF(4); \ + writer->str[writer->pos] = (c1); \ + writer->str[writer->pos+1] = (c2); \ + writer->str[writer->pos+2] = (c3); \ + writer->str[writer->pos+3] = (c4); \ + } while (0) #define _TRYMAP_ENC(m, assi, val) \ ((m)->map != NULL && (val) >= (m)->bottom && \ diff -r a88310d86455 Modules/cjkcodecs/multibytecodec.c --- a/Modules/cjkcodecs/multibytecodec.c Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/multibytecodec.c Mon Apr 15 23:53:06 2013 +0200 @@ -12,7 +12,7 @@ typedef struct { PyObject *inobj; Py_ssize_t inpos, inlen; - unsigned char *outbuf, *outbuf_end; + _PyBytesWriter writer; PyObject *excobj, *outobj; } MultibyteEncodeBuffer; @@ -170,35 +170,6 @@ static PyGetSetDef codecctx_getsets[] = {NULL,} }; -static int -expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize) -{ - Py_ssize_t orgpos, orgsize, incsize; - - orgpos = (Py_ssize_t)((char *)buf->outbuf - - PyBytes_AS_STRING(buf->outobj)); - orgsize = PyBytes_GET_SIZE(buf->outobj); - incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize); - - if (orgsize > PY_SSIZE_T_MAX - incsize) - return -1; - - if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1) - return -1; - - buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos; - buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj) - + PyBytes_GET_SIZE(buf->outobj); - - return 0; -} -#define REQUIRE_ENCODEBUFFER(buf, s) { \ - if ((s) < 1 || (buf)->outbuf + (s) > (buf)->outbuf_end) \ - if (expand_encodebuffer(buf, s) == -1) \ - goto errorexit; \ -} - - /** * MultibyteCodec object */ @@ -210,7 +181,7 @@ multibytecodec_encerror(MultibyteCodec * PyObject *errors, Py_ssize_t e) { PyObject *retobj = NULL, *retstr = NULL, *tobj; - Py_ssize_t retstrsize, newpos; + Py_ssize_t newpos; Py_ssize_t esize, start, end; const char *reason; @@ -220,9 +191,6 @@ multibytecodec_encerror(MultibyteCodec * } else { switch (e) { - case MBERR_TOOSMALL: - REQUIRE_ENCODEBUFFER(buf, -1); - return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; esize = (Py_ssize_t)buf->inpos; @@ -253,24 +221,21 @@ multibytecodec_encerror(MultibyteCodec * inpos = 0; for (;;) { - Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf); - r = codec->encode(state, codec->config, kind, data, &inpos, 1, - &buf->outbuf, outleft, 0); - if (r == MBERR_TOOSMALL) { - REQUIRE_ENCODEBUFFER(buf, -1); - continue; + &buf->writer, 0); + if (r == MBERR_EXCEPTION) { + Py_DECREF(replchar); + goto errorexit; } - else - break; + break; } Py_DECREF(replchar); if (r != 0) { - REQUIRE_ENCODEBUFFER(buf, 1); - *buf->outbuf++ = '?'; + if (_PyBytesWriter_WriteChar(&buf->writer, '?') < 0) + goto errorexit; } } if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) { @@ -328,12 +293,8 @@ multibytecodec_encerror(MultibyteCodec * retstr = tobj; } - assert(PyBytes_Check(retstr)); - retstrsize = PyBytes_GET_SIZE(retstr); - REQUIRE_ENCODEBUFFER(buf, retstrsize); - - memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize); - buf->outbuf += retstrsize; + if (_PyBytesWriter_WriteStr(&buf->writer, retstr) < 0) + goto errorexit; newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1)); if (newpos < 0 && !PyErr_Occurred()) @@ -374,8 +335,6 @@ multibytecodec_decerror(MultibyteCodec * } else { switch (e) { - case MBERR_TOOSMALL: - return 0; /* retry it */ case MBERR_TOOFEW: reason = "incomplete multibyte sequence"; esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf); @@ -466,7 +425,7 @@ multibytecodec_encode(MultibyteCodec *co PyObject *errors, int flags) { MultibyteEncodeBuffer buf; - Py_ssize_t finalsize, r = 0; + Py_ssize_t r = 0; Py_ssize_t datalen; int kind; void *data; @@ -478,6 +437,7 @@ multibytecodec_encode(MultibyteCodec *co if (datalen == 0 && !(flags & MBENC_RESET)) return PyBytes_FromStringAndSize(NULL, 0); + _PyBytesWriter_Init(&buf.writer); buf.excobj = NULL; buf.outobj = NULL; buf.inobj = text; /* borrowed reference */ @@ -494,18 +454,18 @@ multibytecodec_encode(MultibyteCodec *co buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16); if (buf.outobj == NULL) goto errorexit; - buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj); - buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj); while (buf.inpos < buf.inlen) { /* we don't reuse inleft and outleft here. * error callbacks can relocate the cursor anywhere on buffer*/ - Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); r = codec->encode(state, codec->config, kind, data, &buf.inpos, buf.inlen, - &buf.outbuf, outleft, flags); + &buf.writer, + flags); + if (r == MBERR_EXCEPTION) + goto errorexit; if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH))) break; else if (multibytecodec_encerror(codec, state, &buf, errors,r)) @@ -516,31 +476,25 @@ multibytecodec_encode(MultibyteCodec *co if (codec->encreset != NULL && (flags & MBENC_RESET)) for (;;) { - Py_ssize_t outleft; + r = codec->encreset(state, codec->config, &buf.writer); + if (r == MBERR_EXCEPTION) + goto errorexit; - outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf); - r = codec->encreset(state, codec->config, &buf.outbuf, - outleft); if (r == 0) break; - else if (multibytecodec_encerror(codec, state, + + if (multibytecodec_encerror(codec, state, &buf, errors, r)) goto errorexit; } - finalsize = (Py_ssize_t)((char *)buf.outbuf - - PyBytes_AS_STRING(buf.outobj)); - - if (finalsize != PyBytes_GET_SIZE(buf.outobj)) - if (_PyBytes_Resize(&buf.outobj, finalsize) == -1) - goto errorexit; - if (inpos_t) *inpos_t = buf.inpos; Py_XDECREF(buf.excobj); - return buf.outobj; + return _PyBytesWriter_Finish(&buf.writer); errorexit: + _PyBytesWriter_Dealloc(&buf.writer); Py_XDECREF(buf.excobj); Py_XDECREF(buf.outobj); return NULL; @@ -886,16 +840,19 @@ mbiencoder_encode(MultibyteIncrementalEn static PyObject * mbiencoder_reset(MultibyteIncrementalEncoderObject *self) { - /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */ - unsigned char buffer[4], *outbuf; + _PyBytesWriter writer; Py_ssize_t r; + if (self->codec->encreset != NULL) { - outbuf = buffer; + _PyBytesWriter_Init(&writer); r = self->codec->encreset(&self->state, self->codec->config, - &outbuf, sizeof(buffer)); - if (r != 0) + &writer); + _PyBytesWriter_Dealloc(&writer); + if (r != 0) { return NULL; + } } + Py_CLEAR(self->pending); Py_RETURN_NONE; } diff -r a88310d86455 Modules/cjkcodecs/multibytecodec.h --- a/Modules/cjkcodecs/multibytecodec.h Sun Apr 14 19:22:47 2013 +0200 +++ b/Modules/cjkcodecs/multibytecodec.h Mon Apr 15 23:53:06 2013 +0200 @@ -29,13 +29,13 @@ typedef Py_ssize_t (*mbencode_func)(Mult const void *config, int kind, void *data, Py_ssize_t *inpos, Py_ssize_t inlen, - unsigned char **outbuf, Py_ssize_t outleft, + _PyBytesWriter *writer, int flags); typedef int (*mbencodeinit_func)(MultibyteCodec_State *state, const void *config); typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state, const void *config, - unsigned char **outbuf, Py_ssize_t outleft); + _PyBytesWriter *writer); typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state, const void *config, const unsigned char **inbuf, Py_ssize_t inleft, @@ -109,9 +109,9 @@ typedef struct { } MultibyteStreamWriterObject; /* positive values for illegal sequences */ -#define MBERR_TOOSMALL (-1) /* insufficient output buffer space */ #define MBERR_TOOFEW (-2) /* incomplete input buffer */ #define MBERR_INTERNAL (-3) /* internal runtime error */ +#define MBERR_EXCEPTION (-4) /* a Python exception was raised */ #define ERROR_STRICT (PyObject *)(1) #define ERROR_IGNORE (PyObject *)(2)