Index: Doc/library/pyexpat.rst
===================================================================
--- Doc/library/pyexpat.rst (revision 59843)
+++ Doc/library/pyexpat.rst (working copy)
@@ -182,11 +182,15 @@
.. attribute:: xmlparser.buffer_size
- The size of the buffer used when :attr:`buffer_text` is true. This value cannot
- be changed at this time.
+ The size of the buffer used when :attr:`buffer_text` is true.
+ A new buffer size can be set by assigning a new positive integer value
+ to this attribute.
+ When the size is changed, the buffer will be flushed.
.. versionadded:: 2.3
+ .. versionchanged:: 2.6
+ The buffer size can now be changed.
.. attribute:: xmlparser.buffer_text
Index: Lib/test/test_pyexpat.py
===================================================================
--- Lib/test/test_pyexpat.py (revision 59843)
+++ Lib/test/test_pyexpat.py (working copy)
@@ -434,7 +434,99 @@
self.assertRaises(Exception, parser.Parse, xml)
+class ChardataBufferTest(unittest.TestCase):
+    """
+    test setting of chardata buffer size
+
+    Each test feeds character data to a parser that has buffer_text
+    enabled and counts how many times the CharacterDataHandler fires.
+
+    NOTE(review): the expected counts presumably rely on expat handing
+    character data to pyexpat in chunks of at most 1024 bytes for the
+    declared 'iso8859' encoding, and on the buffer being flushed at the
+    end of every Parse() call -- confirm against Modules/pyexpat.c.
+    """
+
+    def test_1025_bytes(self):
+        """1025 bytes do not fit a 1024-byte buffer: two handler calls."""
+        self.assertEquals(self.small_buffer_test(1025), 2)
+
+    def test_1000_bytes(self):
+        """1000 bytes fit a 1024-byte buffer: a single handler call."""
+        self.assertEquals(self.small_buffer_test(1000), 1)
+
+    def test_change_size_1(self):
+        """Growing the buffer between two Parse() calls."""
+        self.assertEquals(self.change_buffer_size_test1(), 2)
+
+    def test_change_size_2(self):
+        """Shrinking the buffer between two Parse() calls."""
+        self.assertEquals(self.change_buffer_size_test2(), 4)
+
+    def test_unchanged_size(self):
+        """Re-assigning the current buffer_size must not trigger a flush."""
+        # xml1 opens the <s> element and xml2 closes it, so the two
+        # chunks together form one well-formed document.
+        xml1 = ("<?xml version='1.0' encoding='iso8859'?><s>%s" % ('a' * 512))
+        xml2 = 'a' * 512 + '</s>'
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_size = 512
+        parser.buffer_text = 1
+
+        # Feed 512 bytes of character data: the handler should be called
+        # once.
+        self.n = 0
+        parser.Parse(xml1)
+        self.assertEquals(self.n, 1)
+
+        # Reassign to buffer_size, but assign the same size.
+        parser.buffer_size = parser.buffer_size
+        self.assertEquals(self.n, 1)
+
+        # Try parsing rest of the document
+        parser.Parse(xml2)
+        self.assertEquals(self.n, 2)
+
+    def make_document(self, bytes):
+        """Return a document whose only element holds `bytes` 'a' characters."""
+        return ("<?xml version='1.0'?><tag>" + bytes * 'a' + '</tag>')
+
+    def counting_handler(self, text):
+        """CharacterDataHandler that only counts its invocations in self.n."""
+        self.n += 1
+
+    def small_buffer_test(self, buffer_len):
+        """Parse `buffer_len` bytes of character data with a 1024-byte
+        buffer and return the number of handler calls."""
+        xml = ("<?xml version='1.0' encoding='iso8859'?><s>%s</s>"
+               % ('a' * buffer_len))
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_size = 1024
+        parser.buffer_text = 1
+
+        self.n = 0
+        parser.Parse(xml)
+        return self.n
+
+    def change_buffer_size_test1(self):
+        """Double buffer_size between two chunks; return the handler
+        call count."""
+        xml1 = ("<?xml version='1.0' encoding='iso8859'?><a><s>%s"
+                % ('a' * 1024))
+        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_text = 1
+        parser.buffer_size = 1024
+        self.assertEquals(parser.buffer_size, 1024)
+
+        self.n = 0
+        parser.Parse(xml1, 0)
+        parser.buffer_size *= 2
+        self.assertEquals(parser.buffer_size, 2048)
+        parser.Parse(xml2, 1)
+        return self.n
+
+    def change_buffer_size_test2(self):
+        """Halve buffer_size between two chunks; return the handler
+        call count."""
+        xml1 = ("<?xml version='1.0' encoding='iso8859'?><a>a<s>%s"
+                % ('a' * 1023))
+        xml2 = "aaa</s><s>%s</s></a>" % ('a' * 1025)
+        parser = expat.ParserCreate()
+        parser.CharacterDataHandler = self.counting_handler
+        parser.buffer_text = 1
+        parser.buffer_size = 2048
+        self.assertEquals(parser.buffer_size, 2048)
+
+        self.n = 0
+        parser.Parse(xml1, 0)
+        # Floor division keeps the value an integer even under true
+        # division; buffer_size only accepts integers.
+        parser.buffer_size //= 2
+        self.assertEquals(parser.buffer_size, 1024)
+        parser.Parse(xml2, 1)
+        return self.n
+
+
+
+
def test_main():
run_unittest(SetAttributeTest,
ParseTest,
@@ -443,7 +535,8 @@
BufferTextTest,
HandlerExceptionTest,
PositionTest,
- sf1296433Test)
+ sf1296433Test,
+ ChardataBufferTest)
if __name__ == "__main__":
test_main()
Index: Modules/pyexpat.c
===================================================================
--- Modules/pyexpat.c (revision 59843)
+++ Modules/pyexpat.c (working copy)
@@ -1649,6 +1649,48 @@
self->specified_attributes = 0;
return 0;
}
+
+    if (strcmp(name, "buffer_size") == 0) {
+        /* Assigning to buffer_size: validate the new value, hand any
+         * pending character data to the handler, then replace the
+         * buffer with one of the requested size.
+         * Returns 0 on success, -1 with an exception set on failure.
+         */
+        long new_buffer_size;
+
+        if (!PyInt_Check(v)) {
+            PyErr_SetString(PyExc_TypeError, "buffer_size must be an integer");
+            return -1;
+        }
+
+        new_buffer_size = PyInt_AS_LONG(v);
+        if (new_buffer_size <= 0) {
+            PyErr_SetString(PyExc_ValueError, "buffer_size must be greater than zero");
+            return -1;
+        }
+
+        /* trivial case -- no change, so nothing is flushed or reallocated */
+        if (new_buffer_size == self->buffer_size) {
+            return 0;
+        }
+
+        /* check maximum */
+        if (new_buffer_size > INT_MAX) {
+            PyErr_SetString(PyExc_ValueError, "buffer_size must not be greater than INT_MAX");
+            return -1;
+        }
+
+        if (self->buffer != NULL) {
+            /* there is already a buffer */
+            if (self->buffer_used != 0) {
+                /* Deliver the pending data before the buffer goes away.
+                 * NOTE(review): assumes flush_character_buffer() returns
+                 * a negative value with an exception set when the
+                 * handler fails -- confirm against its definition.
+                 * On failure the old buffer and size are left untouched.
+                 */
+                if (flush_character_buffer(self) < 0) {
+                    return -1;
+                }
+            }
+            /* free existing buffer */
+            free(self->buffer);
+        }
+        /* If this allocation fails, self->buffer is left NULL and
+         * buffer_size keeps its previous value.
+         */
+        self->buffer = malloc(new_buffer_size);
+        if (self->buffer == NULL) {
+            PyErr_NoMemory();
+            return -1;
+        }
+        self->buffer_size = new_buffer_size;
+        return 0;
+    }
+
if (strcmp(name, "CharacterDataHandler") == 0) {
/* If we're changing the character data handler, flush all
* cached data with the old handler. Not sure there's a