--- C:\DOCUME~1\PROPRI~1\LOCALS~1\Temp\cgi.py-rev85530.svn000.tmp.py	dim. janv.  2 20:52:20 2011
+++ C:\python-dev\cgi.py	dim. janv.  2 20:52:00 2011
@@ -31,13 +31,13 @@
 # Imports
 # =======
 
-from operator import attrgetter
-from io import StringIO
+from io import BytesIO,StringIO
 import sys
 import os
 import urllib.parse
 import email.parser
 from warnings import warn
+import html
 
 __all__ = ["MiniFieldStorage", "FieldStorage",
            "parse", "parse_qs", "parse_qsl", "parse_multipart",
@@ -352,9 +352,10 @@
 
     value: the value as a *string*; for file uploads, this
         transparently reads the file every time you request the value
+        and returns *bytes*
 
-    file: the file(-like) object from which you can read the data;
-        None if the data is stored a simple string
+    file: the file(-like) object to read the data from (*binary mode* for
+        file uploads); None if the data is stored as a simple string
 
     type: the content-type, or None if not specified
 
@@ -377,7 +378,8 @@
     """
 
     def __init__(self, fp=None, headers=None, outerboundary="",
-                 environ=os.environ, keep_blank_values=0, strict_parsing=0):
+                 environ=os.environ, keep_blank_values=0, strict_parsing=0,
+                 limit=None):
         """Constructor.  Read multipart/* until last part.
 
         Arguments, all optional:
@@ -436,6 +438,9 @@
         self.fp = fp or sys.stdin
         self.headers = headers
         self.outerboundary = outerboundary
+
+        self.bytes_read = 0  # bytes consumed from self.fp so far
+        self.limit = limit  # most bytes to read, as passed in (may be None)
 
         # Process content-disposition header
         cdisp, pdict = "", {}
@@ -482,6 +487,9 @@
             if maxlen and clen > maxlen:
                 raise ValueError('Maximum content length exceeded')
         self.length = clen
+        # NOTE(review): a negative clen is truthy too -- confirm that is intended
+        if self.limit is None and clen:
+            self.limit = clen
 
         self.list = self.file = None
         self.done = 0
@@ -491,6 +499,7 @@
             self.read_multi(environ, keep_blank_values, strict_parsing)
         else:
             self.read_single()
+        # read_multi() of an enclosing form adds our bytes_read to its own total
 
     def __repr__(self):
         """Return a printable representation."""
@@ -531,7 +540,7 @@
         """Dictionary style get() method, including 'value' lookup."""
         if key in self:
             value = self[key]
-            if type(value) is type([]):
+            if isinstance(value,list):
                 return [x.value for x in value]
             else:
                 return value.value
@@ -542,7 +551,7 @@
         """ Return the first value received."""
         if key in self:
             value = self[key]
-            if type(value) is type([]):
+            if isinstance(value,list):
                 return value[0].value
             else:
                 return value.value
@@ -553,7 +562,7 @@
         """ Return list of received values."""
         if key in self:
             value = self[key]
-            if type(value) is type([]):
+            if isinstance(value,list):
                 return [x.value for x in value]
             else:
                 return [value.value]
@@ -584,10 +593,10 @@
         qs = self.fp.read(self.length)
         if self.qs_on_post:
             qs += '&' + self.qs_on_post
-        self.list = list = []
+        self.list = []
         for key, value in urllib.parse.parse_qsl(qs, self.keep_blank_values,
                                 self.strict_parsing):
-            list.append(MiniFieldStorage(key, value))
+            self.list.append(MiniFieldStorage(key, value))
         self.skip_lines()
 
     FieldStorageClass = None
@@ -600,23 +609,38 @@
         self.list = []
         if self.qs_on_post:
             for key, value in urllib.parse.parse_qsl(self.qs_on_post,
-                                    self.keep_blank_values, self.strict_parsing):
+                                self.keep_blank_values, self.strict_parsing):
                 self.list.append(MiniFieldStorage(key, value))
             FieldStorageClass = None
 
         klass = self.FieldStorageClass or self.__class__
-        parser = email.parser.FeedParser()
-        # Create bogus content-type header for proper multipart parsing
-        parser.feed('Content-Type: %s; boundary=%s\r\n\r\n' % (self.type, ib))
-        parser.feed(self.fp.read())
-        full_msg = parser.close()
-        # Get subparts
-        msgs = full_msg.get_payload()
-        for msg in msgs:
-            fp = StringIO(msg.get_payload())
-            part = klass(fp, msg, ib, environ, keep_blank_values,
-                         strict_parsing)
+        # data must be read as bytes, not strings, so we use the buffer attribute
+        first_line = self.fp.buffer.readline() 
+        self.bytes_read += len(first_line)
+        # first line holds boundary ; ignore it, or check that
+        # "--"+ib == first_line.decode('ascii').strip() ?
+        while True:
+            parser = email.parser.FeedParser()
+            hdr_text = b""
+            while True:
+                data = self.fp.buffer.readline()
+                hdr_text += data
+                if not data.strip():
+                    break
+            if not hdr_text:
+                break
+            # parser takes strings, not bytes
+            self.bytes_read += len(hdr_text)
+            parser.feed(hdr_text.decode(self.fp.encoding))
+            headers = parser.close()
+            rest = None if self.limit is None else self.limit - self.bytes_read
+            part = klass(self.fp, headers, ib, environ, keep_blank_values,
+                         strict_parsing, rest)
+            self.bytes_read += part.bytes_read
             self.list.append(part)
+            # stop on closing boundary/EOF, or once a known length is used up
+            if part.done or 0 <= self.length <= self.bytes_read:
+                break
         self.skip_lines()
 
     def read_single(self):
@@ -636,7 +660,8 @@
         todo = self.length
         if todo >= 0:
             while todo > 0:
-                data = self.fp.read(min(todo, self.bufsize))
+                data = self.fp.buffer.read(min(todo, self.bufsize))
+                self.bytes_read += len(data)
                 if not data:
                     self.done = -1
                     break
@@ -645,42 +670,59 @@
 
     def read_lines(self):
         """Internal: read lines until EOF or outerboundary."""
-        self.file = self.__file = StringIO()
+        # uploads (a filename was sent) are buffered as bytes, every other
+        # field as decoded text
+        buffer_type = StringIO if self.filename is None else BytesIO
+        self.file = self.__file = buffer_type()
         if self.outerboundary:
             self.read_lines_to_outerboundary()
         else:
             self.read_lines_to_eof()
 
     def __write(self, line):
+        """line is always bytes, not string"""
         if self.__file is not None:
             if self.__file.tell() + len(line) > 1000:
                 self.file = self.make_file()
                 data = self.__file.getvalue()
                 self.file.write(data)
                 self.__file = None
-        self.file.write(line)
-
+        if self.filename is None:
+            # ordinary fields are kept as text; file uploads stay raw bytes
+            line = line.decode(self.fp.encoding)
+        self.file.write(line)
+
     def read_lines_to_eof(self):
         """Internal: read lines until EOF."""
-        while 1:
-            line = self.fp.readline(1<<16)
+        while True:
+            line = self.fp.buffer.readline(1<<16) # bytes
+            self.bytes_read += len(line)
             if not line:
                 self.done = -1
                 break
             self.__write(line)
 
     def read_lines_to_outerboundary(self):
-        """Internal: read lines until outerboundary."""
-        next = "--" + self.outerboundary
-        last = next + "--"
-        delim = ""
+        """Internal: read lines until outerboundary.
+        Data is read as bytes : boundaries and line ends must be converted
+        to bytes for comparisons"""
+        next = b"--" + self.outerboundary.encode(self.fp.encoding)
+        last = next + b"--"
+        delim = b""
         last_line_lfend = True
-        while 1:
-            line = self.fp.readline(1<<16)
+        # self.limit caps the bytes this part may consume; None means no cap
+        _read = 0
+        while True:
+            if self.limit is not None and _read >= self.limit:
+                # byte budget used up before any boundary was seen
+                break
+            line = self.fp.buffer.readline(1<<16) # bytes
+            self.bytes_read += len(line)
+            _read += len(line)
             if not line:
                 self.done = -1
                 break
-            if line[:2] == "--" and last_line_lfend:
+            if line[:2] == b"--" and last_line_lfend:
                 strippedline = line.strip()
                 if strippedline == next:
                     break
@@ -688,39 +730,41 @@
                     self.done = 1
                     break
             odelim = delim
-            if line[-2:] == "\r\n":
-                delim = "\r\n"
+            if line.endswith(b"\r\n"):
+                delim = b"\r\n"
                 line = line[:-2]
                 last_line_lfend = True
-            elif line[-1] == "\n":
-                delim = "\n"
+            elif line.endswith(b"\n"):
+                delim = b"\n"
                 line = line[:-1]
                 last_line_lfend = True
             else:
-                delim = ""
+                delim = b""
                 last_line_lfend = False
             self.__write(odelim + line)
+        # self.done: -1 on EOF, 1 on the last boundary, otherwise left as is
 
     def skip_lines(self):
         """Internal: skip lines until outer boundary if defined."""
         if not self.outerboundary or self.done:
             return
-        next = "--" + self.outerboundary
-        last = next + "--"
+        next = b"--" + self.outerboundary.encode(self.fp.encoding)
+        last = next + b"--"
         last_line_lfend = True
-        while 1:
-            line = self.fp.readline(1<<16)
+        while True:
+            line = self.fp.buffer.readline(1<<16)
+            self.bytes_read += len(line)
             if not line:
                 self.done = -1
                 break
-            if line[:2] == "--" and last_line_lfend:
+            if line.startswith(b"--") and last_line_lfend:
                 strippedline = line.strip()
                 if strippedline == next:
                     break
                 if strippedline == last:
                     self.done = 1
                     break
-            last_line_lfend = line.endswith('\n')
+            last_line_lfend = line.endswith(b'\n')
 
     def make_file(self):
         """Overridable: return a readable & writable file.
@@ -730,7 +774,8 @@
         - seek(0)
         - data is read from it
 
-        The file is always opened in text mode.
+        The file is opened in binary mode for files, in text mode
+        for other fields
 
         This version opens a temporary file for reading and writing,
         and immediately deletes (unlinks) it.  The trick (on Unix!) is
@@ -746,7 +791,10 @@
 
         """
         import tempfile
-        return tempfile.TemporaryFile("w+", encoding="utf-8", newline="\n")
+        if self.filename is not None:
+            return tempfile.TemporaryFile("wb+")
+        # pin encoding and newline so text fields survive a round trip
+        return tempfile.TemporaryFile("w+", encoding="utf-8", newline="\n")
 
 
 # Test/debug code
@@ -800,8 +848,8 @@
     list = traceback.format_tb(tb, limit) + \
            traceback.format_exception_only(type, value)
     print("<PRE>%s<B>%s</B></PRE>" % (
-        escape("".join(list[:-1])),
-        escape(list[-1]),
+        html.escape("".join(list[:-1])),
+        html.escape(list[-1]),
         ))
     del tb
 
@@ -812,7 +860,7 @@
     print("<H3>Shell Environment:</H3>")
     print("<DL>")
     for key in keys:
-        print("<DT>", escape(key), "<DD>", escape(environ[key]))
+        print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
     print("</DL>")
     print()
 
@@ -825,10 +873,10 @@
         print("<P>No form fields.")
     print("<DL>")
     for key in keys:
-        print("<DT>" + escape(key) + ":", end=' ')
+        print("<DT>" + html.escape(key) + ":", end=' ')
         value = form[key]
-        print("<i>" + escape(repr(type(value))) + "</i>")
-        print("<DD>" + escape(repr(value)))
+        print("<i>" + html.escape(repr(type(value))) + "</i>")
+        print("<DD>" + html.escape(repr(value)))
     print("</DL>")
     print()
 
@@ -839,9 +887,9 @@
     try:
         pwd = os.getcwd()
     except os.error as msg:
-        print("os.error:", escape(str(msg)))
+        print("os.error:", html.escape(str(msg)))
     else:
-        print(escape(pwd))
+        print(html.escape(pwd))
     print()
 
 def print_arguments():
@@ -899,9 +947,9 @@
 # =========
 
 def escape(s, quote=None):
-    '''Replace special characters "&", "<" and ">" to HTML-safe sequences.
-    If the optional flag quote is true, the quotation mark character (")
-    is also translated.'''
+    """Deprecated: use html.escape().  Escape &, < and > (and " if quote)."""
+    warn("cgi.escape is deprecated, use html.escape instead",
+         PendingDeprecationWarning, stacklevel=2)
     s = s.replace("&", "&amp;") # Must be done first!
     s = s.replace("<", "&lt;")
     s = s.replace(">", "&gt;")
@@ -909,6 +957,7 @@
         s = s.replace('"', "&quot;")
     return s
 
+
 def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
     import re
     return re.match(_vb_pattern, s)