--- C:\DOCUME~1\PROPRI~1\LOCALS~1\Temp\cgi.py-rev85530.svn000.tmp.py dim. janv. 2 20:52:20 2011 +++ C:\python-dev\cgi.py dim. janv. 2 20:52:00 2011 @@ -31,13 +31,13 @@ # Imports # ======= -from operator import attrgetter -from io import StringIO +from io import BytesIO,StringIO import sys import os import urllib.parse import email.parser from warnings import warn +import html __all__ = ["MiniFieldStorage", "FieldStorage", "parse", "parse_qs", "parse_qsl", "parse_multipart", @@ -352,9 +352,10 @@ value: the value as a *string*; for file uploads, this transparently reads the file every time you request the value + and returns *bytes* - file: the file(-like) object from which you can read the data; - None if the data is stored a simple string + file: the file(-like) object from which you can read the data *in + binary mode* ; None if the data is stored a simple string type: the content-type, or None if not specified @@ -377,7 +378,8 @@ """ def __init__(self, fp=None, headers=None, outerboundary="", - environ=os.environ, keep_blank_values=0, strict_parsing=0): + environ=os.environ, keep_blank_values=0, strict_parsing=0, + limit=None): """Constructor. Read multipart/* until last part. Arguments, all optional: @@ -436,6 +438,9 @@ self.fp = fp or sys.stdin self.headers = headers self.outerboundary = outerboundary + + self.bytes_read = 0 + self.limit = limit # Process content-disposition header cdisp, pdict = "", {} @@ -482,6 +487,9 @@ if maxlen and clen > maxlen: raise ValueError('Maximum content length exceeded') self.length = clen + print('length',clen) + if self.limit is None and clen: + self.limit = clen self.list = self.file = None self.done = 0 @@ -491,6 +499,7 @@ self.read_multi(environ, keep_blank_values, strict_parsing) else: self.read_single() + print('bytes read',self.bytes_read) def __repr__(self): """Return a printable representation.""" @@ -531,7 +540,7 @@ """Dictionary style get() method, including 'value' lookup.""" if key in self: value = self[key] - if type(value) is type([]): + if isinstance(value,list): return [x.value for x in value] else: return value.value @@ -542,7 +551,7 @@ """ Return the first value received.""" if key in self: value = self[key] - if type(value) is type([]): + if isinstance(value,list): return value[0].value else: return value.value @@ -553,7 +562,7 @@ """ Return list of received values.""" if key in self: value = self[key] - if type(value) is type([]): + if isinstance(value,list): return [x.value for x in value] else: return [value.value] @@ -584,10 +593,10 @@ qs = self.fp.read(self.length) if self.qs_on_post: qs += '&' + self.qs_on_post - self.list = list = [] + self.list = [] for key, value in urllib.parse.parse_qsl(qs, self.keep_blank_values, self.strict_parsing): - list.append(MiniFieldStorage(key, value)) + self.list.append(MiniFieldStorage(key, value)) self.skip_lines() FieldStorageClass = None @@ -600,23 +609,38 @@ self.list = [] if self.qs_on_post: for key, value in urllib.parse.parse_qsl(self.qs_on_post, - self.keep_blank_values, self.strict_parsing): + self.keep_blank_values, self.strict_parsing): self.list.append(MiniFieldStorage(key, value)) FieldStorageClass = None klass = self.FieldStorageClass or self.__class__ - parser = email.parser.FeedParser() - # Create bogus content-type header for proper multipart parsing - parser.feed('Content-Type: %s; boundary=%s\r\n\r\n' % (self.type, ib)) - parser.feed(self.fp.read()) - full_msg = parser.close() - # Get subparts - msgs = full_msg.get_payload() - for msg in msgs: - fp = StringIO(msg.get_payload()) - part = klass(fp, msg, ib, environ, keep_blank_values, - strict_parsing) + # data must be read as bytes, not strings, so we use the buffer attribute + first_line = self.fp.buffer.readline() + self.bytes_read += len(first_line) + # first line holds boundary ; ignore it, or check that + # "--"+ib == first_line.decode('ascii').strip() ? + while True: + parser = email.parser.FeedParser() + hdr_text = b"" + while True: + data = self.fp.buffer.readline() + hdr_text += data + if not data.strip(): + break + if not hdr_text: + break + # parser takes strings, not bytes + self.bytes_read += len(hdr_text) + parser.feed(hdr_text.decode(self.fp.encoding)) + headers = parser.close() + part = klass(self.fp, headers, ib, environ, keep_blank_values, + strict_parsing,self.limit-self.bytes_read) + self.bytes_read += part.bytes_read + print('after reading part,bytes read',self.bytes_read,self.length) self.list.append(part) + if self.bytes_read >= self.length: + print('limit reached') + break self.skip_lines() def read_single(self): @@ -636,7 +660,8 @@ todo = self.length if todo >= 0: while todo > 0: - data = self.fp.read(min(todo, self.bufsize)) + data = self.fp.buffer.read(min(todo, self.bufsize)) + self.bytes_read += len(data) if not data: self.done = -1 break @@ -645,42 +670,59 @@ def read_lines(self): """Internal: read lines until EOF or outerboundary.""" - self.file = self.__file = StringIO() + if self.filename is not None: + self.file = self.__file = BytesIO() # store data as bytes for files + else: + self.file = self.__file = StringIO() # as strings for other fields if self.outerboundary: self.read_lines_to_outerboundary() else: self.read_lines_to_eof() def __write(self, line): + """line is always bytes, not string""" if self.__file is not None: if self.__file.tell() + len(line) > 1000: self.file = self.make_file() data = self.__file.getvalue() self.file.write(data) self.__file = None - self.file.write(line) - + if self.filename is not None: + self.file.write(line) # keep bytes + else: + self.file.write(line.decode(self.fp.encoding)) # decode to string + def read_lines_to_eof(self): """Internal: read lines until EOF.""" - while 1: - line = self.fp.readline(1<<16) + while True: + line = self.fp.buffer.readline(1<<16) # bytes + self.bytes_read += len(line) if not line: self.done = -1 break self.__write(line) def read_lines_to_outerboundary(self): - """Internal: read lines until outerboundary.""" - next = "--" + self.outerboundary - last = next + "--" - delim = "" + """Internal: read lines until outerboundary. + Data is read as bytes : boundaries and line ends must be converted + to bytes for comparisons""" + next = b"--" + self.outerboundary.encode(self.fp.encoding) + last = next + b"--" + delim = b"" last_line_lfend = True - while 1: - line = self.fp.readline(1<<16) + print('limit',self.limit) + _read = 0 + while True: + if _read >= self.limit: + print('limit reached') + break + line = self.fp.buffer.readline(1<<16) # bytes + self.bytes_read += len(line) + _read += len(line) if not line: self.done = -1 break - if line[:2] == "--" and last_line_lfend: + if line[:2] == b"--" and last_line_lfend: strippedline = line.strip() if strippedline == next: break @@ -688,39 +730,41 @@ self.done = 1 break odelim = delim - if line[-2:] == "\r\n": - delim = "\r\n" + if line.endswith(b"\r\n"): + delim = b"\r\n" line = line[:-2] last_line_lfend = True - elif line[-1] == "\n": - delim = "\n" + elif line.endswith(b"\n"): + delim = b"\n" line = line[:-1] last_line_lfend = True else: - delim = "" + delim = b"" last_line_lfend = False self.__write(odelim + line) + print('end read to boundary',self.limit,_read,_read>=self.limit) def skip_lines(self): """Internal: skip lines until outer boundary if defined.""" if not self.outerboundary or self.done: return - next = "--" + self.outerboundary - last = next + "--" + next = b"--" + self.outerboundary.encode(self.fp.encoding) + last = next + b"--" last_line_lfend = True - while 1: - line = self.fp.readline(1<<16) + while True: + line = self.fp.buffer.readline(1<<16) + self.bytes_read += len(line) if not line: self.done = -1 break - if line[:2] == "--" and last_line_lfend: + if line.endswith(b"--") and last_line_lfend: strippedline = line.strip() if strippedline == next: break if strippedline == last: self.done = 1 break - last_line_lfend = line.endswith('\n') + last_line_lfend = line.endswith(b'\n') def make_file(self): """Overridable: return a readable & writable file. @@ -730,7 +774,8 @@ - seek(0) - data is read from it - The file is always opened in text mode. + The file is opened in binary mode for files, in text mode + for other fields This version opens a temporary file for reading and writing, and immediately deletes (unlinks) it. The trick (on Unix!) is @@ -746,7 +791,10 @@ """ import tempfile - return tempfile.TemporaryFile("w+", encoding="utf-8", newline="\n") + if self.filename is not None: + return tempfile.TemporaryFile("wb+") + else: + return tempfile.TemporaryFile("w+") # Test/debug code @@ -800,8 +848,8 @@ list = traceback.format_tb(tb, limit) + \ traceback.format_exception_only(type, value) print("
%s%s
" % ( - escape("".join(list[:-1])), - escape(list[-1]), + html.escape("".join(list[:-1])), + html.escape(list[-1]), )) del tb @@ -812,7 +860,7 @@ print("

Shell Environment:

") print("
") for key in keys: - print("
", escape(key), "
", escape(environ[key])) + print("
", html.escape(key), "
", html.escape(environ[key])) print("
") print() @@ -825,10 +873,10 @@ print("

No form fields.") print("

") for key in keys: - print("
" + escape(key) + ":", end=' ') + print("
" + html.escape(key) + ":", end=' ') value = form[key] - print("" + escape(repr(type(value))) + "") - print("
" + escape(repr(value))) + print("" + html.escape(repr(type(value))) + "") + print("
" + html.escape(repr(value))) print("
") print() @@ -839,9 +887,9 @@ try: pwd = os.getcwd() except os.error as msg: - print("os.error:", escape(str(msg))) + print("os.error:", html.escape(str(msg))) else: - print(escape(pwd)) + print(html.escape(pwd)) print() def print_arguments(): @@ -899,9 +947,9 @@ # ========= def escape(s, quote=None): - '''Replace special characters "&", "<" and ">" to HTML-safe sequences. - If the optional flag quote is true, the quotation mark character (") - is also translated.''' + """Deprecated API.""" + warn("cgi.escape is deprecated, use html.escape instead", + PendingDeprecationWarning, stacklevel=2) s = s.replace("&", "&") # Must be done first! s = s.replace("<", "<") s = s.replace(">", ">") @@ -909,6 +957,7 @@ s = s.replace('"', """) return s + def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"): import re return re.match(_vb_pattern, s)