Message61046
Uploading large binary files using multipart/form-data can be very inefficient because the LF character may occur very frequently in binary data, causing read_lines_to_outerboundary to loop far too many times.
*** cgi.py.Py24 Thu Dec 7 18:46:13 2006
--- cgi.py Thu Dec 7 16:38:04 2006
***************
*** 707,713 ****
last = next + "--"
delim = ""
while 1:
! line = self.fp.readline()
if not line:
self.done = -1
break
--- 703,709 ----
last = next + "--"
delim = ""
while 1:
! line = self.fp_readline()
if not line:
self.done = -1
break
***************
*** 729,734 ****
--- 730,753 ----
delim = ""
self.__write(odelim + line)
+ def fp_readline(self):
+
+ tell = self.fp.tell()
+ buffer = self.fp.read(1 << 17)
+ parts = buffer.split("\n")
+ retlst = []
+ for part in parts:
+ if part.startswith("--"):
+ if retlst:
+ retval = "\n".join(retlst) + "\n"
+ else:
+ retval = part + "\n"
+ self.fp.seek(tell + len(retval))
+ return retval
+ else:
+ retlst.append(part)
+ return buffer
+
def skip_lines(self):
"""Internal: skip lines until outer boundary if defined."""
if not self.outerboundary or self.done:
The patch reads the file in larger increments. For my test file of 138 MB, it reduced parsing time from 168 seconds to 19 seconds.
#------------ test script --------------------
import cgi
import cgi
import os
import profile
import stat
def run():
    """Parse the saved request in 'body.txt' with cgi.FieldStorage.

    The Content-Type header value is read from 'content_type.txt' and
    the Content-Length is taken from the size of 'body.txt'.  Both
    files are closed before returning.

    Returns the populated cgi.FieldStorage instance.
    """
    filename = 'body.txt'
    size = os.stat(filename)[stat.ST_SIZE]

    # try/finally rather than "with": the script targets Python 2.4.
    content_type_file = open('content_type.txt', 'rb')
    try:
        content_type = content_type_file.read()
    finally:
        content_type_file.close()

    environ = {}
    environ["CONTENT_TYPE"] = content_type
    environ["REQUEST_METHOD"] = "POST"
    environ["CONTENT_LENGTH"] = str(size)

    fp = open(filename, 'rb')
    try:
        fieldstorage = cgi.FieldStorage(fp, None, environ=environ)
    finally:
        fp.close()
    return fieldstorage
import hotshot, hotshot.stats
import time
# Time one full parse of the saved request.
if 1:
    started = time.time()
    prof = hotshot.Profile("bug1718.prof")
    # hotshot profiler will crash with the
    # patch applied on windows xp
    #prof_results = prof.runcall(run)
    prof_results = run()
    prof.close()
    finished = time.time()
    print(finished - started)

# Disabled: dump every parsed field, truncating long values.
if 0:
    for key in prof_results.keys():
        field = prof_results[key]
        if len(field.value) > 100:
            print("%s %s" % (key, field.value[:80] + "..."))
        else:
            print("%s %s" % (key, field))
content_type.txt
----------------------------
multipart/form-data; boundary=----------ThIs_Is_tHe_bouNdaRY_$
|
|
Date |
User |
Action |
Args |
2008-01-20 09:59:08 | admin | link | issue1610654 messages |
2008-01-20 09:59:08 | admin | create | |
|