This issue tracker has been migrated to GitHub, and is currently read-only.
For more information, see the GitHub FAQs in Python's Developer Guide.

Author haepal
Recipients
Date 2007-01-11.18:01:52
SpamBayes Score
Marked as misclassified
Message-id
In-reply-to
Content
Could someone check if the following patch fixes the problem?
This patch was made against revision 51854.

--- sgmllib.py.org	2006-11-06 02:31:12.000000000 -0500
+++ sgmllib.py	2007-01-11 12:39:30.000000000 -0500
@@ -16,6 +16,35 @@
 
 # Regular expressions used for parsing
 
+class MyMatch:
+    def __init__(self, i):
+        self._i = i
+    def start(self, i):
+        return self._i
+    
+class EndBracket:
+    def search(self, data, index):
+        s = data[index:]
+        bs = None
+        quote = None
+        for i,c in enumerate(s):
+            if bs:
+                bs = False
+            else:
+                if c == '<' or c == '>':
+                    if quote is None:
+                        break
+                elif c == "'" or c == '"':
+                    if c == quote:
+                        quote = None
+                    else:
+                        quote = c
+                elif c == '\\':
+                    bs = True
+        else:
+            return None
+        return MyMatch(i+index)
+        
 interesting = re.compile('[&<]')
 incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
                            '<([a-zA-Z][^<>]*|'
@@ -29,7 +58,8 @@
 shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/')
 shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/')
 piclose = re.compile('>')
-endbracket = re.compile('[<>]')
+#endbracket = re.compile('[<>]')
+endbracket = EndBracket()
 tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
 attrfind = re.compile(
     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
History
Date User Action Args
2007-08-23 14:40:34 admin link issue1504333 messages
2007-08-23 14:40:34 admin create