Index: Lib/sgmllib.py
===================================================================
--- Lib/sgmllib.py	(revision 61302)
+++ Lib/sgmllib.py	(working copy)
@@ -31,9 +31,14 @@
 piclose = re.compile('>')
 endbracket = re.compile('[<>]')
 tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*')
-attrfind = re.compile(
+
+attrfind_quotestart = re.compile(
+    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)\s*=\s*[\'"]')
+attrfind_completedquote = re.compile(
+    r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"))')
+attrfind_unquoted = re.compile(
     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*'
-    r'(\'[^\']*\'|"[^"]*"|[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
+    r'([][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*))?')
 
 
 class SGMLParseError(RuntimeError):
@@ -249,42 +254,72 @@
             self.finish_shorttag(tag, data)
             self.__starttag_text = rawdata[start_pos:match.end(1) + 1]
             return k
-        # XXX The following should skip matching quotes (' or ")
-        # As a shortcut way to exit, this isn't so bad, but shouldn't
-        # be used to locate the actual end of the start tag since the
-        # < or > characters may be embedded in an attribute value.
-        match = endbracket.search(rawdata, i+1)
-        if not match:
-            return -1
-        j = match.start(0)
-        # Now parse the data between i+1 and j into a tag and attrs
-        attrs = []
+
+        attrs = []  # bound for both branches; '<>' has no attributes
+        j = i
+        k = i+1
         if rawdata[i:i+2] == '<>':
             # SGML shorthand: <> == <last open tag seen>
-            k = j
+            j = i+1
             tag = self.lasttag
         else:
+            # Now parse the data after i into a tag and attrs
             match = tagfind.match(rawdata, i+1)
             if not match:
                 self.error('unexpected call to parse_starttag')
             k = match.end(0)
             tag = rawdata[i+1:k].lower()
-            self.lasttag = tag
-        while k < j:
-            match = attrfind.match(rawdata, k)
-            if not match: break
-            attrname, rest, attrvalue = match.group(1, 2, 3)
-            if not rest:
-                attrvalue = attrname
-            else:
-                if (attrvalue[:1] == "'" == attrvalue[-1:] or
-                    attrvalue[:1] == '"' == attrvalue[-1:]):
-                    # strip quotes
-                    attrvalue = attrvalue[1:-1]
-                attrvalue = self.entity_or_charref.sub(
-                    self._convert_ref, attrvalue)
-            attrs.append((attrname.lower(), attrvalue))
-            k = match.end(0)
+
+            while True:
+                # This is the loop for finding attributes...
+                
+                # First, we find a new endbracket location (j),
+                # if the old location is behind the point
+                # we've parsed up to (k)
+                if j < k:
+                    match = endbracket.search(rawdata, k)
+                    if not match:
+                        return -1
+                    j = match.start(0)
+
+                # To handle quoted strings, we first check if there is a
+                # completed quote
+                match = attrfind_completedquote.match(rawdata, k)
+                if not match:
+                    # If not, we check if there was a quote started (but not
+                    # finished, since we already checked for that) - if so, we
+                    # have an incomplete expression, and return -1
+                    match = attrfind_quotestart.match(rawdata, k)
+                    if match:
+                        return -1
+
+                    # Otherwise, we look for an unquoted (and possibly
+                    # mal-formed) attribute
+                    match = attrfind_unquoted.match(rawdata, k)
+
+                if not match:
+                    # If we can't find an attribute (and don't have open
+                    # strings!), we've found all the attributes we can, so break
+                    # out of the loop, and close the tag
+                    break
+
+                # Process the attribute we found...
+                attrname, rest, attrvalue = match.group(1, 2, 3)
+                if not rest:
+                    attrvalue = attrname
+                else:
+                    if (attrvalue[:1] == "'" == attrvalue[-1:] or
+                        attrvalue[:1] == '"' == attrvalue[-1:]):
+                        # strip quotes
+                        attrvalue = attrvalue[1:-1]
+                    attrvalue = self.entity_or_charref.sub(
+                        self._convert_ref, attrvalue)
+                attrs.append((attrname.lower(), attrvalue))
+                k = match.end(0)
+
+        # Close up the tag, do housekeeping
+        self.lasttag = tag
+
         if rawdata[j] == '>':
             j = j+1
         self.__starttag_text = rawdata[start_pos:j]
Index: Lib/test/test_sgmllib.py
===================================================================
--- Lib/test/test_sgmllib.py	(revision 61302)
+++ Lib/test/test_sgmllib.py	(working copy)
@@ -284,6 +284,13 @@
             ('charref', 'convert', '42'),
             ('codepoint', 'convert', 42),
             ])
+        
+    def test_attr_values_quoted_markup(self):
+        """Multi-line and markup in attribute values"""
+        self.check_events("""<a title="foo\n<br>bar">text</a>""",
+            [("starttag", "a", [("title", "foo\n<br>bar")]),
+             ("data", "text"),
+             ("endtag", "a")])
 
     def test_attr_funky_names(self):
         self.check_events("""<a a.b='v' c:d=v e-f=v>""", [
@@ -373,16 +380,11 @@
             if len(data) != CHUNK:
                 break
 
-    # XXX These tests have been disabled by prefixing their names with
-    # an underscore.  The first two exercise outstanding bugs in the
-    # sgmllib module, and the third exhibits questionable behavior
-    # that needs to be carefully considered before changing it.
-
-    def _test_starttag_end_boundary(self):
+    def test_starttag_end_boundary(self):
         self.check_events("<a b='<'>", [("starttag", "a", [("b", "<")])])
         self.check_events("<a b='>'>", [("starttag", "a", [("b", ">")])])
 
-    def _test_buffer_artefacts(self):
+    def test_buffer_artefacts(self):
         output = [("starttag", "a", [("b", "<")])]
         self.check_events(["<a b='<'>"], output)
         self.check_events(["<a ", "b='<'>"], output)
@@ -412,6 +414,10 @@
         self.check_events(["<!--abc--", ">"], output)
         self.check_events(["<!--abc-->", ""], output)
 
+    # XXX This test has been disabled by prefixing its name with an
+    # underscore.  It exhibits questionable behavior that needs to be
+    # carefully considered before changing it.
+
     def _test_starttag_junk_chars(self):
         self.check_parse_error("<")
         self.check_parse_error("<>")