Index: Parser/parsetok.c
===================================================================
--- Parser/parsetok.c	(revision 86409)
+++ Parser/parsetok.c	(working copy)
@@ -118,6 +118,41 @@
 #endif
 #endif
 
+/* Count the number of valid Unicode code points in a UTF-8 encoded string of
+ * bytes */
+
+static Py_ssize_t
+count_utf8_chars(const unsigned char *s, Py_ssize_t len)
+{
+    /* Return how many code points make up the longest well-formed prefix
+       of the len bytes at s.  Counting stops at the first malformed or
+       truncated sequence.  Lead bytes 0xC0, 0xC1 and 0xF5..0xFF never
+       occur in UTF-8 (RFC 3629) and are rejected.  Only the 10xxxxxx
+       shape of trail bytes is checked, so overlong 3/4-byte forms,
+       encoded surrogates and values above U+10FFFF are still counted. */
+    Py_ssize_t i = 0, count = 0, expected;
+
+    while (i < len) {
+        unsigned char b = s[i++];
+        if (b < 0x80) {                 /* 0xxxxxxx: ASCII */
+            count++;
+            continue;
+        }
+        if (b < 0xC2 || b > 0xF4)       /* stray trail byte or illegal lead */
+            return count;
+        expected = (b < 0xE0) ? 1 : (b < 0xF0) ? 2 : 3;
+        if (expected > len - i)         /* sequence cut off by end of input */
+            return count;
+        i += expected;
+        for (; expected; expected--) {  /* every trail byte is 10xxxxxx */
+            if ((s[i - expected] & 0xC0) != 0x80)
+                return count;
+        }
+        count++;                        /* one complete code point */
+    }
+    return count;
+}
+
 /* Parse input coming from the given tokenizer structure.
    Return error code. */
 
@@ -231,7 +266,7 @@
         if (tok->buf != NULL) {
             size_t len;
             assert(tok->cur - tok->buf < INT_MAX);
-            err_ret->offset = (int)(tok->cur - tok->buf);
+            err_ret->offset = count_utf8_chars(tok->buf, tok->cur - tok->buf);
             len = tok->inp - tok->buf;
             err_ret->text = (char *) PyObject_MALLOC(len + 1);
             if (err_ret->text != NULL) {