Report the starting line and column of multi-line string tokens (issue 16806).

When the tokenizer begins scanning a string literal it now saves the current
line number (first_lineno) and the pointer to the start of the current line
(multi_line_start).  parsetok.c passes those saved values, instead of the
tokenizer's current-line values, to PyParser_AddToken() for STRING tokens, so
lineno/col_offset refer to where the string starts.

Review note: the local `line_start` in parsetok.c is declared `const char *`
(not `int`), because it holds tok->line_start / tok->multi_line_start and is
compared with and subtracted from the token start pointer `a`.

diff -r 3528cc01c4d9 Lib/test/test_ast.py
--- a/Lib/test/test_ast.py	Sun Dec 30 22:15:37 2012 +0100
+++ b/Lib/test/test_ast.py	Sun Dec 30 22:23:34 2012 +0100
@@ -289,6 +289,25 @@
         self.assertRaises(TypeError, ast.Num, 1, 2)
         self.assertRaises(TypeError, ast.Num, 1, 2, lineno=0)
 
+    def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):
+        node = ast.parse(
+            '"""line one\nline two"""\n\n'
+            'def foo():\n  """line one\n  line two"""\n\n'
+            '  def bar():\n    """line one\n    line two"""\n'
+            '  """line one\n  line two"""\n'
+            '"""line one\nline two"""\n\n'
+        )
+        self.assertEqual(node.body[0].col_offset, 0)
+        self.assertEqual(node.body[0].lineno, 1)
+        self.assertEqual(node.body[1].body[0].col_offset, 2)
+        self.assertEqual(node.body[1].body[0].lineno, 5)
+        self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)
+        self.assertEqual(node.body[1].body[1].body[0].lineno, 9)
+        self.assertEqual(node.body[1].body[2].col_offset, 2)
+        self.assertEqual(node.body[1].body[2].lineno, 11)
+        self.assertEqual(node.body[2].col_offset, 0)
+        self.assertEqual(node.body[2].lineno, 13)
+
     def test_module(self):
         body = [ast.Num(42)]
         x = ast.Module(body)
diff -r 3528cc01c4d9 Parser/parsetok.c
--- a/Parser/parsetok.c	Sun Dec 30 22:15:37 2012 +0100
+++ b/Parser/parsetok.c	Sun Dec 30 22:23:34 2012 +0100
@@ -155,6 +155,8 @@
         size_t len;
         char *str;
         int col_offset;
+        int lineno;
+        const char *line_start;
 
         type = PyTokenizer_Get(tok, &a, &b);
         if (type == ERRORTOKEN) {
@@ -190,13 +192,19 @@
 
 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
 #endif
-        if (a >= tok->line_start)
-            col_offset = a - tok->line_start;
+        /* Nodes of type STRING, especially multi line strings
+           must be handled differently in order to get both
+           the starting line number and the column offset right.
+           (cf. issue 16806) */
+        lineno = type == STRING ? tok->first_lineno : tok->lineno;
+        line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+        if (a >= line_start)
+            col_offset = a - line_start;
         else
-            col_offset = -1;
+            col_offset = -1;
 
         if ((err_ret->error =
-             PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
+             PyParser_AddToken(ps, (int)type, str, lineno, col_offset,
                                &(err_ret->expected))) != E_OK) {
             if (err_ret->error != E_DONE) {
                 PyObject_FREE(str);
diff -r 3528cc01c4d9 Parser/tokenizer.c
--- a/Parser/tokenizer.c	Sun Dec 30 22:15:37 2012 +0100
+++ b/Parser/tokenizer.c	Sun Dec 30 22:23:34 2012 +0100
@@ -1529,6 +1529,14 @@
         int quote = c;
         int triple = 0;
         int tripcount = 0;
+
+        /* Nodes of type STRING, especially multi line strings
+           must be handled differently in order to get both
+           the starting line number and the column offset right.
+           (cf. issue 16806) */
+        tok->first_lineno = tok->lineno;
+        tok->multi_line_start = tok->line_start;
+
         for (;;) {
             c = tok_nextc(tok);
             if (c == '\n') {
diff -r 3528cc01c4d9 Parser/tokenizer.h
--- a/Parser/tokenizer.h	Sun Dec 30 22:15:37 2012 +0100
+++ b/Parser/tokenizer.h	Sun Dec 30 22:23:34 2012 +0100
@@ -31,6 +31,8 @@
     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
     char *prompt, *nextprompt;          /* For interactive prompting */
     int lineno;         /* Current line number */
+    int first_lineno;   /* First line of a single line or multi line string
+                           expression (cf. issue 16806) */
     int level;          /* () [] {} Parentheses nesting level */
             /* Used to allow free continuations inside them */
     /* Stuff for checking on different tab sizes */
@@ -46,6 +48,9 @@
     char *encoding;
     int cont_line;          /* whether we are in a continuation line. */
     const char* line_start;     /* pointer to start of current line */
+    const char* multi_line_start; /* pointer to start of first line of
+                                     a single line or multi line string
+                                     expression (cf. issue 16806) */
 #ifndef PGEN
     PyObject *decoding_readline; /* codecs.open(...).readline */
     PyObject *decoding_buffer;