diff -r 5ef49659935f Lib/test/test_ast.py
--- a/Lib/test/test_ast.py	Fri Dec 28 19:09:41 2012 +0100
+++ b/Lib/test/test_ast.py	Sat Dec 29 11:58:45 2012 +0100
@@ -507,6 +507,25 @@
         self.assertEqual(ast.get_docstring(node.body[0]),
                          'line one\nline two')
 
+    def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):
+        node = ast.parse(
+            '"""line one\nline two"""\n\n'
+            'def foo():\n  """line one\n  line two"""\n\n'
+            '  def bar():\n    """line one\n    line two"""\n'
+            '  """line one\n  line two"""\n'
+            '"""line one\nline two"""\n\n'
+        )
+        self.assertEqual(node.body[0].col_offset, 0)
+        self.assertEqual(node.body[0].lineno, 1)
+        self.assertEqual(node.body[1].body[0].col_offset, 2)
+        self.assertEqual(node.body[1].body[0].lineno, 5)
+        self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)
+        self.assertEqual(node.body[1].body[1].body[0].lineno, 9)
+        self.assertEqual(node.body[1].body[2].col_offset, 2)
+        self.assertEqual(node.body[1].body[2].lineno, 11)
+        self.assertEqual(node.body[2].col_offset, 0)
+        self.assertEqual(node.body[2].lineno, 13)
+
     def test_literal_eval(self):
         self.assertEqual(ast.literal_eval('[1, 2, 3]'), [1, 2, 3])
         self.assertEqual(ast.literal_eval('{"foo": 42}'), {"foo": 42})
diff -r 5ef49659935f Parser/parsetok.c
--- a/Parser/parsetok.c	Fri Dec 28 19:09:41 2012 +0100
+++ b/Parser/parsetok.c	Sat Dec 29 11:58:45 2012 +0100
@@ -154,6 +154,8 @@
         size_t len;
         char *str;
         int col_offset;
+        int lineno;               /* line number reported for this token */
+        const char *line_start;   /* line start used to compute col_offset */
 
         type = PyTokenizer_Get(tok, &a, &b);
         if (type == ERRORTOKEN) {
@@ -204,14 +206,21 @@
             }
         }
 #endif
-        if (a >= tok->line_start)
-            col_offset = a - tok->line_start;
+
+        /* STRING tokens may span several lines, so report the line on
+           which the token starts (tok->first_lineno) and measure the
+           column offset from the start of that line
+           (tok->multi_line_start).  (cf. issue 16806) */
+        lineno = type == STRING ? tok->first_lineno : tok->lineno;
+        line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+        if (a >= line_start)
+            col_offset = a - line_start;
         else
             col_offset = -1;
 
         if ((err_ret->error =
              PyParser_AddToken(ps, (int)type, str,
-                               tok->lineno, col_offset,
+                               lineno, col_offset,
                                &(err_ret->expected))) != E_OK) {
             if (err_ret->error != E_DONE) {
                 PyObject_FREE(str);
diff -r 5ef49659935f Parser/tokenizer.c
--- a/Parser/tokenizer.c	Fri Dec 28 19:09:41 2012 +0100
+++ b/Parser/tokenizer.c	Sat Dec 29 11:58:45 2012 +0100
@@ -1597,6 +1597,13 @@
         int quote_size = 1;             /* 1 or 3 */
         int end_quote_size = 0;
 
+        /* Record the current line number and line start before the
+           string is scanned, so that the parser can report where a
+           (possibly multi-line) STRING token starts.
+           (cf. issue 16806) */
+        tok->first_lineno = tok->lineno;
+        tok->multi_line_start = tok->line_start;
+
         /* Find the quote size and start of string */
         c = tok_nextc(tok);
         if (c == quote) {
diff -r 5ef49659935f Parser/tokenizer.h
--- a/Parser/tokenizer.h	Fri Dec 28 19:09:41 2012 +0100
+++ b/Parser/tokenizer.h	Sat Dec 29 11:58:45 2012 +0100
@@ -37,6 +37,8 @@
     int pendin;         /* Pending indents (if > 0) or dedents (if < 0) */
     char *prompt, *nextprompt;  /* For interactive prompting */
     int lineno;         /* Current line number */
+    int first_lineno;   /* First line of a single-line or multi-line string
+                           expression (cf. issue 16806) */
     int level;          /* () [] {} Parentheses nesting level */
             /* Used to allow free continuations inside them */
     /* Stuff for checking on different tab sizes */
@@ -58,6 +60,9 @@
     char *encoding;         /* Source encoding. */
     int cont_line;          /* whether we are in a continuation line. */
     const char* line_start;     /* pointer to start of current line */
+    const char* multi_line_start; /* pointer to start of first line of
+                                     a single-line or multi-line string
+                                     expression (cf. issue 16806) */
 #ifndef PGEN
     PyObject *decoding_readline; /* open(...).readline */
     PyObject *decoding_buffer;