import collections
import re

# A single lexical token: its type name, matched text, 1-based line number
# and 0-based column offset within that line.
Token = collections.namedtuple('Token', 'typ value line column')

def tokenize(s):
    """Lazily split the string *s* into Token tuples.

    Token types are NUMBER, ASSIGN, END, ID and OP.  An identifier whose
    text is one of the keywords (IF, THEN, FOR, NEXT, GOSUB, RETURN) is
    reported with the keyword itself as its type.  Spaces, tabs and
    newlines are consumed but never yielded; newlines only advance the
    line counter.

    Each Token carries the 1-based line number and the 0-based column of
    its first character, measured from the start of its own line.

    Raises:
        RuntimeError: if a character that matches no token pattern is
            reached before the end of *s*.  Because this is a generator,
            the error surfaces during iteration, after all preceding
            tokens have been yielded.
    """
    keywords = {'IF', 'THEN', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
    tok_spec = [
        ('NUMBER', r'\d+(\.\d*)?'), # Integer or decimal number
        ('ASSIGN', r':='),          # Assignment operator
        ('END', ';'),               # Statement terminator
        ('ID', r'[A-Za-z]+'),       # Identifiers
        ('OP', r'[+*\/\-]'),        # Arithmetic operators
        ('NEWLINE', r'\n'),         # Line endings
        ('SKIP', r'[ \t]'),         # Skip over spaces and tabs
    ]
    # One alternation of named groups; the name of the group that matched
    # (mo.lastgroup) identifies the token type.
    tok_re = '|'.join('(?P<%s>%s)' % pair for pair in tok_spec)
    gettok = re.compile(tok_re).match
    line = 1
    pos = line_start = 0
    mo = gettok(s)
    while mo is not None:
        typ = mo.lastgroup
        if typ == 'NEWLINE':
            # The next line begins just *after* the newline character.
            # Using the newline's own index here would make columns on
            # every line but the first come out one too large.
            line_start = mo.end()
            line += 1
        elif typ != 'SKIP':
            val = mo.group(typ)
            if typ == 'ID' and val in keywords:
                # Keywords are lexed as identifiers, then promoted.
                typ = val
            yield Token(typ, val, line, mo.start() - line_start)
        pos = mo.end()
        mo = gettok(s, pos)
    # The loop stops at the first position where nothing matches; if that
    # is not the end of the input, the character there is illegal.
    if pos != len(s):
        raise RuntimeError('Unexpected character %r on line %d' %(s[pos], line))

# Sample input for the demonstration below: two indented assignment
# statements, each terminated by ';' and a newline.
statements = (
    '    total := total + price * quantity;\n'
    '    tax := price * 0.05;\n'
)

# Print every token produced for the sample input, one per line.
for token in tokenize(statements):
    print(token)