#!/usr/bin/env python
'''
Check which C functions are not covered by the test suite and create a report

This script is meant to be run from a trunk/branch root, to gather information
about the coverage of C functions by the test suite. It needs a "make coverage"
build and a run of the test suite in some way.
 
The '-m' command line switch creates a workable dataset by calling
"make coverage test", but it's possible that functions flagged as not-covered
would be exercised by a more complete run of the test suite.

The '-r' command line switch runs gcov to tally the number of times each 
function is called, then picks those never called and cleans the data to 
create a concise report.

'''

import itertools
from os import path
from subprocess import Popen, PIPE
from optparse import OptionParser

# Writable handle on the null device.
# NOTE(review): nothing in the visible code uses it; kept in case other code
# imports it.
DEVNULL = open('/dev/null', 'w')

# gcov executable and options
# -n means "don't write .gcov files for each analyzed C file"; main() drops
# it when -g/--gcov is given, to get those per file coverage reports
# -f asks for function call summaries
# -o must stay last: rungcov() appends the coverage data directory right
# after it, followed by the source file
GCOVARGS = ['gcov', '-n', '-f', '-o']

# find executable and options
# The explicit '.' starting point is required by POSIX/BSD find; GNU find
# defaults to it, so the listing is unchanged there.
FIND = ['find', '.', '-name', '*.c']

# find coverage data files
COVFIND = ['find', 'build', '-name', '*.gcno']

# Functions that should be ignored (_Py_tok_dump too?)
# match() tests these as plain substrings of the gcov output for a function.
BOGUS = {
    'gnu_dev_major', 'getitem_idx', '*fstat64', '_Py_tok_dump',
    'mknod', 'gnu_dev_minor', 'mknodat', 'indenterror',
    'gnu_dev_makedev', '*stat64', '*fstatat64', '*lstat64',
    'ffi_prep_args_raw', 'ffi_prep_raw_closure_loc', 'ffi_raw_call',
    'initialize_aggregate', 'tipc_addr', 'tipc_cluster', 'tipc_node',
    'tipc_zone',
}

# Patterns for matching functions with 0 calls and lines about functions
# (as opposed to files, etc.). Probably depend on locale.
NOTCALLED = "'\nLines executed:0.00% of"
FUNC = "Function '"

# Text looked for in gcov's stderr; rungcov() lists the source file as having
# no coverage data when it is present.
ERROR = 'cannot open graph file'

def match(funclist, checkall=False):
    ''' Pick the never-called functions out of a list of gcov summaries

    funclist holds chunks of gcov output (rungcov() splits the output on
    blank lines). A chunk is kept when it contains both the FUNC and the
    NOTCALLED markers.
    Unless checkall is true, chunks containing any of the BOGUS names (tested
    as plain substrings) are dropped.
    Return the kept chunks with FUNC removed and NOTCALLED replaced by a
    space, which leaves roughly 'function_name #lines'.
    '''
    uncovered = []
    for chunk in funclist:
        # Not a summary for a function with zero executed lines
        if FUNC not in chunk or NOTCALLED not in chunk:
            continue
        # Known noise, only reported when everything was asked for
        if not checkall and any(name in chunk for name in BOGUS):
            continue
        uncovered.append(chunk.replace(FUNC, '').replace(NOTCALLED, ' '))
    return uncovered


def findcov():
    ''' Finds coverage data for modules built into the python binary

    For many (most?) files, coverage data is in the same dir as the source,
    but modules that are included in the python binary have coverage data
    under './build/temp*/path/Modules/*', so we need to find those and map
    them to source file names.

    Runs the COVFIND command and returns a dict shaped like
    {'filename.c': 'path/to/coverage/data'}, keyed by the base name of each
    file found with its extension replaced by '.c'. If two files share a base
    name, the one listed last wins.
    '''
    # communicate() reads all the output, closes the pipe and waits for the
    # child, so no zombie process or open descriptor is left behind.
    listing, _ = Popen(COVFIND, stdout=PIPE).communicate()
    covdirs = {}
    for item in listing.decode().split('\n'):
        if not item:
            # The text after the final newline (or a stray blank line)
            continue
        covpath, filename = path.split(item)
        cfile = path.splitext(filename)[0] + '.c'
        covdirs[cfile] = covpath
    return covdirs

def rungcov(covdirs, checkall=False):
    ''' Finds .c files, calls gcov for each one and stores the output

    covdirs maps a source file base name to the directory holding its
    coverage data (see findcov()); files missing from it are looked up in
    their own directory.
    checkall is handed to match() and decides whether BOGUS functions are
    reported too.

    Returns a dict shaped like {'path/to/file.c': ['list', 'of', 'functions']}
    with an entry only for files where match() reported something.
    When run as a script, also prints the files for which gcov could not open
    the graph file.
    '''
    # communicate() drains the pipe and reaps the child process
    found, _ = Popen(FIND, stdout=PIPE).communicate()
    gcov_output = {}
    errors = []
    for entry in found.decode().split('\n'):
        if not entry:
            # The text after the final newline (or a stray blank line)
            continue
        cfile = path.basename(entry)
        # If gcov data not in covdirs, use the source dir
        edir = covdirs.get(cfile, path.dirname(entry))
        gcov = Popen(GCOVARGS + [edir, entry], stdout=PIPE, stderr=PIPE)
        # Both pipes have to be read together: draining stderr to EOF first
        # blocks forever once gcov has filled the stdout pipe. This also
        # closes the pipes and waits for gcov before the next file.
        out, err = gcov.communicate()
        if ERROR in err.decode():
            errors.append(entry)
        # Blank lines separate the per-function / per-file summaries
        funclist = out.decode().split('\n\n')
        cleanlist = match(funclist, checkall)
        if cleanlist:
            gcov_output[entry] = cleanlist
    if __name__ == '__main__':
        print('No coverage data was found for these files:')
        print('\n'.join(errors))
    return gcov_output

def listbogus(gcov_output):
    ''' Print never-called functions that show up more than once

    gcov_output is the dict built by rungcov(). An entry counts as repeated
    when the exact same 'function_name #lines' string occurs more than once
    across all the lists in the dict. The names of those functions are
    printed, followed by how that set compares with BOGUS.
    '''
    # How many times each cleaned entry was reported, over all files
    tally = {}
    for cleaned in gcov_output.values():
        for entry in cleaned:
            tally[entry] = tally.get(entry, 0) + 1
    # Only the function name (text before the first space) is reported
    repeated = {entry.split(' ')[0]
                for entry, seen in tally.items() if seen > 1}
    report = [
        '\nThese functions were found as hits in more than one file:',
        ', '.join(sorted(repeated)),
        '\nOf the above, these are missing from BOGUS:',
        ', '.join(sorted(repeated - BOGUS)),
        '\nThese functions are in BOGUS but were not found in this run:',
        ', '.join(sorted(BOGUS - repeated)),
        '',
    ]
    print('\n'.join(report))

def write(formated, fname='uncovered.txt'):
    ''' Writes the dict formated to the file fname

    formated maps a name to a list of strings. Names are written in sorted
    order, each as a 'name:' line followed by its strings indented by four
    spaces and then a blank line. Names with an empty list are left out.
    The file fname is overwritten.
    '''
    end = '\n'
    spc = ' ' * 4
    msg = []
    for name, lines in sorted(formated.items()):
        if lines:
            msg.append(name + ':' + end)
            functions = end.join(spc + name_count for name_count in lines)
            msg.append(functions + end * 2)
    # The with block closes the file even if writing fails
    with open(fname, 'w') as out:
        out.writelines(msg)


def main():
    ''' Runs the show: collect data, process and report

    Parses the command line and then, depending on the switches:
    -m  runs ./configure and make, one after the other
    -g  makes gcov write .gcov files (drops '-n' from GCOVARGS)
    -r  runs the analysis and writes the report with write()
    -a  includes BOGUS functions and prints the listbogus() summary
    With neither -m nor -r the help text is printed.

    Raises SystemExit if ./configure fails.
    '''
    desc = 'Find C functions not covered by the test suite.'
    parser = OptionParser(description=desc)
    parser.add_option('-m', '--make',
                      dest='make', action='store_true', default=False,
                      help='configure, compile and run the test suite')

    parser.add_option('-r', '--run',
                      dest='run', action='store_true', default=False,
                      help='analyze coverage data and report to file')

    parser.add_option('-a', '--all',
                      dest='checkall', action='store_true', default=False,
                      help='include "bogus" functions')
    parser.add_option('-g', '--gcov',
                      dest='gcovfiles', action='store_true', default=False,
                      help='create .gcov files')

    options, _ = parser.parse_args()
    nop = True
    if options.make:
        # Err, also makes sure you have enough data
        # Each step has to finish before the next one starts: make needs the
        # configured tree, and the analysis below needs the finished build.
        if Popen(['./configure', '--with-pydebug']).wait() != 0:
            raise SystemExit('./configure failed, not running make')
        # The exit status of make is not checked, so the analysis still
        # runs whatever make returned.
        Popen(['make', 'clean', 'coverage', 'test']).wait()
        nop = False
    checkall = options.checkall
    if options.gcovfiles and '-n' in GCOVARGS:
        # Remove by value so a second call cannot drop another option
        GCOVARGS.remove('-n')
    if options.run:
        covdirs = findcov()
        gcov_output = rungcov(covdirs, checkall)
        write(gcov_output)
        if checkall:
            listbogus(gcov_output)
        nop = False
    if nop:
        parser.print_help()

# Run the command line interface only when executed as a script
if __name__ == '__main__':
    main()