import os
import mmap
import argparse
import subprocess
import csv
import re
import zipfile

# Matches the analysis URL stored in a hub CSV row and captures the numeric
# analysis id.  The dot before "csv" is escaped so that only a literal '.'
# is accepted, and a raw string keeps the backslash intact.
aidre = re.compile(r'/analysis/([0-9]+)\.csv')
# An analysis log larger than this many bytes is accepted without inspection.
FILE_SIZE_FOR_CHECK = 5000
# An analysis log smaller than this many bytes triggers the "file is too
# small" warning when the foreground notice is not found in it.
FILE_SIZE_FOR_WARN = 2000

def print_warning(fname, fsize):
    """Print a warning that the log file is much smaller than expected.

    fname -- name of the log file, echoed in the message.
    fsize -- size of that file in bytes (formatted with %d).
    """
    # The leading and trailing empty entries reproduce the blank line that
    # surrounds the message.
    template_lines = (
        "",
        "Warning:",
        "   File '%s' is only %d bytes in length, which is much smaller than expected.",
        "   This may indicate that the log output has been directed to stderr/stdout.",
        '   Examine the output of the "codesonar analyze" command to see the analysis log.',
        "   You may wish to redirect the output to a file.  For example:",
        "      codesonar analyze ... > analysis_log.txt 2>&1",
        "",
    )
    template = "\n".join(template_lines)
    print(template % (fname, fsize))

def check_msg(fname):
    """Report whether the log contains the "-foreground" notice.

    The notice is the text written in place of a real analysis log when the
    log is not being sent to the hub.  If it is found, a warning and the
    notice itself are printed.

    fname -- path of the log file to inspect.

    Returns True when the notice is present, False otherwise.
    Raises OSError if the file cannot be opened.
    """
    # Check the contents of the log for the -foreground
    # message indicating the log is not in the Hub
    msg = """The analysis log is not being sent to the hub because the -foreground command line
option or FOREGROUND .conf file setting was used.
The analysis log is being directly emitted as stdout/stderr of the "codesonar analyze" command.

Examine the output of the "codesonar analyze" command to see the analysis log.
You may wish to redirect the output to a file.  For example:
   codesonar analyze ... > analysis_log.txt 2>&1


"""
    # errors='replace' keeps a stray undecodable byte in the log from
    # aborting the check; the notice itself is plain ASCII, so matching is
    # unaffected.  The 'with' block guarantees the handle is closed.
    with open(fname, errors='replace') as f:
        # It's OK to read the whole file since it is small.
        s = f.read()
    if msg in s:
        print('Warning: the log file is unexpectedly small. Its contents are as follows:')
        print(msg)
        return True
    return False

def check_analysis_log(fname):
    """Warn if the analysis log looks like it was not captured by the hub.

    fname -- path of the downloaded analysis log.

    Nothing is returned.  A log that cannot be examined (for example
    because the download failed and the file does not exist) produces a
    warning instead of an exception.
    """
    # Check for text that indicates the analysis was run
    # in the foreground. Also check the size, since the
    # diagnostic text is a recent addition
    try:
        size = os.stat(fname).st_size
    except OSError as err:
        print("Warning: could not examine the analysis log '%s': %s" % (fname, err))
        return
    if size > FILE_SIZE_FOR_CHECK:
        # Large enough to be a real log; no need to read it.
        return
    # The content check runs first; the size warning is only a fallback for
    # logs that do not carry the diagnostic text.
    if not check_msg(fname) and size < FILE_SIZE_FOR_WARN:
        print_warning(fname, size)

def extract(hub, item):
    """Run "codesonar get" for one item on the hub.

    hub  -- address of the hub.
    item -- path of the item, relative to the hub address.

    The command's exit status is checked; a non-zero status is reported as
    a warning so that a failed download is visible in the output.
    Nothing is returned.
    """
    print("Extracting '%s'" % item)
    cmdline = ['codesonar', 'get', '%s/%s' % (hub, item)]
    status = subprocess.call(cmdline)
    if status != 0:
        print("Warning: '%s' exited with status %d." % (' '.join(cmdline), status))

def extract_info(hub):
    """Download the hub's info page and store it as info.txt.

    hub -- address of the hub.

    The page is expected to arrive as 'index.html' in the current
    directory and is then renamed.  Raises OSError if 'index.html' is not
    there.
    """
    print("Extracting hub info from '%s'" % hub)
    extract(hub, 'command/info/')
    # os.replace overwrites an info.txt left by an earlier run on every
    # platform; os.rename fails on Windows when the target already exists.
    os.replace('index.html', 'info.txt')

def extract_logs(hub, aid):
    """Download every log of one analysis, then sanity-check the analysis log.

    hub -- address of the hub.
    aid -- id of the analysis whose logs are wanted.
    """
    print("Extracting logs for analysis '%s' from hub '%s'" % (aid, hub))
    # Items are requested in this order; the analysis log comes last.
    item_templates = (
        'analysis/%s-native_log.txt',
        'analysis/%s-parse_log.txt',
        'analysis/%s-parse_logshowall.txt',
        'analysis/%s-ad_log.txt',
    )
    for template in item_templates:
        extract(hub, template % aid)
    ad_log_name = '%s-ad_log.txt' % aid
    check_analysis_log(ad_log_name)

def extract_project_by_pid(hub, pid):
    """Download the logs of the first analysis listed for a project id.

    hub -- address of the hub.
    pid -- id of the project.

    The project's CSV listing is downloaded and scanned; only the first
    row that names an analysis is used.  A warning is printed when the
    listing names no analysis.
    """
    bname = '%s.csv' % pid
    extract(hub, 'project/%s' % bname)
    nrows = extract_from_csv(hub, bname)
    if nrows == 0:
        # pid is a project id, so the message refers to the project rather
        # than to an analysis id.
        print('Warning: no analyses were found for the project with id "%s".' % pid)

def extract_project_by_name(hub, prj):
    """Download the logs of every analysis whose project matches a name.

    hub -- address of the hub.
    prj -- string used as the project search query.
    """
    # Ask the hub to search for the named project(s); the result lists all
    # analyses that match the project name.
    search_item = 'project_search.csv?query="%s"' % prj
    extract(hub, search_item)
    found = extract_from_csv(hub, 'project_search.csv', retrieve_all=True)
    if found != 0:
        return
    print('Warning: no analyses with names matching "%s" were found.' % prj)

def extract_from_csv(hub, csvfile, retrieve_all=False):
    """Download the logs of the analyses listed in a hub CSV file.

    hub          -- address of the hub.
    csvfile      -- path of the CSV file; it is deleted before returning,
                    even if processing fails part-way.
    retrieve_all -- when False, stop after the first analysis found;
                    when True, process every row.

    Returns the number of analyses whose logs were requested.  An empty
    file, or one without a 'url' column, yields a warning and 0.
    Raises OSError if the file cannot be opened.
    """
    nrows = 0
    try:
        with open(csvfile, newline='') as acsv:
            areader = csv.reader(acsv)
            # The url for the analysis is in the column whose name is 'url'.
            # Read the header and find that index.
            hdr = next(areader, None)
            if hdr is None or 'url' not in hdr:
                print("Warning: '%s' has no 'url' column, so no analyses can be located."
                      % csvfile)
                return nrows
            urlindex = hdr.index('url')
            for row in areader:
                if not row:
                    # csv.reader yields an empty list for a blank line.
                    continue
                # A row shorter than the header has no url cell to examine.
                m = aidre.match(row[urlindex]) if urlindex < len(row) else None
                if m is None:
                    print('Warning: I could not find an analysis in the following entry:')
                    print(' row: ', row)
                    continue
                nrows += 1
                extract_logs(hub, m.group(1))
                if not retrieve_all:
                    break
    finally:
        # The CSV is only a temporary download.  The existence check keeps
        # a failed open() from being masked by a second error here.
        if os.path.exists(csvfile):
            os.remove(csvfile)
    return nrows
    
def zipdir(directory):
    """Create '<directory>.zip' holding the entries of `directory`.

    directory -- path of the directory to archive.

    Only the directory's immediate entries are added (os.listdir is not
    recursive).  The archive is written next to the directory and uses
    deflate compression.
    """
    print("Zipping '%s'" % directory)
    # Drop any trailing separator so that "logdir/" produces "logdir.zip"
    # beside the directory instead of a ".zip" file inside it.
    directory = os.path.normpath(directory)
    # The 'with' block closes the archive even if adding a file fails.
    with zipfile.ZipFile(directory + '.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zf:
        for f in os.listdir(directory):
            zf.write(os.path.join(directory, f))

def go():
    """Parse the command line and extract the requested logs from the hub.

    The output directory is created if needed and becomes the working
    directory while logs are downloaded.  The original working directory
    is restored afterwards, and the output directory is zipped when --zip
    was given.
    """
    parser = argparse.ArgumentParser(
        description=('Extract analysis logs from a CodeSonar hub for a set of analyses, ' +
                     'as specified by the command-line arguments.'))

    parser.add_argument("hub", help="The address of the hub")
    parser.add_argument("-a", "--analysis-id", nargs='*',
                        help="The analysis ids")
    parser.add_argument("-i", "--project-id", nargs='*',
                        help="The project ids.")
    parser.add_argument("-p", "--project", nargs='*',
                        help="The strings to use to search for projects by name.")
    parser.add_argument("-d", "--directory", help="The directory in which to place the extracted files",
                        default="logdir")
    parser.add_argument("-z", "--zip",
                        help="Create a zip file from the contents of the directory.",
                        action='store_true', default=False)
    args = parser.parse_args()

    print("CodeSonar log extractor")

    if not os.path.isdir(args.directory):
        print("Output directory '%s' does not exist, so I will attempt to create it." % args.directory)
        # makedirs also creates missing parent directories (e.g. "out/logs").
        os.makedirs(args.directory)

    # Remember where we started: chdir("..") would only get back here for a
    # single-level relative directory, which breaks zipping for nested or
    # absolute paths.
    start_dir = os.getcwd()
    os.chdir(args.directory)
    try:
        extract_info(args.hub)

        # Each option is None when it was not given on the command line.
        for aid in args.analysis_id or ():
            extract_logs(args.hub, aid)

        for pat in args.project or ():
            extract_project_by_name(args.hub, pat)

        for pid in args.project_id or ():
            extract_project_by_pid(args.hub, pid)
    finally:
        os.chdir(start_dir)

    if args.zip:
        zipdir(args.directory)

# Script entry point: run the extractor with the process's command-line arguments.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this file also runs go().
go()
