#!/usr/bin/env python3
"""Performance utilities for rocFFT.

Overview
========

General workflow:

- run: runs a suite of FFTs to collect timing information
- post: post processes timing information to compute various statistics
- plot: generate pdf or html plots of the results
- autodyna: clones, builds, dyna-runs, posts, and plots two rocFFT commits

Multiple runs can be compared at the post processing and plotting
stages.  Multiple runs may:

- be from different riders (eg, rocFFT, cuFFT, vkFFT etc)
- be from dyna-rocfft-rider.

Usually:

- a single rider (rocFFT) would be used to track performance over
  time;
- multiple riders (rocFFT, cuFFT) would be used to compare different
  FFT libraries;
- a dyna-rider with multiple libraries (rocFFT) would be used to
  compare two different rocFFT commits.

Runs/subprocesses are logged to `rocfft-perf.log`.


Run
===

The 'run' command drives FFT riders (if they accept the same command
line arguments as `rocfft-rider`).  The rider to use is specified by
the `--rider/-w` switch.

Test problems are generated using a `ProblemGenerator` and a filter.
The default generator is a simple radix based generator.

See

  $ rocfft-perf run -h

for more details.  To see which problems will be run without running
them, use `--list/-l`.

Using the `--suite/-S` option, problems are loaded from a "suites"
file.  The default suites file is `suites.py`.  Alternatively, you can
load the suite named "qa1" from a file called "mysuites.py" like this:

  $ rocfft-perf run -S mysuites:qa1 ...

That is, FILENAME:SUITENAME.

By default, output files are stored in the `out0` directory.  This can
be changed with the `--out/-o` argument.


Dynamic testing
===============

Dynamic testing is enabled by specifying more than one `--lib/-i`
option.  These are passed down to the rider, and hence it is assumed
that the specific rider is a "dyna" rider.

Multiple output directories are used to store the results.


Post processing
===============

During the post processing stage, various statistics are computed and
saved:

  $ rocfft-perf post DOCDIR OUTPUT [OUTPUT ...]

The first directory is the 'document directory'.  When comparing
multiple runs, comparative statistics are saved here in `.sdat` files.

For each `.dat` file in the output directories, summary statistics are
saved in `.mdat` files.


Plotting
========

Based on the results from post processing, generate an html, pdf, or
docx report:

  $ rocfft-perf html DOCDIR OUTPUT [OUTPUT ...]
  $ rocfft-perf pdf DOCDIR OUTPUT [OUTPUT ...]
  $ rocfft-perf docx DOCDIR OUTPUT [OUTPUT ...]

"""

import argparse
import logging
import statistics
import scipy
import sys
import os

from pathlib import Path

from multiprocessing import Pool

# Directory containing this script.  It is appended to the module search
# path before `perflib` is imported -- presumably because `perflib` lives
# next to this script (TODO confirm).
top = Path(__file__).resolve().parent
sys.path.append(str(top))

import perflib

# Console log handler.  Its level and formatter are set in the `__main__`
# block at the bottom of the file; `main` switches it to INFO for `--verbose`.
console = logging.StreamHandler()

import types

#
# Helpers
#

def update(attr, dst, src):
    """Copy attribute `attr` from `src` onto `dst` when `src` has a value for it.

    Nothing happens if `src` lacks the attribute or holds None for it, so
    any existing value on `dst` is left alone in that case.
    """
    candidate = getattr(src, attr, None)
    if candidate is None:
        return
    setattr(dst, attr, candidate)


#
# Commands
#

def command_moods(runs, percent, moods):
    """Find significant (Moods) regressions.

    The first entry of `runs` is the reference run; the remaining entries
    are compared against it.  A sample is reported when the reference
    median is lower than the other median by more than `percent` percent
    and the Mood's test p-value is below `moods`.

    Each regression is printed as it is found, followed by a summary of
    the distinct lengths involved.
    """

    reference, *others = perflib.utils.read_runs(runs)
    outcomes = perflib.analysis.moods(reference, others)

    threshold = percent / 100.0
    regressions = []
    for (_, dname, length), outcome in outcomes.items():
        m1, m2 = outcome.medians
        # only a slower (larger) second median counts as a regression
        if not m1 < m2:
            continue
        if not abs(m1 - m2) / m1 > threshold:
            continue
        p = outcome.pval
        if not p < moods:
            continue
        diff = 100 * abs(m1 - m2) / m1
        print(f"REGRESSION: length {str(length)}; median times {m1:.4f} vs {m2:.4f} ({diff:4.1f}%); Mood's p-value {p:.6f}; from {dname}.")
        regressions.append(length)

    print("Regressions found in lengths:")
    unique_lengths = set(regressions)
    for length in sorted(unique_lengths, key=perflib.utils.product):
        print("--length " + perflib.utils.sjoin(length))


def generate_mdat(dat):
    """Write per-sample median statistics for `dat` to an '.mdat' file.

    The output has a header row and then one row per sample with its
    label, element count, median time, and the low/high bounds returned
    by `perflib.analysis.confidence_interval`.  It is written next to the
    source file (same path, '.mdat' suffix), replacing any existing file.
    """
    rows = [['length', 'elements', 'median_sample', 'median_low', 'median_high']]
    for _, elements, sample in dat.sorted_samples():
        middle = statistics.median(sample.times)
        bounds = perflib.analysis.confidence_interval(sample.times)
        low, high = bounds
        rows.append([sample.label, elements, middle, low, high])
    target = dat.path.with_suffix('.mdat')
    perflib.utils.write_tsv(target, rows, meta=dat.meta, overwrite=True)

def command_post(outdirs, docdir):
    """Post process results in directories listed in `outdirs`.

    Median confidence intervals for each run are written in 'mdat'
    files.

    Speedups and pvals are written in 'sdat' files.

    The first entry of `outdirs` is the reference run.  When more than
    one directory is given, one 'sdat' file per (data set, other run)
    pair is written to `docdir`; each row holds the ratio of the
    reference median to the other median, the bounds from
    `perflib.analysis.ratio_confidence_interval`, and the p-value of
    Mood's median test.  `docdir` is created if it does not exist.

    """

    # Imported explicitly: on older SciPy releases a bare `import scipy`
    # does not load the `scipy.stats` submodule.
    from scipy.stats import median_test

    outdirs = [Path(x) for x in outdirs]
    docdir = Path(docdir)
    docdir.mkdir(parents=True, exist_ok=True)

    all_runs = perflib.utils.read_runs(outdirs)

    # median confidence intervals; one worker pool is shared by all runs
    # instead of starting and tearing down a pool for every run
    with Pool(None) as pool:
        for run in all_runs:
            pool.map(generate_mdat, run.dats.values())

    # speedup and pvals
    if len(outdirs) > 1:
        runs = perflib.utils.by_dat(all_runs)
        refdir, *otherdirs = outdirs
        for dat_name, dat_runs in runs.items():
            # Other runs are already skipped when they lack a data set;
            # treat the reference the same way rather than raising KeyError.
            if refdir not in dat_runs:
                logging.warning("Skipping %s: not present in reference run %s.", dat_name, refdir)
                continue
            refdat = dat_runs[refdir]
            for otherdir in otherdirs:
                if otherdir not in dat_runs:
                    continue
                otherdat = dat_runs[otherdir]
                speedups = [['length', 'elements', 'speedup', 'speedup_low', 'speedup_high', 'speedup_pval']]
                for length, _, _ in refdat.sorted_samples():
                    # only lengths measured in both runs can be compared
                    if length not in otherdat.samples:
                        continue
                    sample = refdat.samples[length]
                    Avals = sample.times
                    Bvals = otherdat.samples[length].times
                    speedup = statistics.median(Avals) / statistics.median(Bvals)
                    low, high = perflib.analysis.ratio_confidence_interval(Avals, Bvals)
                    _, pval, _, _ = median_test(Avals, Bvals)
                    speedups.append([sample.label, perflib.utils.product(length), speedup, low, high, pval])
                path = docdir / f'{otherdat.path.parent.name}-over-{refdat.path.parent.name}-{dat_name}.sdat'
                perflib.utils.write_tsv(path, speedups, meta=refdat.meta, overwrite=True)


def command_generate(runs=None, label=None, output=None, type='pdf', **kwargs):
    """Generate PDF/HTML/DOCX from run results.

    `runs` lists the run directories; the first one is the reference and
    determines which data sets get a figure.  `label` gives one label per
    run and defaults to the stem of each run directory.  `output` is the
    document directory (created if missing).  `type` selects the report
    format: 'pdf', 'html' or 'docx'.  Extra keyword arguments are ignored.
    """

    import perflib.pdf
    import perflib.html

    # 'docx' figures are built with the PDF figure class
    figure_classes = {
        'pdf': perflib.pdf.PDFFigure,
        'html': perflib.html.HTMLFigure,
        'docx': perflib.pdf.PDFFigure,
    }
    Figure = figure_classes[type]

    docdir = Path(output)
    docdir.mkdir(parents=True, exist_ok=True)

    outdirs = list(map(Path, runs))
    if label is None:
        label = [outdir.stem for outdir in outdirs]
    first_outdir = outdirs[0]
    reference = perflib.utils.read_run(first_outdir)

    figures = []
    for datname in perflib.utils.list_run(first_outdir):
        tag = datname.stem
        meta = reference.dats[tag].meta
        title = meta.get('title', tag)
        # the caption falls back to the title; underscores become spaces
        caption = meta.get('caption', title).replace('_', ' ')
        figtype = meta.get('figtype', 'linegraph')
        primary, secondary = perflib.utils.get_post_processed(tag, docdir, outdirs)
        figure = Figure(tag, title, caption, docdir, label, primary, secondary, figtype)
        figure.make()
        figures.append(figure)

    if type == 'pdf':
        perflib.pdf.make_tex(figures, docdir, outdirs)
    elif type == 'html':
        title = f"Performance report: {perflib.utils.cjoin(outdirs)}"
        perflib.html.make_html(figures, title, docdir, outdirs)
    elif type == 'docx':
        import perflib.docx
        perflib.docx.make_docx(figures, docdir, outdirs)


def command_run(arguments):
    """Run dyna-rider or rider.

    `arguments` is the parsed namespace of the 'run' sub-command.

    Problems come from a suite generator when `--suite` was given, and
    from a radix generator otherwise.  They are narrowed by a filter built
    from the direction/placement/real-complex/precision/dimension
    switches.  With `--list` the problems are printed and nothing is run.

    Otherwise a rider is required, and a rider whose path contains 'dyna'
    additionally requires at least one `--lib`.  When no output directory
    is given, `out0`, `out1`, ... are used (one per library for a dyna
    rider, a single one otherwise).  Machine specs are written to
    `specs.txt` in every output directory before the cases are timed.
    """

    # build generator
    if arguments.suite is not None:
        generator = perflib.generators.SuiteProblemGenerator(arguments.suite)
    else:
        generator = perflib.generators.RadixProblemGenerator()
        for attr in ['radix', 'xmin', 'xmax', 'ymin', 'ymax', 'zmin', 'zmax']:
            update(attr, generator, arguments)

    # the batch count applies to both kinds of generator
    update('nbatch', generator, arguments)

    # build filter
    filtered = perflib.generators.FilteredProblemGenerator()
    if arguments.direction is not None:
        filtered.direction = [arguments.direction]
    # if both placement (or both type) switches are given, the later one wins
    if arguments.inplace:
        filtered.inplace = [True]
    if arguments.outplace:
        filtered.inplace = [False]
    if arguments.real:
        filtered.real = [True]
    if arguments.complex:
        filtered.real = [False]
    if arguments.precision:
        filtered.precision = arguments.precision
    if arguments.dimension:
        filtered.dimension = arguments.dimension

    if arguments.list:
        for test in filtered(generator).generate_problems():
            print(test)
        return

    # build timer
    if arguments.rider is None:
        print("No rider set... use -w /path/to/rider.")
        return
    dyna = 'dyna' in arguments.rider
    if dyna and not arguments.lib:
        print("Need to set dynamically loaded library when using dyna-rider.")
        return
    if not arguments.out:
        nout = len(arguments.lib) if dyna else 1
        arguments.out = ['out' + str(i) for i in range(nout)]

    timer = perflib.timer.GroupedTimer()
    for attr in ['device', 'rider', 'lib', 'out', 'ntrial', 'verbose']:
        update(attr, timer, arguments)

    specs = perflib.specs.get_machine_specs(timer.device)
    for out in timer.out:
        specs_file = Path(out) / 'specs.txt'
        # parents=True so that nested output directories (e.g. `-o a/b`) work
        specs_file.parent.mkdir(parents=True, exist_ok=True)
        specs_file.write_text(str(specs))

    timer.run_cases(filtered(generator))


def command_autodyna(workdir, reference_commit, reference_repository, reference_label, commit, repository, label, suite, format, **kwargs):
    """Compare performance of two builds automagically.

    Both commits are built under `workdir` (in `build-<commit>`) unless
    their `librocfft.so` already exists.  The suite(s) in `suite` are then
    timed with the reference build's `dyna-rocfft-rider` against both
    libraries, the results are post processed, and one report per entry
    of `format` is written to `doc-<commit>`.

    `reference_repository` defaults to `repository`; the labels default
    to their respective commits.  The current working directory is
    changed to `workdir`.  Extra keyword arguments are ignored.
    """

    from perflib.build import build_rocfft

    reference_repository = repository if reference_repository is None else reference_repository
    reference_label = reference_commit if reference_label is None else reference_label
    label = commit if label is None else label

    workroot = Path(workdir).resolve()
    build1 = workroot / f'build-{reference_commit}'
    build2 = workroot / f'build-{commit}'
    output = workroot / f'doc-{commit}'

    # build rocFFTs
    workroot.mkdir(parents=True, exist_ok=True)
    os.chdir(str(workroot))

    libs = []
    builds = (
        (reference_commit, build1, reference_repository),
        (commit, build2, repository),
    )
    for sha, dest, repo in builds:
        lib = dest / 'lib' / 'librocfft.so'
        lib.parent.mkdir(parents=True, exist_ok=True)
        # an existing library means this commit was built before
        if not lib.exists():
            build_rocfft(sha, dest=dest, repo=repo, ccache=True)
        libs.append(lib)

    # run cases
    timer = perflib.timer.GroupedTimer()
    timer.rider = build1 / 'dyna-rocfft-rider'
    timer.lib = libs
    timer.out = [build1, build2]

    specs_text = str(perflib.specs.get_machine_specs(timer.device))
    for out in timer.out:
        (Path(out) / 'specs.txt').write_text(specs_text)

    timer.run_cases(perflib.generators.SuiteProblemGenerator(suite))

    # post-process results
    command_post([build1, build2], output)

    # generate report
    for report_type in format:
        command_generate(runs=[build1, build2], label=[reference_label, label], output=output, type=report_type)
#
# Main
#

def main():
    """Parse the command line and dispatch to the selected sub-command.

    Sub-commands: overview, specs, moods, post, pdf, html, docx, run and
    autodyna.  The top-level `-v/--verbose` switch raises the console log
    handler to INFO.  When no sub-command is given, the help text is
    printed.
    """
    parser = argparse.ArgumentParser(prog='rocfft-perf', epilog="For a detailed usage overview, run: %(prog)s overview")
    subparsers = parser.add_subparsers(dest='command')
    parser.add_argument('-v', '--verbose', action='store_true', default=False)

    subparsers.add_parser('overview', help='print a general usage overview')
    subparsers.add_parser('specs', help='print machine specs')

    moods_parser = subparsers.add_parser('moods', help='perform moods test')
    moods_parser.add_argument('--significance', type=float, help='moods significance threshold', default=0.001)
    moods_parser.add_argument('--percent', type=float, help='percent difference', default=5.0)
    moods_parser.add_argument('runs', type=str, nargs='+')

    post_parser = subparsers.add_parser('post', help='post processing')
    post_parser.add_argument('output', type=str)
    post_parser.add_argument('runs', type=str, nargs='+')

    pdf_parser = subparsers.add_parser('pdf', help='generate pdf plots')
    html_parser = subparsers.add_parser('html', help='generate html plots')
    docx_parser = subparsers.add_parser('docx', help='generate docx plots')

    for p in [pdf_parser, html_parser, docx_parser]:
        p.add_argument('output', type=str)
        p.add_argument('runs', type=str, nargs='+')
        p.add_argument('-l', '--label', type=str, help='label (appendable)', action='append')

    run_parser = subparsers.add_parser('run', help='run!')
    run_parser.add_argument('-g', '--device', type=int, help='device number')
    run_parser.add_argument('-l', '--list', help='list runs (but do not run them)', action='store_true', default=False)
    run_parser.add_argument('-o', '--out', type=str, help='output (appendable)', action='append')
    run_parser.add_argument('-S', '--suite', type=str, help='test suite name (appendable)', action='append')
    run_parser.add_argument('-w', '--rider', type=str, help='test executable path')
    run_parser.add_argument('-i', '--lib', type=str, help='test library path (appendable)', action='append')
    run_parser.add_argument('-r', '--radix', type=int, help='radix')
    run_parser.add_argument('-x', '--xmin', type=int, help='minimum problem size in x direction')
    run_parser.add_argument('-X', '--xmax', type=int, help='maximum problem size in x direction')
    run_parser.add_argument('-y', '--ymin', type=int, help='minimum problem size in y direction')
    run_parser.add_argument('-Y', '--ymax', type=int, help='maximum problem size in y direction')
    run_parser.add_argument('-z', '--zmin', type=int, help='minimum problem size in z direction')
    run_parser.add_argument('-Z', '--zmax', type=int, help='maximum problem size in z direction')
    run_parser.add_argument('-D', '--direction', type=int, help='direction of transform')
    run_parser.add_argument('-I', '--inplace', help='make transform in-place', action='store_true', default=False)
    run_parser.add_argument('-O', '--outplace', help='make transform out-of-place', action='store_true', default=False)
    run_parser.add_argument('-R', '--real', help='make transform real/complex', action='store_true', default=False)
    run_parser.add_argument('-C', '--complex', help='make transform complex/complex', action='store_true', default=False)
    run_parser.add_argument('-d', '--dimension', type=int, help='dimension of transform', action='append')
    run_parser.add_argument('-b', '--nbatch', type=int, help='number of batches')
    run_parser.add_argument('-N', '--ntrial', type=int, help='number of trials')
    run_parser.add_argument('-f', '--precision', type=str, help='precision', action='append')

    autodyna_parser = subparsers.add_parser('autodyna', help='clone, build, dyna-run, post, and plot two rocFFT commits')
    autodyna_parser.add_argument('--workdir', type=str, help='Working directory', default='.')
    autodyna_parser.add_argument('--reference_commit', type=str, help='Reference commit', required=True)
    autodyna_parser.add_argument('--reference_repository', type=str, help='Reference repository (if different from repository)')
    autodyna_parser.add_argument('--reference_label', type=str, help='Reference label (if different from reference commit)')
    autodyna_parser.add_argument('--commit', type=str, help='Commit to test', required=True)
    autodyna_parser.add_argument('--repository', type=str, help='Repository to test', required=True)
    autodyna_parser.add_argument('--label', type=str, help='Test label (if different from test commit)')
    autodyna_parser.add_argument('--suite', type=str, help='Test suite name (appendable)', action='append', required=True)
    autodyna_parser.add_argument('--format', type=str, help='Output format (appendable)', action='append', default=['html'])

    arguments = parser.parse_args()

    if arguments.verbose:
        console.setLevel(logging.INFO)

    command = arguments.command

    if command is None:
        # no sub-command given: show usage instead of silently doing nothing
        parser.print_help()
        return

    if command == 'specs':
        print(perflib.specs.get_machine_specs(0))

    elif command == 'overview':
        print(__doc__)

    elif command == 'run':
        command_run(arguments)

    elif command == 'moods':
        if len(arguments.runs) < 2:
            print('Need more than one output directory')
            return
        # Pass the thresholds by keyword: command_moods takes
        # (runs, percent, moods), so passing significance and percent
        # positionally in that order would swap them.
        command_moods(arguments.runs,
                      percent=arguments.percent,
                      moods=arguments.significance)

    elif command == 'post':
        command_post(arguments.runs, arguments.output)

    elif command in ('pdf', 'html', 'docx'):
        # the sub-command name doubles as the report type
        command_generate(type=command, **vars(arguments))

    elif command == 'autodyna':
        command_autodyna(**vars(arguments))

if __name__ == '__main__':
    # Everything from DEBUG upwards goes to the log file ...
    logging.basicConfig(
        level=logging.DEBUG,
        filename='rocfft-perf.log',
        format='%(asctime)s %(levelname)s: %(message)s',
    )

    # ... while the console only shows warnings and above by default.
    console_format = logging.Formatter('%(levelname)-8s: %(message)s')
    console.setFormatter(console_format)
    console.setLevel(logging.WARNING)

    root_logger = logging.getLogger('')
    root_logger.addHandler(console)

    main()