Source code for radvel.driver

"""
Driver functions for the radvel pipeline.\
These functions are meant to be used only with\
the `cli.py` command line interface.
"""
from __future__ import print_function

import radvel
from radvel.likelihood import GPLikelihood
from radvel.plot import orbit_plots, mcmc_plots
from radvel.mcmc import statevars

import os
import sys
import copy
import collections
from collections import OrderedDict

import pandas as pd
import numpy as np
from numpy import inf
from astropy import constants as c

if sys.version_info[0] < 3:
    import ConfigParser as configparser
else:
    import configparser
    from configparser import NoSectionError


[docs]def plots(args):
    """
    Generate plots

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    statfile = os.path.join(
        args.outputdir, "{}_radvel.stat".format(conf_base)
    )

    status = load_status(statfile)

    P, post = radvel.utils.initialize_posterior(config_file,
                                                decorr=args.decorr)

    assert status.getboolean('fit', 'run'), \
        "Must perform max-liklihood fit before plotting"
    post = radvel.posterior.load(status.get('fit', 'postfile'))

    for ptype in args.type:
        print("Creating {} plot for {}".format(ptype, conf_base))

        if ptype == 'rv':
            args.plotkw['uparams'] = post.uparams
            args.plotkw['status'] = status
            if 'saveplot' not in args.plotkw:
                saveto = os.path.join(
                    args.outputdir, conf_base+'_rv_multipanel.pdf'
                )
            else:
                saveto = args.plotkw['saveplot']
                args.plotkw.pop('saveplot')
            P, _ = radvel.utils.initialize_posterior(config_file)
            if hasattr(P, 'bjd0'):
                args.plotkw['epoch'] = P.bjd0

            if args.gp or isinstance(post.likelihood, GPLikelihood):
                GPPlot = orbit_plots.GPMultipanelPlot(
                    post, saveplot=saveto, **args.plotkw
                )
                GPPlot.plot_multipanel()
            else:
                RVPlot = orbit_plots.MultipanelPlot(
                    post, saveplot=saveto, **args.plotkw
                )
                RVPlot.plot_multipanel()

                # check to make sure that Posterior is not GP, print warning if it is
                if isinstance(post.likelihood, radvel.likelihood.CompositeLikelihood):
                    like_list = post.likelihood.like_list
                else:
                    like_list = [post.likelihood]
                for like in like_list:
                    if isinstance(like, radvel.likelihood.GPLikelihood):
                        print("WARNING: GP Likelihood(s) detected. \
You may want to use the '--gp' flag when making these plots.")
                        break

        if ptype == 'corner' or ptype == 'auto' or ptype == 'trend':
            assert status.getboolean('mcmc', 'run'), \
                "Must run MCMC before making corner, auto, or trend plots"

            chains = pd.read_csv(status.get('mcmc', 'chainfile'))
            autocorr = pd.read_csv(status.get('mcmc', 'autocorrfile'))

        if ptype == 'auto':
            saveto = os.path.join(args.outputdir, conf_base+'_auto.pdf')
            Auto = mcmc_plots.AutoPlot(autocorr, saveplot=saveto)
            Auto.plot()

        if ptype == 'corner':
            saveto = os.path.join(args.outputdir, conf_base+'_corner.pdf')
            Corner = mcmc_plots.CornerPlot(post, chains, saveplot=saveto)
            Corner.plot()

        if ptype == 'trend':
            nwalkers = status.getint('mcmc', 'nwalkers')
            nensembles = status.getint('mcmc', 'nensembles')

            saveto = os.path.join(args.outputdir, conf_base+'_trends.pdf')
            Trend = mcmc_plots.TrendPlot(post, chains, nwalkers, nensembles, saveto)
            Trend.plot()

        if ptype == 'derived':
            assert status.has_section('derive'), \
                "Must run `radvel derive` before plotting derived parameters"

            P, _ = radvel.utils.initialize_posterior(config_file)
            chains = pd.read_csv(status.get('derive', 'chainfile'))
            saveto = os.path.join(
                args.outputdir, conf_base+'_corner_derived_pars.pdf'
            )

            Derived = mcmc_plots.DerivedPlot(chains, P, saveplot=saveto)
            Derived.plot()

        savestate = {'{}_plot'.format(ptype): os.path.relpath(saveto)}
        save_status(statfile, 'plot', savestate)


[docs]def fit(args):
    """Perform maximum a posteriori fit

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    print("Performing maximum a posteriori fitting for {}".format(conf_base))

    P, post = radvel.utils.initialize_posterior(config_file, decorr=args.decorr)

    post = radvel.fitting.maxlike_fitting(post, verbose=True)

    postfile = os.path.join(args.outputdir,
                            '{}_post_obj.pkl'.format(conf_base))
    post.writeto(postfile)

    savestate = {'run': True,
                 'postfile': os.path.relpath(postfile)}
    save_status(os.path.join(args.outputdir,
                '{}_radvel.stat'.format(conf_base)),
                'fit', savestate)


[docs]def mcmc(args):
    """Perform MCMC error analysis

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    statfile = os.path.join(args.outputdir,
                            "{}_radvel.stat".format(conf_base))

    if args.save or args.proceed:
        backend_loc = os.path.join(args.outputdir, conf_base+'_rawchain.h5')
    else:
        backend_loc = None

    status = load_status(statfile)
    P, post = radvel.utils.initialize_posterior(config_file,
                                                decorr=args.decorr)

    if status.getboolean('fit', 'run'):
        print("Loading starting positions from previous MAP fit")

        post = radvel.posterior.load(status.get('fit', 'postfile'))

    msg1 = (
            "Running MCMC for {}, N_walkers = {}, N_steps = {}, N_ensembles = {}, Min Auto Factor = {}, "
            ).format(conf_base, args.nwalkers, args.nsteps, args.ensembles, args.minAfactor)

    msg2 = (
            "Max Auto Relative-Change = {}, Max G-R = {}, Min Tz = {} ..."
            ).format(args.maxArchange, args.maxGR, args.minTz)

    print(msg1 + '\n' + msg2)

    chains = radvel.mcmc(post, nwalkers=args.nwalkers, nrun=args.nsteps, ensembles=args.ensembles,
                         minAfactor=args.minAfactor, maxArchange=args.maxArchange, burnAfactor=args.burnAfactor,
                         burnGR=args.burnGR, maxGR=args.maxGR, minTz=args.minTz, minsteps=args.minsteps,
                         minpercent=args.minpercent, thin=args.thin, serial=args.serial, save=args.save,
                         savename=backend_loc, proceed=args.proceed, proceedname=backend_loc, headless=args.headless)

    mintz = statevars.mintz
    maxgr = statevars.maxgr
    minafactor = statevars.minafactor
    maxarchange = statevars.maxarchange

    # Convert chains into synth basis
    synthchains = chains.copy()
    for par in post.params.keys():
        if not post.vector.vector[post.vector.indices[par]][1]:
            synthchains[par] = post.vector.vector[post.vector.indices[par]][0]

    synthchains = post.params.basis.to_synth(synthchains)
    synth_quantile = synthchains.quantile([0.159, 0.5, 0.841])

    # Get quantiles and update posterior object to median
    # values returned by MCMC chains
    post_summary = chains.quantile([0.159, 0.5, 0.841])

    for k in chains.columns:
        if k in post.params.keys():
            post.vector.vector[post.vector.indices[k]][0] = post_summary[k][0.5]

    post.vector.vector_to_dict()

    print("Performing post-MCMC maximum likelihood fit...")
    post = radvel.fitting.maxlike_fitting(post, verbose=False)

    final_logprob = post.logprob()
    final_residuals = post.likelihood.residuals().std()
    final_chisq = np.sum(post.likelihood.residuals()**2 / (post.likelihood.errorbars()**2))
    deg_of_freedom = len(post.likelihood.y) - len(post.likelihood.get_vary_params())
    final_chisq_reduced = final_chisq / deg_of_freedom
    post.vector.vector_to_dict()
    synthparams = post.params.basis.to_synth(post.params)

    print("Calculating uncertainties...")
    post.uparams = {}
    post.medparams = {}
    post.maxparams = {}
    for par in synthparams.keys():
        maxlike = synthparams[par].value
        med = synth_quantile[par][0.5]
        high = synth_quantile[par][0.841] - med
        low = med - synth_quantile[par][0.159]
        err = np.mean([high, low])
        if maxlike == -np.inf and med == -np.inf and np.isnan(low) and np.isnan(high):
            err = 0.0
        else:
            err = radvel.utils.round_sig(err)
        if err > 0.0:
            med, err, errhigh = radvel.utils.sigfig(med, err)
            maxlike, err, errhigh = radvel.utils.sigfig(maxlike, err)
        post.uparams[par] = err
        post.medparams[par] = med
        post.maxparams[par] = maxlike

    print("Final loglikelihood = %f" % final_logprob)
    print("Final RMS = %f" % final_residuals)
    print("Final reduced chi-square = {}".format(final_chisq_reduced))
    print("Best-fit parameters:")
    print(post)

    print("Saving output files...")
    saveto = os.path.join(args.outputdir, conf_base+'_post_summary.csv')
    post_summary.to_csv(saveto, sep=',')

    postfile = os.path.join(args.outputdir,
                            '{}_post_obj.pkl'.format(conf_base))
    post.writeto(postfile)

    csvfn = os.path.join(args.outputdir, conf_base+'_chains.csv.bz2')
    chains.to_csv(csvfn, compression='bz2')

    auto = pd.DataFrame()
    auto['autosamples'] = statevars.autosamples
    auto['automin'] = statevars.automin
    auto['automean'] = statevars.automean
    auto['automax'] = statevars.automax
    auto['factor'] = statevars.factor
    autocorr = os.path.join(args.outputdir, conf_base+'_autocorr.csv')
    auto.to_csv(autocorr, sep=',')

    savestate = {'run': True,
                 'postfile': os.path.relpath(postfile),
                 'chainfile': os.path.relpath(csvfn),
                 'autocorrfile': os.path.relpath(autocorr),
                 'summaryfile': os.path.relpath(saveto),
                 'nwalkers': statevars.nwalkers,
                 'nensembles': args.ensembles,
                 'maxsteps': args.nsteps*statevars.nwalkers*args.ensembles,
                 'nsteps': statevars.ncomplete,
                 'nburn': statevars.nburn,
                 'minafactor': minafactor,
                 'maxarchange': maxarchange,
                 'minTz': mintz,
                 'maxGR': maxgr}
    save_status(statfile, 'mcmc', savestate)

    statevars.reset()


[docs]def ic_compare(args):
    """Compare different models and comparative statistics including
          AIC and BIC statistics.

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    statfile = os.path.join(args.outputdir,
                            "{}_radvel.stat".format(conf_base))

    status = load_status(statfile)

    P, post = radvel.utils.initialize_posterior(config_file,
                                                decorr=args.decorr)

    assert status.getboolean('fit', 'run'), \
        "Must perform max-liklihood fit before running Information Criteria comparisons"
    post = radvel.posterior.load(status.get('fit', 'postfile'))

    choices = ['nplanets', 'e', 'trend', 'jit', 'gp']
    statsdictlist = []
    paramlist = []
    compareparams = args.type

    ipost = copy.deepcopy(post)

    if args.simple:
        statsdictlist += radvel.fitting.model_comp(ipost, params=[], verbose=args.verbose)
    else:
        if hasattr(args, 'fixjitter') and args.fixjitter:
            for param in ipost.params:
                if len(param) >= 3 and param[0:3] == 'jit':
                    ipost.params[param].vary = False

        for compareparam in compareparams:
            assert compareparam in choices, \
                "Valid parameter choices for 'ic -t' are combinations of: "\
                + " ".join(choices)
            paramlist.append(compareparam)
            if hasattr(args, 'mixed') and not args.mixed:
                statsdictlist += radvel.fitting.model_comp(ipost, params=[compareparam], verbose=args.verbose)
        if hasattr(args, 'mixed') and not args.mixed:
            new_statsdictlist = []
            for dicti in statsdictlist:
                anymatch = False
                for seendict in new_statsdictlist:
                    if collections.Counter(dicti['Free Params'][0]) == \
                            collections.Counter(seendict['Free Params'][0]):
                        anymatch = True
                        continue
                if not anymatch:
                    new_statsdictlist.append(dicti)
            statsdictlist = new_statsdictlist

        if not hasattr(args, 'mixed') or (hasattr(args, 'mixed') and args.mixed):
            statsdictlist += radvel.fitting.model_comp(ipost, params=paramlist, verbose=args.verbose)

    savestate = {'ic': statsdictlist}
    save_status(statfile, 'ic_compare', savestate)


[docs]def tables(args):
    """Generate TeX code for tables in summary report

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    statfile = os.path.join(args.outputdir,
                            "{}_radvel.stat".format(conf_base))
    status = load_status(statfile)

    assert status.getboolean('mcmc', 'run'), \
        "Must run MCMC before making tables"

    P, post = radvel.utils.initialize_posterior(config_file)
    post = radvel.posterior.load(status.get('fit', 'postfile'))
    chains = pd.read_csv(status.get('mcmc', 'chainfile'))
    minafactor = status.get('mcmc', 'minafactor')
    maxarchange = status.get('mcmc', 'maxarchange')
    maxgr = status.get('mcmc', 'maxgr')
    mintz = status.get('mcmc', 'mintz')
    if 'derive' in status.sections() and status.getboolean('derive', 'run'):
        dchains = pd.read_csv(status.get('derive', 'chainfile'))
        chains = chains.join(dchains, rsuffix='_derived')
        derived = True
    else:
        derived = False
    report = radvel.report.RadvelReport(P, post, chains, minafactor, maxarchange, maxgr, mintz, derived=derived)
    tabletex = radvel.report.TexTable(report)
    attrdict = {'priors': 'tab_prior_summary', 'rv': 'tab_rv',
                'params': 'tab_params', 'derived': 'tab_derived',
                'crit': 'tab_crit'}
    for tabtype in args.type:
        print("Generating LaTeX code for {} table".format(tabtype))

        if tabtype == 'ic_compare':
            assert status.has_option('ic_compare', 'ic'), \
                "Must run Information Criteria comparison before making comparison tables"

            compstats = eval(status.get('ic_compare', 'ic'))
            report = radvel.report.RadvelReport(
                P, post, chains, minafactor, maxarchange, maxgr, mintz, compstats=compstats
            )
            tabletex = radvel.report.TexTable(report)
            tex = tabletex.tab_comparison()
        elif tabtype == 'rv':
            tex = getattr(tabletex, attrdict[tabtype])(name_in_title=args.name_in_title, max_lines=None)
        elif tabtype == 'crit':
            tex = getattr(tabletex, attrdict[tabtype])(name_in_title=args.name_in_title)
        else:
            if tabtype == 'derived':
                assert status.has_option('derive', 'run'), \
                    "Must run `radvel derive` before making derived parameter table"
            assert tabtype in attrdict, 'Invalid Table Type %s ' % tabtype
            tex = getattr(tabletex, attrdict[tabtype])(name_in_title=args.name_in_title)

        saveto = os.path.join(
            args.outputdir, '{}_{}.tex'.format(conf_base, tabtype)
        )
        with open(saveto, 'w+') as f:
            f.write(tex)

        savestate = {'{}_tex'.format(tabtype): os.path.relpath(saveto)}
        save_status(statfile, 'table', savestate)


[docs]def derive(args):
    """Derive physical parameters from posterior samples

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    statfile = os.path.join(args.outputdir,
                            "{}_radvel.stat".format(conf_base))
    status = load_status(statfile)

    msg = "Multiplying mcmc chains by physical parameters for {}".format(
        conf_base
    )
    print(msg)

    assert status.getboolean('mcmc', 'run'), "Must run MCMC before making tables"

    P, post = radvel.utils.initialize_posterior(config_file)
    post = radvel.posterior.load(status.get('fit', 'postfile'))
    chains = pd.read_csv(status.get('mcmc', 'chainfile'))

    try:
        mstar = np.random.normal(
            loc=P.stellar['mstar'], scale=P.stellar['mstar_err'],
            size=len(chains)
            )
    except AttributeError:
        print("Unable to calculate derived parameters, stellar parameters not defined the config file.")
        return

    if (mstar <= 0.0).any():
        num_nan = np.sum(mstar <= 0.0)
        nan_perc = float(num_nan) / len(chains)
        mstar[mstar <= 0] = np.abs(mstar[mstar <= 0])
        print("WARNING: {} ({:.2f} %) of Msini samples are NaN. The stellar mass posterior may contain negative \
values. Interpret posterior with caution.".format(num_nan, nan_perc))

    # Convert chains into synth basis
    synthchains = chains.copy()
    for par in post.params.keys():
        if not post.params[par].vary:
            synthchains[par] = post.params[par].value

    synthchains = post.params.basis.to_synth(synthchains)

    savestate = {'run': True}
    outcols = []
    for i in np.arange(1, P.nplanets + 1, 1):
        # Grab parameters from the chain
        def _has_col(key):
            cols = list(synthchains.columns)
            return cols.count('{}{}'.format(key, i)) == 1

        def _get_param(key):
            if _has_col(key):
                return synthchains['{}{}'.format(key, i)]
            else:
                return P.params['{}{}'.format(key, i)].value

        def _set_param(key, value):
            chains['{}{}'.format(key, i)] = value

        def _get_colname(key):
            return '{}{}'.format(key, i)

        per = _get_param('per')
        k = _get_param('k')
        e = _get_param('e')

        mpsini = radvel.utils.Msini(k, per, mstar, e, Msini_units='earth')
        _set_param('mpsini', mpsini)
        outcols.append(_get_colname('mpsini'))

        mtotal = mstar + (mpsini * c.M_earth.value) / c.M_sun.value      # get total star plus planet mass
        a = radvel.utils.semi_major_axis(per, mtotal)               # changed from mstar to mtotal
        
        _set_param('a', a)
        outcols.append(_get_colname('a'))

        musini = (mpsini * c.M_earth) / (mstar * c.M_sun)
        _set_param('musini', musini)
        outcols.append(_get_colname('musini'))

        try:
            rp = np.random.normal(
                loc=P.planet['rp{}'.format(i)],
                scale=P.planet['rp_err{}'.format(i)],
                size=len(chains)
            )

            _set_param('rp', rp)
            _set_param('rhop', radvel.utils.density(mpsini, rp))

            outcols.append(_get_colname('rhop'))
        except (AttributeError, KeyError):
            pass

    # Get quantiles and update posterior object to median
    # values returned by MCMC chains
    quantiles = chains.quantile([0.159, 0.5, 0.841])
    csvfn = os.path.join(args.outputdir, conf_base+'_derived_quantiles.csv')
    quantiles.to_csv(csvfn, columns=outcols)

    # saved derived paramters to posterior file
    postfile = os.path.join(args.outputdir,
                            '{}_post_obj.pkl'.format(conf_base))
    post.derived = quantiles[outcols]
    post.writeto(postfile)
    savestate['quantfile'] = os.path.relpath(csvfn)

    csvfn = os.path.join(args.outputdir, conf_base+'_derived.csv.bz2')
    chains.to_csv(csvfn, columns=outcols, compression='bz2')
    savestate['chainfile'] = os.path.relpath(csvfn)

    print("Derived parameters:", outcols)

    save_status(statfile, 'derive', savestate)


[docs]def report(args):
    """Generate summary report

    Args:
        args (ArgumentParser): command line arguments
    """

    config_file = args.setupfn
    conf_base = os.path.basename(config_file).split('.')[0]
    print("Assembling report for {}".format(conf_base))

    statfile = os.path.join(args.outputdir,
                            "{}_radvel.stat".format(conf_base))

    status = load_status(statfile)
    if 'ic_compare' in status.keys():
        status['ic_compare']['ic'] = status['ic_compare']['ic'].replace('-inf', '-np.inf')

    P, post = radvel.utils.initialize_posterior(config_file)
    post = radvel.posterior.load(status.get('fit', 'postfile'))
    chains = pd.read_csv(status.get('mcmc', 'chainfile'))
    minafactor = status.get('mcmc', 'minafactor')
    maxarchange = status.get('mcmc', 'maxarchange')
    maxgr = status.get('mcmc', 'maxgr')
    mintz = status.get('mcmc', 'mintz')
    if 'derive' in status.sections() and status.getboolean('derive', 'run'):
        dchains = pd.read_csv(status.get('derive', 'chainfile'))
        chains = chains.join(dchains, rsuffix='_derived')
        derived = True
    else:
        derived = False
    try:
        compstats = eval(status.get('ic_compare', 'ic'))
    except (KeyError, NoSectionError):
        print("WARNING: Could not find {} model comparison \
in {}.\nPlease make sure that you have run `radvel ic -t {}` (or, e.g., `radvel \
ic -t nplanets e trend jit gp`)\
\nif you would like to include the model comparison table in the \
report.".format(args.comptype,
                statfile,
                args.comptype))
        compstats = None

    report = radvel.report.RadvelReport(P, post, chains, minafactor, maxarchange, maxgr, mintz, compstats=compstats,
                                        derived=derived)
    report.runname = conf_base

    report_depfiles = []
    for ptype, pfile in status.items('plot'):
        report_depfiles.append(pfile)

    with radvel.utils.working_directory(args.outputdir):
        rfile = os.path.join(conf_base+"_results.pdf")
        report_depfiles = [os.path.basename(p) for p in report_depfiles]
        report.compile(
            rfile, depfiles=report_depfiles, latex_compiler=args.latex_compiler
        )


[docs]def save_status(statfile, section, statevars):
    """Save pipeline status

    Args:
        statfile (string): name of output file
        section (string): name of section to write
        statevars (dict): dictionary of all options to populate
           the specified section
    """

    config = configparser.RawConfigParser()

    if os.path.isfile(statfile):
        config.read(statfile)

    if not config.has_section(section):
        config.add_section(section)

    for key, val in statevars.items():
        config.set(section, key, val)

    with open(statfile, 'w') as f:
        config.write(f)


[docs]def load_status(statfile):
    """Load pipeline status

    Args:
        statfile (string): name of configparser file

    Returns:
        configparser.RawConfigParser
    """

    config = configparser.RawConfigParser()
    gl = config.read(statfile)

    return config