
#! /usr/bin/env python

"""
Module with the MCMC (``emcee``) sampling for model spectra parameter
estimation.

.. [FOR13]
   | Foreman-Mackey et al. 2013
   | **emcee: The MCMC Hammer**
   | *PASP, Volume 125, Issue 925, p. 306*
   | `https://arxiv.org/abs/1202.3665
     <https://arxiv.org/abs/1202.3665>`_

.. [FOR19]
   | Foreman-Mackey et al. 2019
   | **emcee v3: A Python ensemble sampling toolkit for affine-invariant MCMC**
   | *JOSS, Volume 4, Issue 43, p. 1864*
   | `https://arxiv.org/abs/1911.07688
     <https://arxiv.org/abs/1911.07688>`_

.. [GOO10]
   | Goodman & Weare 2010
   | **Ensemble samplers with affine invariance**
   | *Comm. App. Math. Comp. Sci., Vol. 5, Issue 1, pp. 65-80.*
   | `https://ui.adsabs.harvard.edu/abs/2010CAMCS...5...65G
     <https://ui.adsabs.harvard.edu/abs/2010CAMCS...5...65G>`_

"""

__author__ = 'V. Christiaens'
__all__ = ['mcmc_spec_sampling',
           'lnprob',
           'lnlike',
           'chain_zero_truncated',
           'show_corner_plot',
           'show_walk_plot',
           'confidence']

from astropy import constants as con
import numpy as np
import os
from os.path import isdir, isfile
import emcee
import inspect
import datetime
import corner
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import pickle
from scipy.stats import norm
from .config import time_ini, timing, sep
from .fits import open_fits, write_fits
from .utils_mcmc import gelman_rubin, autocorr_test
from .chi import goodness_of_fit
from .model_resampling import make_model_from_params, make_resampled_models
from .utils_spec import mj_from_rj_and_logg
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


def lnprior(params, labels, bounds, priors=None):
    """ Define the prior log-function.

    Parameters
    ----------
    params: tuple
        The model parameters.
    labels: Tuple of strings
        Tuple of labels in the same order as initial_state:
            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the flux of emission lines (labels should match those \
            in the em_lines dictionary), in units of the model spectrum (times mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution.
    bounds: dictionary
        Each entry should be associated with a tuple corresponding to lower and
        upper bounds respectively. Bounds should be provided for ALL model
        parameters, including 'R' (planet photometric radius). 'Av' (optical
        extinction) is optional. If provided here, Av will also be fitted.
        All keywords that are neither 'R', 'Av' nor 'M' will
        be considered model grid parameters.

        Examples:
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5)}
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5),
            >>>           'Av':(0.,2.5), 'Rv':(1,5)}
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5),
            >>>           'Av':(0.,2.5), 'BrG':(1e-17,1e-15)}
            >>> bounds = {'Teff':(2000,3000), 'logg':(3.0,4.5), 'R':(1.,5.),
            >>>           'Tbb1':(500,1500), 'Rbb1':(0.1,1.)}

    priors: dictionary, opt
        If not None, sets prior estimates for each parameter of the model. Each
        entry should be set to either None (no prior) or a tuple of 2 elements
        containing prior estimate and uncertainty on the estimate.
        Missing entries (i.e. provided in the ``bounds`` dictionary but not
        here) will be assigned no prior.
        'M' can be used for a prior on the mass of the planet. In that case the
        corresponding prior log probability is computed from the values for
        parameters 'logg' and 'R'.

        Examples:
            >>> priors = {'logg':(3.5,0.2), 'Rv':(3.1,0.2)}
            >>> priors = {'Teff':(1600,100), 'logg':(3.5,0.5), 'R':(1.6,0.1),
            >>>           'Av':(1.8,0.2), 'M':(10,3)}

        Important: dictionary entry names should match exactly those of
        ``bounds``.

    Returns
    -------
    out: float
        If all parameters are within bounds:
            * 0 if no prior,
            * the sum of gaussian log probabilities for each prior otherwise.
        If at least one model parameter is out of bounds, returns -np.inf.
    """
    n_params = len(params)
    n_labs = len(labels)
    n_dico = len(bounds)
    if n_dico != n_params or n_dico != n_labs:
        raise TypeError('params, labels and bounds should have same length')

    cond = True
    for pp in range(n_params):
        if not bounds[labels[pp]][0] <= params[pp] <= bounds[labels[pp]][1]:
            cond = False

    if cond:
        lnpri = 0.
        if priors is not None:
            for key, prior in priors.items():
                if key == 'M' and 'logg' in labels:
                    idx_logg = labels.index("logg")
                    idx_rp = labels.index("R")
                    rp = params[idx_rp]
                    logg = params[idx_logg]
                    mp = mj_from_rj_and_logg(rp, logg)
                    lnpri += -0.5 * (mp - prior[0])**2 / prior[1]**2
                else:
                    idx_prior = labels.index(key)
                    lnpri += -0.5*(params[idx_prior]-prior[0])**2/prior[1]**2
        return lnpri
    else:
        return -np.inf


def lnlike(params, labels, grid_param_list, lbda_obs, spec_obs, err_obs,
           dist, model_grid=None, model_reader=None, em_lines={}, em_grid={},
           dlbda_obs=None, instru_corr=None, instru_res=None, instru_idx=None,
           use_weights=True, filter_reader=None, AV_bef_bb=False,
           units_obs='si', units_mod='si', interp_order=1):
    """ Define the likelihood log-function.

    Parameters
    ----------
    params : tuple
        Set of model parameters for which the model grid has to be
        interpolated.
    labels: Tuple of strings
        Tuple of labels in the same order as initial_state:
            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the flux of emission lines (labels should match those \
            in the em_lines dictionary), in units of the model spectrum (times mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution.
    grid_param_list : list of 1d numpy arrays/lists
        Should contain list/numpy 1d arrays with available grid of model
        parameters (should only contain the sampled parameters, not the
        models themselves). The models will be loaded with ``model_reader``.
    lbda_obs : numpy 1d ndarray or list
        Wavelengths of observed spectrum. If several instruments were used,
        the wavelengths should be ordered per instrument, not necessarily as
        monotonically increasing wavelength. Hereafter, :math:`n_{ch}` is the
        length of ``lbda_obs``.
    spec_obs : numpy 1d ndarray or list
        Observed spectrum for each value of ``lbda_obs``. Should have a
        length of :math:`n_{ch}`.
    err_obs : numpy 1d/2d ndarray or list
        Uncertainties on the observed spectrum. The array (list) can have
        either a length of :math:`n_{ch}`, or a shape of :math:`(2,n_{ch})`
        for lower (first column) and upper (second column) uncertainties
        provided.
    dist : float
        Distance in parsec, used for flux scaling of the models.
    model_grid : numpy N-d array, optional
        If provided, should contain the grid of model spectra for each free
        parameter of the given grid. I.e. for a grid of :math:`n_T` values
        of :math:`T_{eff}` and :math:`n_g` values of log(:math:`g`), the
        numpy array should have a shape of :math:`(n_T, n_g, n_{ch}, 2)`,
        where the last 2 dimensions correspond to wavelength and fluxes
        respectively. If provided, ``model_grid`` takes precedence over
        ``model_name``/``model_reader``.
    model_reader : python routine, opt
        External routine that reads a model file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains model values. See example routine in
        ``special.model_resampling.interpolate_model`` description.
    em_lines: dictionary, opt
        Dictionary of emission lines to be added on top of the model
        spectrum. Each dict entry should be the name of the line, assigned
        to a tuple of 4 values:

            1. the wavelength (in :math:`\mu` m);
            2. a string indicating whether line intensity is expressed in \
            flux ('F'), luminosity ('L') or log(L/LSun) ("LogL");
            3. the FWHM of the gaussian (or None if to be set automatically);
            4. whether the FWHM is expressed in 'nm', 'mu' or 'km/s'.

        The third and fourth can also be set to None. In that case, the FWHM
        of the gaussian will automatically be set to the equivalent width of
        the line, calculated from the flux to be injected and the continuum
        level (measured in the grid model to which the line is injected).

        Examples:
            >>> em_lines = {'BrG':(2.1667,'F',None, None)}
            >>> em_lines = {'BrG':(2.1667,'LogL', 100, 'km/s')}
    em_grid: dictionary pointing to lists, opt
        Dictionary where each entry corresponds to an emission line and
        points to a list of values to inject for emission line fluxes. For
        computation efficiency, interpolation will be performed between the
        points of this grid during the MCMC sampling. Dictionary entries
        should match those in ``labels`` and ``em_lines``.

        Examples:
            >>> BrGmin, BrGmax = -5, 5
            >>> em_grid = {'BrG': np.arange(BrGmin, BrGmax, 20)}

            >>> BrGmin, BrGmax = -5, 5
            >>> PaBmin, PaBmax = -2, 7
            >>> em_grid = {'PaB': np.arange(PaBmin, PaBmax, 20),
            >>>            'BrG': np.arange(BrGmin, BrGmax, 20)}
    dlbda_obs : numpy 1d ndarray or list, optional
        Respective spectral channel width or FWHM of the photometric filters
        used for each point of the observed spectrum. This vector is used to
        infer which part(s) of a combined spectro+photometric spectrum
        should involve convolution+subsampling (model resolution higher than
        measurements), interpolation (the opposite), or convolution by the
        transmission curve of a photometric filter. If not provided, it will
        be inferred from the difference between consecutive lbda_obs points
        (i.e. inaccurate for a combined spectrum). It must be provided IF
        one wants to weigh each measurement based on the spectral resolution
        of each instrument (as in [OLO16]_), through the ``use_weights``
        argument.
    instru_corr : numpy 2d ndarray, optional
        Spectral correlation between post-processed images in which the
        spectrum is measured. It is specific to the instrument, PSF
        subtraction algorithm and radial separation of the companion from
        the central star. Can be computed using
        ``special.spec_corr.spectral_correlation``. In case of a spectrum
        obtained with different instruments, it is recommended to construct
        the final spectral_correlation matrix with
        ``special.spec_corr.combine_corrs``. If ``instru_corr`` is not
        provided, the uncertainties in each spectral channel will be
        considered independent. See [GRE16]_ for more details.
    instru_res : float or list of floats/strings, optional
        The mean instrumental resolving power(s) OR filter names. This is
        used to convolve the model spectrum. If several instruments are
        used, provide a list of resolving power values / filter names, one
        for each instrument used.
    instru_idx: numpy 1d array, optional
        1d array containing an index representing each instrument used to
        obtain the spectrum, label them from 0 to the number of instruments
        (:math:`n_{ins}`). Zero for points that don't correspond to any of
        the ``instru_res`` values provided, and i in :math:`[1,n_{ins}]` for
        points associated to instru_res[i-1]. This parameter must be
        provided if the spectrum consists of points obtained with different
        instruments.
    use_weights: bool, optional
        For the likelihood calculation, whether to weigh each point of the
        spectrum based on the spectral resolution or bandwidth of
        photometric filters used. Weights will be proportional to
        ``dlbda_obs/lbda_obs`` if ``dlbda_obs`` is provided, or set to 1 for
        all points otherwise.
    filter_reader: python routine, optional
        External routine that reads a filter file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains transmission values. Important: if not provided, but
        strings are detected in instru_res, the default file reader will be
        used. It assumes the following format for the files:

            - first row contains headers (titles of each column)
            - starting from 2nd row: 1st column: wavelength, 2nd col.: transmission
            - Unit of wavelength can be provided in parentheses of first header \
            key name: e.g. "WL(AA)" for angstrom, "wavelength(mu)" for micrometer \
            or "lambda(nm)" for nanometer. Note: only what is in parentheses \
            matters for the units.
    AV_bef_bb: bool, optional
        If both extinction and an extra bb component are free parameters,
        whether to apply extinction before adding the BB component (e.g.
        extinction mostly from circumplanetary dust) or after the BB
        component (e.g. mostly interstellar extinction).
    units_obs : str, opt {'si','cgs','jy'}
        Units of observed spectrum. 'si' for W/m^2/mu; 'cgs' for
        ergs/s/cm^2/mu or 'jy' for janskys.
    units_mod: str, opt {'si','cgs','jy'}
        Units of the model. 'si' for W/m^2/mu; 'cgs' for ergs/s/cm^2/mu or
        'jy' for janskys. If different from units_obs, the spectrum units
        will be converted.
    interp_order: int or tuple of int, optional, {-1,0,1}
        Interpolation mode for model interpolation. If a tuple of integers,
        the length should match the number of grid dimensions and will
        trigger a different interpolation mode for the different parameters.
            - -1: Order 1 spline interpolation in logspace for the parameter
            - 0: nearest neighbour model
            - 1: Order 1 spline interpolation

    Returns
    -------
    out: float
        The log of the likelihood.

    """
    if grid_param_list is not None:
        if model_grid is None and model_reader is None:
            msg = "Either model_grid or model_reader should be provided"
            raise TypeError(msg)

    lbda_mod, spec_mod = make_model_from_params(params, labels,
                                                grid_param_list, dist,
                                                lbda_obs, model_grid,
                                                model_reader, em_lines,
                                                em_grid, dlbda_obs,
                                                instru_res, instru_idx,
                                                filter_reader, AV_bef_bb,
                                                units_obs, units_mod,
                                                interp_order)

    # evaluate the goodness-of-fit indicator
    chi = goodness_of_fit(lbda_obs, spec_obs, err_obs, lbda_mod, spec_mod,
                          dlbda_obs=dlbda_obs, instru_corr=instru_corr,
                          instru_res=instru_res, instru_idx=instru_idx,
                          use_weights=use_weights,
                          filter_reader=filter_reader, plot=False,
                          outfile=None)

    # log likelihood
    lnlikelihood = -0.5 * chi

    return lnlikelihood
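
# Illustrative sketch for ``lnlike`` (not part of the original module; all
# names below are hypothetical placeholders): with a pre-resampled
# ``model_grid`` of shape (n_Teff, n_logg, n_ch, 2), the log-likelihood of
# one parameter set is -0.5 times the chi-square returned by
# ``goodness_of_fit``:
# >>> ll = lnlike((1500., 3.8, 1.2), ('Teff', 'logg', 'R'),
# ...             [teff_grid, logg_grid], lbda, spec, err, dist=140.,
# ...             model_grid=model_grid)
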
def lnprob(params, labels, bounds, grid_param_list, lbda_obs, spec_obs,
           err_obs, dist, model_grid=None, model_reader=None, em_lines={},
           em_grid={}, dlbda_obs=None, instru_corr=None, instru_res=None,
           instru_idx=None, use_weights=True, filter_reader=None,
           AV_bef_bb=False, units_obs='si', units_mod='si', interp_order=1,
           priors=None, physical=True):
    """ Define the probability log-function as the sum between the prior and
    likelihood log-functions.

    Parameters
    ----------
    params: tuple
        The model parameters.
    labels: Tuple of strings
        Tuple of labels in the same order as initial_state:
            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the flux of emission lines (labels should match those \
            in the ``em_lines`` dictionary), in units of the model spectrum (times mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution.
    bounds: dictionary
        Each entry should be associated with a tuple corresponding to lower
        and upper bounds respectively. Bounds should be provided for ALL
        model parameters, including 'R' (planet photometric radius). 'Av'
        (optical extinction) is optional. If provided here, Av will also be
        fitted. All keywords that are neither 'R', 'Av' nor 'M' will be
        considered model grid parameters.

        Examples:
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5)}
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5),
            >>>           'Av':(0.,2.5), 'Rv':(1,5)}
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5),
            >>>           'Av':(0.,2.5), 'BrG':(1e-17,1e-15)}
            >>> bounds = {'Teff':(2000,3000), 'logg':(3.0,4.5), 'R':(1.,5.),
            >>>           'Tbb1':(500,1500), 'Rbb1':(0.1,1.)}
    grid_param_list : list of 1d numpy arrays/lists
        Should contain list/numpy 1d arrays with available grid of model
        parameters (should only contain the sampled parameters, not the
        models themselves). The models will be loaded with ``model_reader``.
    lbda_obs : numpy 1d ndarray or list
        Wavelengths of observed spectrum. If several instruments were used,
        the wavelengths should be ordered per instrument, not necessarily as
        monotonically increasing wavelength. Hereafter, :math:`n_{ch}` is the
        length of ``lbda_obs``.
    spec_obs : numpy 1d ndarray or list
        Observed spectrum for each value of ``lbda_obs``. Should have a
        length of :math:`n_{ch}`.
    err_obs : numpy 1d/2d ndarray or list
        Uncertainties on the observed spectrum. The array (list) can have
        either a length of :math:`n_{ch}`, or a shape of :math:`(2,n_{ch})`
        for lower (first column) and upper (second column) uncertainties
        provided.
    dist : float
        Distance in parsec, used for flux scaling of the models.
    model_grid : numpy N-d array, optional
        If provided, should contain the grid of model spectra for each free
        parameter of the given grid. I.e. for a grid of :math:`n_T` values
        of :math:`T_{eff}` and :math:`n_g` values of log(:math:`g`), the
        numpy array should have a shape of :math:`(n_T, n_g, n_{ch}, 2)`,
        where the last 2 dimensions correspond to wavelength and fluxes
        respectively. If provided, ``model_grid`` takes precedence over
        ``model_name``/``model_reader``.
    model_reader : python routine, opt
        External routine that reads a model file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains model values. See example routine in
        ``special.model_resampling.interpolate_model`` description.
    em_lines: dictionary, opt
        Dictionary of emission lines to be added on top of the model
        spectrum. Each dict entry should be the name of the line, assigned
        to a tuple of 4 values:

            1. the wavelength (in :math:`\mu` m);
            2. a string indicating whether line intensity is expressed in \
            flux ('F'), luminosity ('L') or log(L/LSun) ("LogL");
            3. the FWHM of the gaussian (or None if to be set automatically);
            4. whether the FWHM is expressed in 'nm', 'mu' or 'km/s'.

        The third and fourth can also be set to None. In that case, the FWHM
        of the gaussian will automatically be set to the equivalent width of
        the line, calculated from the flux to be injected and the continuum
        level (measured in the grid model to which the line is injected).

        Examples:
            >>> em_lines = {'BrG':(2.1667,'F',None, None)}
            >>> em_lines = {'BrG':(2.1667,'LogL', 100, 'km/s')}
    em_grid: dictionary pointing to lists, opt
        Dictionary where each entry corresponds to an emission line and
        points to a list of values to inject for emission line fluxes. For
        computation efficiency, interpolation will be performed between the
        points of this grid during the MCMC sampling. Dictionary entries
        should match those in ``labels`` and ``em_lines``.

        Examples:
            >>> BrGmin, BrGmax = -5, 5
            >>> em_grid = {'BrG': np.arange(BrGmin, BrGmax, 20)}

            >>> BrGmin, BrGmax = -5, 5
            >>> PaBmin, PaBmax = -2, 7
            >>> em_grid = {'PaB': np.arange(PaBmin, PaBmax, 20),
            >>>            'BrG': np.arange(BrGmin, BrGmax, 20)}
    dlbda_obs : numpy 1d ndarray or list, optional
        Respective spectral channel width or FWHM of the photometric filters
        used for each point of the observed spectrum. This vector is used to
        infer which part(s) of a combined spectro+photometric spectrum
        should involve convolution+subsampling (model resolution higher than
        measurements), interpolation (the opposite), or convolution by the
        transmission curve of a photometric filter. If not provided, it will
        be inferred from the difference between consecutive lbda_obs points
        (i.e. inaccurate for a combined spectrum). It must be provided IF
        one wants to weigh each measurement based on the spectral resolution
        of each instrument (as in [OLO16]_), through the ``use_weights``
        argument.
    instru_corr : numpy 2d ndarray, optional
        Spectral correlation between post-processed images in which the
        spectrum is measured. It is specific to the instrument, PSF
        subtraction algorithm and radial separation of the companion from
        the central star. Can be computed using
        ``special.spec_corr.spectral_correlation``. In case of a spectrum
        obtained with different instruments, it is recommended to construct
        the final spectral_correlation matrix with
        ``special.spec_corr.combine_corrs``. If ``instru_corr`` is not
        provided, the uncertainties in each spectral channel will be
        considered independent. See [GRE16]_ for more details.
    instru_res : float or list of floats/strings, optional
        The mean instrumental resolving power(s) OR filter names. This is
        used to convolve the model spectrum. If several instruments are
        used, provide a list of resolving power values / filter names, one
        for each instrument used.
    instru_idx: numpy 1d array, optional
        1d array containing an index representing each instrument used to
        obtain the spectrum, label them from 0 to the number of instruments
        (:math:`n_{ins}`). Zero for points that don't correspond to any of
        the ``instru_res`` values provided, and i in :math:`[1,n_{ins}]` for
        points associated to instru_res[i-1]. This parameter must be
        provided if the spectrum consists of points obtained with different
        instruments.
    use_weights: bool, optional
        For the likelihood calculation, whether to weigh each point of the
        spectrum based on the spectral resolution or bandwidth of
        photometric filters used. Weights will be proportional to
        ``dlbda_obs/lbda_obs`` if ``dlbda_obs`` is provided, or set to 1 for
        all points otherwise.
    filter_reader: python routine, optional
        External routine that reads a filter file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains transmission values. Important: if not provided, but
        strings are detected in instru_res, the default file reader will be
        used. It assumes the following format for the files:

            - first row contains headers (titles of each column)
            - starting from 2nd row: 1st column: wavelength, 2nd col.: transmission
            - Unit of wavelength can be provided in parentheses of first header \
            key name: e.g. "WL(AA)" for angstrom, "wavelength(mu)" for micrometer \
            or "lambda(nm)" for nanometer. Note: only what is in parentheses \
            matters for the units.
    AV_bef_bb: bool, optional
        If both extinction and an extra bb component are free parameters,
        whether to apply extinction before adding the BB component (e.g.
        extinction mostly from circumplanetary dust) or after the BB
        component (e.g. mostly interstellar extinction).
    units_obs : str, opt {'si','cgs','jy'}
        Units of observed spectrum. 'si' for W/m^2/mu; 'cgs' for
        ergs/s/cm^2/mu or 'jy' for janskys.
    units_mod: str, opt {'si','cgs','jy'}
        Units of the model. 'si' for W/m^2/mu; 'cgs' for ergs/s/cm^2/mu or
        'jy' for janskys. If different from units_obs, the spectrum units
        will be converted.
    interp_order: int, opt, {-1,0,1}
        Interpolation mode for model interpolation.
            - -1: log interpolation (i.e. linear interpolation on log(Flux))
            - 0: nearest neighbour model
            - 1: Order 1 spline interpolation
    priors: dictionary, opt
        If not None, sets prior estimates for each parameter of the model.
        Each entry should be set to either None (no prior) or a tuple of 2
        elements containing prior estimate and uncertainty on the estimate.
        Missing entries (i.e. provided in the ``bounds`` dictionary but not
        here) will be assigned no prior. 'M' can be used for a prior on the
        mass of the planet. In that case the corresponding prior log
        probability is computed from the values for parameters 'logg' and
        'R'.

        Examples:
            >>> priors = {'logg':(3.5,0.2), 'Rv':(3.1,0.2)}
            >>> priors = {'Teff':(1600,100), 'logg':(3.5,0.5), 'R':(1.6,0.1),
            >>>           'Av':(1.8,0.2), 'M':(10,3)}

        Important: dictionary entry names should match exactly those of
        ``bounds``.
    physical: bool, opt
        In case of extra black body component(s) to a photosphere, whether
        to force lower temperature than the photosphere effective
        temperature.

    Returns
    -------
    out: float
        The probability log-function.

    """
    lp = lnprior(params, labels, bounds, priors)

    if np.isinf(lp):
        return -np.inf

    if 'Tbb1' in labels:
        condT = ('T' in labels or 'Teff' in labels)
        n_bb = 0
        for label in labels:
            if 'Tbb' in label:
                n_bb += 1
        idx_Tbb1 = labels.index("Tbb1")
        for ii in range(n_bb):
            idx = ii*2
            if grid_param_list is not None and condT:
                if 'T' in labels:
                    idx_Teff = labels.index("T")
                else:
                    idx_Teff = labels.index("Teff")
                # COND 1: only allow Tbb < Teff
                if physical and params[idx_Tbb1+idx] >= params[idx_Teff]:
                    return -np.inf
            elif physical:
                idx_R = labels.index("R")
                R = params[idx_R]
                Rbb = params[idx_Tbb1+idx+1]
                if Rbb <= R:
                    pass
                else:
                    Teq = params[idx_Teff] * np.sqrt(R/(2*Rbb))
                    # COND 2: only allow Tbb <= Teq at Rbb
                    if params[idx_Tbb1+idx] >= Teq:
                        return -np.inf
            if ii > 0:
                # avoid unnecessary degeneracies by only allowing:
                # Teff1 > Teff2, Teff2 > Teff3
                if params[idx_Tbb1+idx] >= params[idx_Tbb1+idx-2]:
                    return -np.inf

    return lp + lnlike(params, labels, grid_param_list, lbda_obs, spec_obs,
                       err_obs, dist, model_grid, model_reader, em_lines,
                       em_grid, dlbda_obs, instru_corr, instru_res,
                       instru_idx, use_weights, filter_reader, AV_bef_bb,
                       units_obs, units_mod, interp_order)
def mcmc_spec_sampling(lbda_obs, spec_obs, err_obs, dist, grid_param_list,
                       initial_state, labels, bounds, resamp_before=True,
                       model_grid=None, model_reader=None, em_lines={},
                       em_grid={}, dlbda_obs=None, instru_corr=None,
                       instru_res=None, instru_idx=None, use_weights=True,
                       filter_reader=None, AV_bef_bb=False, units_obs='si',
                       units_mod='si', interp_order=1, priors=None,
                       physical=True, interp_nonexist=True, ini_ball=1e-1,
                       a=2.0, nwalkers=1000, niteration_min=10,
                       niteration_limit=1000, niteration_supp=0,
                       check_maxgap=20, conv_test='ac', ac_c=50,
                       ac_count_thr=3, burnin=0.3, rhat_threshold=1.01,
                       rhat_count_threshold=1, grid_name='resamp_grid.fits',
                       output_dir='special/', output_file=None, nproc=1,
                       display=False, verbosity=0, save=False):
    """ Runs an affine invariant MCMC sampling algorithm in order to
    determine the most likely parameters for given spectral model and
    observed spectrum. The code relies on ``emcee`` ([FOR13]_ & [FOR19]_).

    Allowed features:
        * Spectral models can either be read from a grid (e.g. BT-SETTL) or \
        be purely parametric (e.g. a blackbody model).
        * Extinction (A_V) and total-to-selective optical extinction ratio \
        (R_V) can be sampled. Default: A_V=0. If non-zero, default R_V=3.1 (ISM).
        * A dictionary of emission lines can be provided and their flux can \
        be sampled too.
        * Gaussian priors can be provided for each parameter, including the \
        mass of the object. The latter will be used if 'logg' is a parameter.
        * Spectral correlation between measurements will be taken into account \
        if provided in 'instru_corr'.
        * Convolution of the model spectra with instrumental FWHM or \
        photometric filter can be performed using 'instru_res' and/or \
        'filter_reader' (done before resampling to observed).
        * The weight of each observed point will be directly proportional to \
        Delta lbda_i/lbda_i, where Delta lbda_i is either the FWHM of the \
        photometric filter (imager) or the width of the spectral channel (IFS).
        * MCMC convergence criterion can either be based on auto-correlation \
        time (default) or the Gelman-Rubin test.

    The result of this procedure is a chain with the samples from the
    posterior distributions of each of the free parameters in the model.

    Parameters
    ----------
    lbda_obs : numpy 1d ndarray or list
        Wavelengths of observed spectrum. If several instruments were used,
        the wavelengths should be ordered per instrument, not necessarily as
        monotonically increasing wavelength. Hereafter, :math:`n_{ch}` is the
        length of ``lbda_obs``.
    spec_obs : numpy 1d ndarray or list
        Observed spectrum for each value of ``lbda_obs``. Should have a
        length of :math:`n_{ch}`.
    err_obs : numpy 1d/2d ndarray or list
        Uncertainties on the observed spectrum. The array (list) can have
        either a length of :math:`n_{ch}`, or a shape of :math:`(2,n_{ch})`
        for lower (first column) and upper (second column) uncertainties
        provided.
    dist : float
        Distance in parsec, used for flux scaling of the models.
    grid_param_list : list of 1d numpy arrays/lists
        Should contain list/numpy 1d arrays with available grid of model
        parameters (should only contain the sampled parameters, not the
        models themselves). The models will be loaded with ``model_reader``.
    initial_state: tuple of floats
        Initial guess on the best fit parameters of the spectral fit. Length
        of the tuple should match the total number of free parameters.
        Walkers will all be initialised in a small ball of parameter space
        around that first guess.

            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the intensity of emission lines (labels must match \
            those in the em_lines dict), in units of the model spectrum (x mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution
    labels: Tuple of strings
        Tuple of labels in the same order as initial_state:
            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the flux of emission lines (labels should match those \
            in the em_lines dictionary), in units of the model spectrum (times mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution.
    bounds: dictionary
        Each entry should be associated with a tuple corresponding to lower
        and upper bounds respectively. Bounds should be provided for ALL
        model parameters, including 'R' (planet photometric radius). 'Av'
        (optical extinction) is optional. If provided here, Av will also be
        fitted. All keywords that are neither 'R', 'Av' nor 'M' will be
        considered model grid parameters.

        Examples:
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5)}
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5),
            >>>           'Av':(0.,2.5), 'Rv':(1,5)}
            >>> bounds = {'Teff':(1000,2000), 'logg':(3.0,4.5), 'R':(0.1,5),
            >>>           'Av':(0.,2.5), 'BrG':(1e-17,1e-15)}
            >>> bounds = {'Teff':(2000,3000), 'logg':(3.0,4.5), 'R':(1.,5.),
            >>>           'Tbb1':(500,1500), 'Rbb1':(0.1,1.)}
    resamp_before: bool, optional
        Whether to prepare the whole grid of resampled models before entering
        the MCMC, i.e. to avoid doing it at every MCMC step. Recommended.
        Only reason not to: model grid is too large and individual models
        require being opened and resampled at each step.
    model_grid : numpy N-d array, optional
        If provided, should contain the grid of model spectra for each free
        parameter of the given grid. I.e. for a grid of :math:`n_T` values
        of :math:`T_{eff}` and :math:`n_g` values of log(:math:`g`), the
        numpy array should have a shape of :math:`(n_T, n_g, n_{ch}, 2)`,
        where the last 2 dimensions correspond to wavelength and fluxes
        respectively. If provided, ``model_grid`` takes precedence over
        ``model_name``/``model_reader``.
    model_reader : python routine, opt
        External routine that reads a model file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains model values. See example routine in
        ``special.model_resampling.interpolate_model`` description.
    em_lines: dictionary, opt
        Dictionary of emission lines to be added on top of the model
        spectrum. Each dict entry should be the name of the line, assigned
        to a tuple of 4 values:

            1. the wavelength (in :math:`\mu` m);
            2. a string indicating whether line intensity is expressed in \
            flux ('F'), luminosity ('L') or log(L/LSun) ("LogL");
            3. the FWHM of the gaussian (or None if to be set automatically);
            4. whether the FWHM is expressed in 'nm', 'mu' or 'km/s'.

        The third and fourth can also be set to None. In that case, the FWHM
        of the gaussian will automatically be set to the equivalent width of
        the line, calculated from the flux to be injected and the continuum
        level (measured in the grid model to which the line is injected).

        Examples:
            >>> em_lines = {'BrG':(2.1667,'F',None, None)}
            >>> em_lines = {'BrG':(2.1667,'LogL', 100, 'km/s')}
    em_grid: dictionary pointing to lists, opt
        Dictionary where each entry corresponds to an emission line and
        points to a list of values to inject for emission line fluxes. For
        computation efficiency, interpolation will be performed between the
        points of this grid during the MCMC sampling. Dictionary entries
        should match those in ``labels`` and ``em_lines``.

        Examples:
            >>> BrGmin, BrGmax = -5, 5
            >>> em_grid = {'BrG': np.arange(BrGmin, BrGmax, 20)}

            >>> BrGmin, BrGmax = -5, 5
            >>> PaBmin, PaBmax = -2, 7
            >>> em_grid = {'PaB': np.arange(PaBmin, PaBmax, 20),
            >>>            'BrG': np.arange(BrGmin, BrGmax, 20)}
    dlbda_obs : numpy 1d ndarray or list, optional
        Respective spectral channel width or FWHM of the photometric filters
        used for each point of the observed spectrum. This vector is used to
        infer which part(s) of a combined spectro+photometric spectrum
        should involve convolution+subsampling (model resolution higher than
        measurements), interpolation (the opposite), or convolution by the
        transmission curve of a photometric filter. If not provided, it will
        be inferred from the difference between consecutive lbda_obs points
        (i.e. inaccurate for a combined spectrum). It must be provided IF
        one wants to weigh each measurement based on the spectral resolution
        of each instrument (as in [OLO16]_), through the ``use_weights``
        argument.
    instru_corr : numpy 2d ndarray, optional
        Spectral correlation between post-processed images in which the
        spectrum is measured. It is specific to the instrument, PSF
        subtraction algorithm and radial separation of the companion from
        the central star. Can be computed using
        ``special.spec_corr.spectral_correlation``. In case of a spectrum
        obtained with different instruments, it is recommended to construct
        the final spectral_correlation matrix with
        ``special.spec_corr.combine_corrs``. If ``instru_corr`` is not
        provided, the uncertainties in each spectral channel will be
        considered independent. See [GRE16]_ for more details.
    instru_res : float or list of floats/strings, optional
        The mean instrumental resolving power(s) OR filter names. This is
        used to convolve the model spectrum. If several instruments are
        used, provide a list of resolving power values / filter names, one
        for each instrument used.
    instru_idx: numpy 1d array, optional
        1d array containing an index representing each instrument used to
        obtain the spectrum, label them from 0 to the number of instruments
        (:math:`n_{ins}`). Zero for points that don't correspond to any of
        the ``instru_res`` values provided, and i in :math:`[1,n_{ins}]` for
        points associated to instru_res[i-1]. This parameter must be
        provided if the spectrum consists of points obtained with different
        instruments.
    use_weights: bool, optional
        For the likelihood calculation, whether to weigh each point of the
        spectrum based on the spectral resolution or bandwidth of
        photometric filters used. Weights will be proportional to
        ``dlbda_obs/lbda_obs`` if ``dlbda_obs`` is provided, or set to 1 for
        all points otherwise.
    filter_reader: python routine, optional
        External routine that reads a filter file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains transmission values. Important: if not provided, but
        strings are detected in instru_res, the default file reader will be
        used. It assumes the following format for the files:

            - first row contains headers (titles of each column)
            - starting from 2nd row: 1st column: wavelength, 2nd col.: transmission
            - Unit of wavelength can be provided in parentheses of first header \
            key name: e.g. "WL(AA)" for angstrom, "wavelength(mu)" for micrometer \
            or "lambda(nm)" for nanometer. Note: only what is in parentheses \
            matters for the units.
    AV_bef_bb: bool, optional
        If both extinction and an extra bb component are free parameters,
        whether to apply extinction before adding the BB component (e.g.
        extinction mostly from circumplanetary dust) or after the BB
        component (e.g. mostly interstellar extinction).
    units_obs : str, opt {'si','cgs','jy'}
        Units of observed spectrum. 'si' for W/m^2/mu; 'cgs' for
        ergs/s/cm^2/mu or 'jy' for janskys.
    units_mod: str, opt {'si','cgs','jy'}
        Units of the model. 'si' for W/m^2/mu; 'cgs' for ergs/s/cm^2/mu or
        'jy' for janskys. If different from units_obs, the spectrum units
        will be converted.
    interp_order: int, opt, {-1,0,1}
        Interpolation mode for model interpolation.
            - -1: log interpolation (i.e. linear interpolation on log(Flux))
            - 0: nearest neighbour model
            - 1: Order 1 spline interpolation
    priors: dictionary, opt
        If not None, sets prior estimates for each parameter of the model.
        Each entry should be set to either None (no prior) or a tuple of 2
        elements containing prior estimate and uncertainty on the estimate.
        Missing entries (i.e. provided in the ``bounds`` dictionary but not
        here) will be assigned no prior. 'M' can be used for a prior on the
        mass of the planet. In that case the corresponding prior log
        probability is computed from the values for parameters 'logg' and
        'R'.

        Examples:
            >>> priors = {'logg':(3.5,0.2), 'Rv':(3.1,0.2)}
            >>> priors = {'Teff':(1600,100), 'logg':(3.5,0.5), 'R':(1.6,0.1),
            >>>           'Av':(1.8,0.2), 'M':(10,3)}

        Important: dictionary entry names should match exactly those of
        ``bounds``.
    physical: bool, opt
        In case of extra black body component(s) to a photosphere, whether
        to force lower temperature than the photosphere effective
        temperature.
    interp_nonexist: bool, opt
        Whether to interpolate non-existing models in the grid. Only used if
        resamp_before is set to True.
    ini_ball: float or string, default=1e-1
        Size of the initial ball in parameter space from which walkers start
        their chain. If "uniform" is provided, a uniform ini_ball spanning
        the bounds interval will be used to initialise walkers.
    a: float, default=2.0
        The proposal scale parameter. See notes.
    nwalkers: int, default: 1000
        Number of walkers.
    niteration_min: int, optional
        Steps per walker lower bound. The simulation will run at least this
        number of steps per walker.
    niteration_limit: int, optional
        Steps per walker upper bound. If the simulation runs up to
        'niteration_limit' steps without having reached the convergence
        criterion, the run is stopped.
    niteration_supp: int, optional
        Number of iterations to run after having "reached the convergence".
    burnin: float, default=0.3
        The fraction of a walker which is discarded.
    rhat_threshold: float, default=1.01
        The Gelman-Rubin threshold used for the test for nonconvergence.
    rhat_count_threshold: int, optional
        The Gelman-Rubin test must be satisfied 'rhat_count_threshold' times
        in a row before claiming that the chain has converged.
    check_maxgap: int, optional
        Maximum number of steps per walker between two convergence tests.
    conv_test: str, optional {'gb','ac'}
        Method to check for convergence:
            - 'gb' for Gelman-Rubin test \
            (http://digitalassets.lib.berkeley.edu/sdtr/ucb/text/305.pdf)
            - 'ac' for autocorrelation analysis \
            (https://emcee.readthedocs.io/en/stable/tutorials/autocorr/)
    ac_c: int, optional
        If the auto-correlation convergence test is used ('ac'), the test is
        passed when the chain is longer than ``ac_c`` times the estimated
        auto-correlation time for all parameters (i.e. tau/N <= 1/ac_c).
    ac_count_thr: int, optional
        If the auto-correlation convergence test is used ('ac'), number of
        times in a row the test must be satisfied before claiming that the
        chain has converged.
    nproc: int, optional
        The number of processes to use for parallelization.
    grid_name: str, optional
        Name of the fits file containing the model grid (numpy array) AFTER
        convolution+resampling as the observed spectrum given as input. If
        provided, will read it if it exists (and resamp_before is set to
        True), or make it and write it if it doesn't.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    output_file: str, optional
        The name of the output file which contains the MCMC results in the
        case ``save`` is True.
    display: bool, optional
        If True, the walk plot is displayed at each evaluation of the
        Gelman-Rubin test.
    verbosity: 0, 1, 2 or 3, optional
        Verbosity level. 0 for no output and 2 for full information. If set
        to 3 and ``save`` is True, intermediate results are also pickled at
        each convergence test.
    save: bool, optional
        If True, the MCMC results are pickled.

    Returns
    -------
    out : numpy.array
        The MCMC samples after truncation of zeros.
    lnprobability: emcee sample object
        The corresponding probabilities for each sample.

    Note
    ----
    - The parameter ``a`` must be > 1. For more theoretical information
      concerning this parameter, see [GOO10]_.
    - The parameter ``rhat_threshold`` can be a numpy.array with individual
      threshold values for each model parameter.
    - If several filter filenames are provided in ``instru_res``, the filter
      files must all have the same format and wavelength units (for reading
      by the same ``filter_reader`` snippet or default function).
    - ``grid_param_list`` and ``model_grid`` shouldn't contain grids on
      radius and Av. For a combined grid model + black body fit, just
      provide the list of parameters probed by the grid to
      ``grid_param_list``, and provide values for 'Tbbn' and 'Rbbn' to
      ``initial_state``, ``labels`` and ``bounds``.

    Examples
    --------
    This is a minimal example for the file_reader routine to be provided as
    argument to model_interpolation. The routine should only take as inputs
    grid parameters, and returns as output: both the wavelengths and model
    values as a 2D numpy array. This example assumes the model is in a fits
    file, that is already a 2D numpy array, where the first column is the
    wavelength, and the 2nd column contains the corresponding model values.

        >>> def _example_file_reader(params):
        >>>     model = open_fits(filename.format(params[0], params[1]))
        >>>     return model

    """
    nparams = len(initial_state)

    if grid_param_list is not None:
        if model_grid is None and model_reader is None:
            msg = "Either model_grid or model_reader should be provided"
            raise TypeError(msg)
        n_gparams = len(grid_param_list)
        gp_dims = []
        for nn in range(n_gparams):
            gp_dims.append(len(grid_param_list[nn]))
        gp_dims = tuple(gp_dims)
    else:
        n_gparams = 0

    # format emission line dictionary and em grid
    if len(em_grid) > 0:
        n_em = len(em_grid)
        em_dims = []
        for lab in labels:
            if lab in em_grid.keys():
                em_dims.append(len(em_grid[lab]))
        em_dims = tuple(em_dims)
        # update the grids depending on input units => make it surface flux
        idx_R = labels.index('R')
        for key, val in em_lines.items():
            if val[1] == 'L':
                idx_line = labels.index(key)
                # adapt grid
                R_si = initial_state[idx_R]*con.R_jup.value
                conv_fac = 4*np.pi*R_si**2
                tmp = np.array(em_grid[key])/conv_fac
                em_grid[key] = tmp.tolist()
                # adapt ini state
                initial_state = list(initial_state)
                initial_state[idx_line] /= conv_fac
                initial_state = tuple(initial_state)
                # adapt bounds
                bounds_ori = list(bounds[key])
                bounds[key] = (bounds_ori[0]/conv_fac, bounds_ori[1]/conv_fac)
            elif val[1] == 'LogL':
                idx_line = labels.index(key)
                R_si = initial_state[idx_R]*con.R_jup.value
                conv_fac = con.L_sun.value/(4*np.pi*R_si**2)
                tmp = np.power(10, np.array(em_grid[key]))*conv_fac
                em_grid[key] = tmp.tolist()
                # adapt ini state
                initial_state = list(initial_state)
                initial_state[idx_line] = conv_fac*10**initial_state[idx_line]
                initial_state = tuple(initial_state)
                # adapt bounds
                bounds_ori = list(bounds[key])
                bounds[key] = (conv_fac*10**bounds_ori[0],
                               conv_fac*10**bounds_ori[1])
            if em_lines[key][2] is not None:
                if em_lines[key][-1] == 'km/s':
                    v = em_lines[key][2]
                    em_lines_tmp = list(em_lines[key])
                    em_lines_tmp[2] = (1000*v/con.c.value)*em_lines[key][0]
                    em_lines_tmp[3] = 'mu'
                    em_lines[key] = tuple(em_lines_tmp)
                elif em_lines[key][-1] == 'nm':
                    em_lines_tmp = list(em_lines[key])
                    em_lines_tmp[2] = em_lines[key][2]/1000
                    em_lines_tmp[3] = 'mu'
                    em_lines[key] = tuple(em_lines_tmp)
                elif em_lines[key][-1] != 'mu':
                    msg = "Invalid unit of FWHM for line injection"
                    raise ValueError(msg)

    if model_grid is not None and grid_param_list is not None:
        if model_grid.ndim-2 != n_gparams:
            msg = "Ndim-2 of model_grid should match len(grid_param_list)"
            raise TypeError(msg)

    if verbosity > 0:
        start_time = time_ini()
        print(" MCMC sampler for spectral fitting ")
        print(sep)

    # If required, create the output folder.
    if save or (resamp_before and grid_param_list is not None):
        if not output_dir:
            raise ValueError('Please provide an output directory path')
        if not isdir(output_dir):
            os.makedirs(output_dir)
        if output_dir[-1] != '/':
            output_dir = output_dir+'/'

    # Check model grid parameters extend beyond bounds to avoid extrapolation
    if grid_param_list is not None:
        for pp in range(n_gparams):
            if grid_param_list[pp][0] > bounds[labels[pp]][0]:
                msg = "Grid has to extend beyond bounds for {}."
                msg += "\n Consider increasing the lower bound to >{}."
                raise ValueError(msg.format(labels[pp],
                                            grid_param_list[pp][0]))
            if grid_param_list[pp][-1] < bounds[labels[pp]][1]:
                msg = "Grid has to extend beyond bounds for {}."
                msg += "\n Consider decreasing the upper bound to <{}."
                raise ValueError(msg.format(labels[pp],
                                            grid_param_list[pp][-1]))

    # Check initial state is within bounds for all params (not only grid)
    for pp in range(nparams):
        if initial_state[pp] < bounds[labels[pp]][0]:
            msg = "Initial state has to be within bounds for {}."
            msg += "\n Consider decreasing the lower bound to <{}."
            raise ValueError(msg.format(labels[pp], initial_state[pp]))
        if initial_state[pp] > bounds[labels[pp]][1]:
            msg = "Initial state has to be within bounds for {}."
            msg += "\n Consider increasing the upper bound to >{}."
            raise ValueError(msg.format(labels[pp], initial_state[pp]))

    # Prepare model grid: convolve+resample models as observations
    if resamp_before and grid_param_list is not None:
        if isfile(output_dir+grid_name):
            model_grid = open_fits(output_dir+grid_name)
            # check its shape is consistent with grid_param_list
            if model_grid.shape[:n_gparams] != gp_dims:
                msg = "the loaded model grid ({}) doesn't have expected dims ({})"
                raise TypeError(msg.format(model_grid.shape, gp_dims))
            elif model_grid.shape[-2] != len(lbda_obs):
                msg = "the loaded model grid doesn't have expected WL dimension"
                raise TypeError(msg)
            elif model_grid.shape[-1] != 2:
                msg = "the loaded model grid doesn't have expected last dimension"
                raise TypeError(msg)
            elif len(em_grid) > 0:
                if model_grid.shape[n_gparams:n_gparams+n_em] != em_dims:
                    msg = "loaded model grid ({}) doesn't have expected dims ({})"
                    raise TypeError(msg.format(model_grid.shape, em_dims))
        else:
            model_grid = make_resampled_models(lbda_obs, grid_param_list,
                                               model_grid, model_reader,
                                               em_lines, em_grid, dlbda_obs,
                                               instru_res, instru_idx,
                                               filter_reader, interp_nonexist)
            if output_dir and grid_name:
                write_fits(output_dir+grid_name, model_grid)
        # note: if model_grid is provided, it is still resampled to the
        # same wavelengths as observed spectrum. However, if a fits name is
        # provided in grid_name and that file exists, it is assumed the model
        # grid in it is already resampled to match lbda_obs.

    if save and output_file is None:
        output_file = 'MCMC_results'

    # #########################################################################
    # Initialization of the MCMC variables                                    #
    # #########################################################################
    dim = len(labels)
    itermin = niteration_min
    limit = niteration_limit
    supp = niteration_supp
    maxgap = check_maxgap
    initial_state = np.array(initial_state)

    if itermin > limit:
        itermin = 0

    fraction = 0.3
    geom = 0
    lastcheck = 0
    konvergence = np.inf
    rhat_count = 0
    ac_count = 0
    chain = np.empty([nwalkers, 1, dim])
    ar_frac = np.empty([nwalkers, 1])
    nIterations = limit + supp
    rhat = np.zeros(dim)
    stop = np.inf

    # initialise ball of walkers
    if ini_ball == "uniform":
        pos = np.zeros([nwalkers, dim])
        for ii in range(dim):
            low_b = bounds[labels[ii]][0]
            up_b = bounds[labels[ii]][1]
            pos[:, ii] = np.random.uniform(low_b + 0.01*(up_b-low_b), up_b,
                                           nwalkers)
    elif isinstance(ini_ball, float):
        pos = initial_state + np.random.normal(0, ini_ball, (nwalkers, dim))
    else:
        raise ValueError("ini_ball must be string or float")

    sampler = emcee.EnsembleSampler(nwalkers, dim, lnprob, a=a,
                                    args=([labels, bounds, grid_param_list,
                                           lbda_obs, spec_obs, err_obs, dist,
                                           model_grid, model_reader, em_lines,
                                           em_grid, dlbda_obs, instru_corr,
                                           instru_res, instru_idx,
                                           use_weights, filter_reader,
                                           AV_bef_bb, units_obs, units_mod,
                                           interp_order, priors, physical]),
                                    threads=nproc)

    start = datetime.datetime.now()

    # #########################################################################
    # Affine Invariant MCMC run
    # #########################################################################
    if verbosity > 1:
        print('\nStart of the MCMC run ...')
        print('Step | Duration/step (sec) | Remaining Estimated Time (sec)')

    for k, res in enumerate(sampler.sample(pos, iterations=nIterations)):
        elapsed = (datetime.datetime.now()-start).total_seconds()
        if verbosity > 1:
            if k == 0:
                q = 0.5
            else:
                q = 1
            print('{}\t\t{:.5f}\t\t\t{:.5f}'.format(k, elapsed * q,
                                                    elapsed * (limit-k-1) * q))
        start = datetime.datetime.now()

        # ---------------------------------------------------------------------
        # Store the state manually in order to handle with dynamical sized chain
        # ---------------------------------------------------------------------
        # Check if the size of the chain is long enough.
        s = chain.shape[1]
        if k+1 > s:     # if not, one doubles the chain length
            empty = np.zeros([nwalkers, 2*s, dim])
            chain = np.concatenate((chain, empty), axis=1)
            empty = np.zeros([nwalkers, 2*s])
            ar_frac = np.concatenate((ar_frac, empty), axis=1)
        # Store the state of the chain
        chain[:, k] = res[0]
        ar_frac[:, k] = sampler.acceptance_fraction

        # ---------------------------------------------------------------------
        # If k meets the criterion, one tests the non-convergence.
        # ---------------------------------------------------------------------
        criterion = int(np.amin([np.ceil(itermin*(1+fraction)**geom),
                                 lastcheck+np.floor(maxgap)]))

        if k == criterion:
            geom += 1
            lastcheck = k
            if display:
                show_walk_plot(chain, labels)

            # We only test the rhat if we have reached the min # of steps
            if (k+1) >= itermin and konvergence == np.inf:
                if verbosity > 1:
                    print('\n{} convergence test in progress...'.format(conv_test))
                if conv_test == 'gb':
                    thr0 = int(np.floor(burnin*k))
                    thr1 = int(np.floor((1-burnin)*k*0.25))
                    # We calculate the rhat for each model parameter.
                    for j in range(dim):
                        part1 = chain[:, thr0:thr0+thr1, j].reshape(-1)
                        part2 = chain[:, thr0+3*thr1:thr0+4*thr1, j].reshape(-1)
                        series = np.vstack((part1, part2))
                        rhat[j] = gelman_rubin(series)
                    if verbosity > 0:
                        print('   r_hat = {}'.format(rhat))
                        cond = rhat <= rhat_threshold
                        print('   r_hat <= threshold = {} \n'.format(cond))
                    # We test the rhat.
                    if (rhat <= rhat_threshold).all():
                        rhat_count += 1
                        if rhat_count < rhat_count_threshold:
                            if verbosity > 0:
                                msg = "Gelman-Rubin test OK {}/{}"
                                print(msg.format(rhat_count,
                                                 rhat_count_threshold))
                        elif rhat_count >= rhat_count_threshold:
                            if verbosity > 0:
                                print('... ==> convergence reached')
                            konvergence = k
                            stop = konvergence + supp
                    else:
                        rhat_count = 0
                elif conv_test == 'ac':
                    # We calculate the auto-corr test for each model parameter.
                    for j in range(dim):
                        rhat[j] = autocorr_test(chain[:, :k, j])
                    thr = 1./ac_c
                    if verbosity > 0:
                        print('Auto-corr tau/N = {}'.format(rhat))
                        print('tau/N <= {} = {} \n'.format(thr, rhat <= thr))
                    if (rhat <= thr).all():
                        ac_count += 1
                        if verbosity > 0:
                            msg = "Auto-correlation test passed for all params!"
                            msg += " {}/{}".format(ac_count, ac_count_thr)
                            print(msg)
                        if ac_count >= ac_count_thr:
                            msg = '\n ... ==> convergence reached'
                            print(msg)
                            stop = k
                    else:
                        ac_count = 0
                else:
                    raise ValueError('conv_test value not recognized')
                if save and verbosity == 3:
                    ac_time = []
                    for j in range(dim):
                        ac_time.append(k*autocorr_test(chain[:, :k, j]))
                    fname = '{d}/{f}_temp_k{k}'.format(d=output_dir,
                                                       f=output_file, k=k)
                    data = {'chain': sampler.chain,
                            'lnprob': sampler.lnprobability,
                            'ac_time': ac_time,
                            'AR': ar_frac}
                    with open(fname, 'wb') as fileSave:
                        pickle.dump(data, fileSave)

        # We have reached convergence
        if k+1 >= stop:
            if verbosity > 0:
                print('We break the loop because we have reached convergence')
            break

        # We have reached the maximum number of steps for our Markov chain.
        if k+1 >= nIterations-1:
            # break to avoid a bug related to font type
            break

    isamples, ln_proba, ar = chain_zero_truncated(chain,
                                                  sampler.lnprobability,
                                                  ar_frac)

    # update units in the chain if needed
    if len(em_grid) > 0:
        for key, val in em_lines.items():
            idx_line = labels.index(key)
            if val[1] == 'L':
                R_si = isamples[:, :, idx_R]*con.R_jup.value
                isamples[:, :, idx_line] *= 4*np.pi*R_si**2
            elif val[1] == 'LogL':
                R_si = isamples[:, :, idx_R]*con.R_jup.value
                conv_fac = 4*np.pi*R_si**2/con.L_sun.value
                isamples[:, :, idx_line] = np.log10(isamples[:, :, idx_line]*conv_fac)

    if k == nIterations-1:
        if verbosity > 0:
            print("We have reached the limit # of steps without convergence")
            print("You may have to increase niteration_limit")

    # #########################################################################
    # Construction of the independent samples
    # #########################################################################
    if save:
        frame = inspect.currentframe()
        args, _, _, values = inspect.getargvalues(frame)
        input_parameters = {j: values[j] for j in args[1:]}
        ac_time = []
        for j in range(dim):
            ac_time.append(isamples.shape[1]*autocorr_test(isamples[:, :, j]))
        output = {'isamples': isamples,
                  # 'chain': chain,
                  'input_parameters': input_parameters,
                  'AR': ar,
                  'ac_time': ac_time,
                  'lnprobability': ln_proba}
        with open(output_dir+output_file, 'wb') as fileSave:
            pickle.dump(output, fileSave)
        msg = "\nThe file {} has been stored in the folder {}"
        print(msg.format(output_file, output_dir))

    if verbosity > 0:
        timing(start_time)

    return isamples, ln_proba
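
# Minimal end-to-end sketch (not part of the original module; data, grid and
# reader names below are hypothetical, and most keyword arguments are left
# at their defaults). Note that the grid must extend beyond the bounds:
# >>> import numpy as np
# >>> lab = ('Teff', 'logg', 'R')
# >>> bounds = {'Teff': (1000, 2000), 'logg': (2.5, 5.0), 'R': (0.1, 5)}
# >>> gpl = [np.arange(900, 2200, 100), np.arange(2.0, 5.6, 0.5)]
# >>> samples, lnproba = mcmc_spec_sampling(lbda, spec, err, dist=140.,
# ...                                       grid_param_list=gpl,
# ...                                       initial_state=(1500., 3.5, 1.3),
# ...                                       labels=lab, bounds=bounds,
# ...                                       model_reader=my_file_reader,
# ...                                       nwalkers=100,
# ...                                       niteration_limit=200)
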
def chain_zero_truncated(chain, ln_proba=None, ar=None):
    """ Return the Markov chain with the dimension: walkers x steps* x
    parameters, where steps* is the last step before having 0 (not yet
    constructed chain).

    Parameters
    ----------
    chain: numpy.array
        The MCMC chain.
    ln_proba: numpy.array, opt
        Corresponding ln-probabilities.
    ar: numpy.array, opt
        Corresponding acceptance fractions.

    Returns
    -------
    out: numpy.array
        The truncated MCMC chain, that is to say, the chain which only
        contains relevant information.
    out_ln_proba: numpy.array
        If ln_proba is provided as input, out_ln_proba contains the
        zero-truncated ln-proba (i.e. matching shape with non-zero samples).
    out_ar: numpy.array
        If ar is provided as input, out_ar contains the zero-truncated
        acceptance fractions.
    """
    try:
        idxzero = np.where(chain[0, :, 0] == 0.0)[0][0]
    except IndexError:
        idxzero = chain.shape[1]

    res = [chain[:, 0:idxzero, :]]
    if ln_proba is not None:
        res.append(ln_proba[:, 0:idxzero])
    if ar is not None:
        res.append(ar[:, 0:idxzero])

    if len(res) == 1:
        return res[0]
    else:
        return tuple(res)
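
# Illustrative behaviour (not part of the original module; hypothetical
# chain): steps that were pre-allocated but never run remain at zero and
# are trimmed away.
# >>> chain = np.zeros((4, 10, 3))   # 4 walkers, 10 steps, 3 parameters
# >>> chain[:, :6, :] = 1.           # pretend only 6 steps were performed
# >>> chain_zero_truncated(chain).shape
# (4, 6, 3)
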
def show_walk_plot(chain, labels, save=False, output_dir='', ntrunc=100,
                   **kwargs):
    """ Display/save a figure showing the path of each walker during the
    MCMC run.

    Parameters
    ----------
    chain: numpy.array
        The Markov chain. The shape of chain must be nwalkers x length x
        dim. If a part of the chain is filled with zero values, the method
        will discard these steps.
    labels: Tuple of strings
        Tuple of labels in the same order as initial_state:
            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the flux of emission lines (labels should match those \
            in the em_lines dictionary), in units of the model spectrum (times mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution.
    save: boolean, default: False
        If True, a pdf file is created.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    ntrunc: int, opt
        Maximum number of walkers plotted. If there are too many walkers,
        the plot becomes too voluminous and too crowded; the plot is then
        truncated to the first ntrunc walkers.
    kwargs:
        Additional attributes are passed to the matplotlib plot method.

    Returns
    -------
    Display the figure or create a pdf file named walk_plot.pdf in the
    working directory.

    """
    npar = chain.shape[-1]
    if len(labels) != npar:
        raise ValueError("Labels length should match chain last dimension")
    temp = np.where(chain[0, :, 0] == 0.0)[0]
    if len(temp) != 0:
        chain = chain[:, :temp[0], :]

    color = kwargs.pop('color', 'k')
    alpha = kwargs.pop('alpha', 0.1)
    filename = kwargs.pop('filename', 'walk_plot.pdf')
    fig, axes = plt.subplots(npar, 1, sharex=True,
                             figsize=kwargs.pop('figsize', (8, int(2*npar))))
    if npar > 1:
        axes[-1].set_xlabel(kwargs.pop('xlabel', 'step number'))
        axes[-1].set_xlim(kwargs.pop('xlim', [0, chain.shape[1]]))
        for j in range(npar):
            axes[j].plot(chain[:ntrunc, :, j].T, color=color, alpha=alpha,
                         **kwargs)
            axes[j].yaxis.set_major_locator(MaxNLocator(5))
            axes[j].set_ylabel(labels[j])
    else:
        axes.set_xlabel(kwargs.pop('xlabel', 'step number'))
        axes.set_xlim(kwargs.pop('xlim', [0, chain.shape[1]]))
        for j in range(npar):
            axes.plot(chain[:ntrunc, :, j].T, color=color, alpha=alpha,
                      **kwargs)
            axes.yaxis.set_major_locator(MaxNLocator(5))
            axes.set_ylabel(labels[j])
    fig.tight_layout(h_pad=0)
    if save:
        plt.savefig(output_dir+filename)
        plt.close(fig)
    else:
        plt.show()
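
# Illustrative usage (not part of the original module; assumes ``samples``
# is the chain returned by ``mcmc_spec_sampling``):
# >>> show_walk_plot(samples, labels=('Teff', 'logg', 'R'))
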
def show_corner_plot(chain, burnin=0.5, save=False, output_dir='',
                     mcmc_res=None, units=None, ndig=None, labels_plot=None,
                     plot_name='corner_plot.pdf', **kwargs):
    """ Display or save a figure showing the corner plot (pdfs + correlation
    plots).

    Parameters
    ----------
    chain: numpy.array
        The Markov chain. The shape of chain must be nwalkers x length x dim.
        If a part of the chain is filled with zero values, the method will
        discard these steps.
    burnin: float, default: 0.5
        The fraction of a walker chain we want to discard.
    save: boolean, default: False
        If True, a pdf file is created.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    mcmc_res: numpy array OR tuple of 2 dictionaries/np.array, opt
        Values to be printed on top of each 1d posterior distribution

        * if numpy array: \
            - shape :math:`(n_{par},3)`, with :math:`n_{par}` the number of \
            parameters, with the first, second and third columns containing \
            the most likely value of each parameter and the lower and upper \
            uncertainties at the desired quantiles, respectively.
            - shape :math:`(n_{par},2)`: same as above but with a single \
            value for the uncertainty.
        * if tuple of 2 dictionaries: output of \
        ``special.mcmc_sampling.confidence`` without gaussian fit.
        * if tuple of 2 np.array: output of \
        ``special.mcmc_sampling.confidence`` with gaussian fit.
    units: tuple, opt
        Tuple of strings containing units for each parameter. If provided,
        mcmc_res will be printed on top of each 1d posterior distribution
        along with these units.
    ndig: tuple, opt
        Number of digits precision for each printed parameter.
    labels_plot: tuple, opt
        Labels corresponding to parameter names, used for the plot. If None,
        will use ``labels`` passed in kwargs.
    plot_name: str, opt
        Name of the pdf file, in the case ``save`` is True.
    kwargs:
        Additional attributes passed to the ``corner.corner`` method
        (e.g. ``labels``, ``labels_tit``, ``labels_tit_unit``,
        ``title_kwargs``).

    Returns
    -------
    Display the figure or create a pdf file named corner_plot.pdf (default)
    in ``output_dir``.

    Raises
    ------
    ValueError
        If the chain appears empty.
    TypeError
        If ``mcmc_res`` does not have one of the expected formats.

    """
    npar = chain.shape[-1]
    if "labels" in kwargs.keys():
        labels = kwargs.pop('labels')
        if labels_plot is None:
            labels_plot = labels
    else:
        labels = None
    # allows for different titles/units on top of 1D posterior distributions
    labels_tit = kwargs.pop('labels_tit', labels_plot)
    labels_tit_unit = kwargs.pop('labels_tit_unit', units)
    try:
        temp = np.where(chain[0, :, 0] == 0.0)[0]
        if len(temp) != 0:
            chain = chain[:, :temp[0], :]
        length = chain.shape[1]
        indburn = int(np.floor(burnin*(length-1)))
        chain = chain[:, indburn:length, :].reshape((-1, npar))
    except IndexError:
        pass

    if chain.shape[0] == 0:
        raise ValueError("It seems the chain is empty. Have you run the "
                         "MCMC?")

    fig = corner.corner(chain, labels=labels_plot, **kwargs)
    if mcmc_res is not None and labels is not None:
        title_kwargs = kwargs.pop('title_kwargs', {})
        if isinstance(mcmc_res, tuple):
            if len(mcmc_res) != 2:
                msg = "mcmc_res should have 2 elements"
                raise TypeError(msg)
            if len(mcmc_res[0]) != npar or len(mcmc_res[1]) != npar:
                msg = "dict should have as many entries as there are params"
                raise TypeError(msg)
            if labels is None:
                msg = "labels should be provided"
                raise TypeError(msg)
        elif isinstance(mcmc_res, np.ndarray):
            if mcmc_res.shape[0] != npar:
                msg = "mcmc_res first dim should be equal to number of params"
                raise TypeError(msg)
        else:
            msg = "type of mcmc_res not recognised"
            raise TypeError(msg)
        # Extract the axes
        axes = np.array(fig.axes).reshape((npar, npar))
        # Loop over the diagonal
        for i in range(npar):
            ax = axes[i, i]
            title = None
            if isinstance(mcmc_res, tuple):
                if isinstance(mcmc_res[0], dict):
                    q_50 = mcmc_res[0][labels[i]]
                    q_m = mcmc_res[1][labels[i]][0]
                    q_p = mcmc_res[1][labels[i]][1]
                else:
                    # gaussian fit output: (mu, sigma) 1d arrays, so the
                    # uncertainty is symmetric
                    q_50 = mcmc_res[0][i]
                    q_p = mcmc_res[1][i]
                    q_m = -q_p
            else:
                q_50 = mcmc_res[i, 0]
                q_m = mcmc_res[i, 1]
                if mcmc_res.shape[1] == 3:
                    q_p = mcmc_res[i, 2]
                else:
                    q_p = np.abs(q_m)
                    q_m = -np.abs(q_m)

            # Format the quantile display.
            try:
                fmt = "{{:.{0}f}}".format(ndig[i]).format
            except (TypeError, IndexError):
                fmt = "{:.2f}".format
            title = r"${{{0}}}_{{{1}}}^{{+{2}}}$"
            title = title.format(fmt(q_50), fmt(q_m), fmt(q_p))

            # Add in the column name if it's given.
            try:
                title = "{0} = {1} {2}".format(labels_tit[i], title,
                                               labels_tit_unit[i])
            except (TypeError, IndexError):
                title = "{0} = {1}".format(labels_tit[i], title)

            ax.set_title(title, **title_kwargs)
    if save:
        plt.savefig(output_dir+plot_name)
        plt.close(fig)
    else:
        plt.show()
def confidence(isamples, labels, cfd=68.27, bins=100, gaussian_fit=False,
               weights=None, verbose=True, save=False, output_dir='',
               bounds=None, priors=None, **kwargs):
    """ Determine the highly probable value for each model parameter, as well
    as the 1-sigma confidence interval.

    Parameters
    ----------
    isamples: numpy.array
        The independent samples for each model parameter.
    labels: Tuple of strings
        Tuple of labels in the same order as initial_state:
            - first all parameters related to loaded models (e.g. 'Teff', 'logg')
            - then the planet photometric radius 'R', in Jupiter radius
            - (optionally) the flux of emission lines (labels should match those \
            in the em_lines dictionary), in units of the model spectrum (times mu)
            - (optionally) the optical extinction 'Av', in mag
            - (optionally) the ratio of total to selective optical extinction 'Rv'
            - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra bb \
            contribution.
    cfd: float, optional
        The confidence level given in percentage.
    bins: int, optional
        The number of bins used to sample the posterior distributions.
    gaussian_fit: boolean, optional
        If True, a gaussian fit is performed in order to determine
        (mu, sigma).
    weights : 1d numpy ndarray or None, optional
        An array of weights for each sample.
    verbose: boolean, optional
        Display information in the shell.
    save: boolean, optional
        If True, a txt file with the results is saved in the output
        repository.
    output_dir: str, optional
        The name of the output directory which contains the output files in
        the case ``save`` is True.
    bounds: dictionary, opt
        Only used if a text file is saved summarizing results+bounds+priors.
        Should be the same ``bounds`` as provided to the MCMC.
    priors: dictionary, opt
        Only used if a text file is saved summarizing results+bounds+priors.
        Should be the same ``priors`` as provided to the MCMC.
    kwargs: optional
        Additional attributes are passed to the matplotlib hist() method.

    Returns
    -------
    out: tuple
        A 2 elements tuple with the highly probable solution and the
        confidence interval.

    """
    title = kwargs.pop('title', None)
    output_file = kwargs.pop('filename', 'confidence.txt')
    if gaussian_fit:
        output_pdf = kwargs.pop('pdfname',
                                'confi_hist_spec_params_gaussfit.pdf')
    else:
        output_pdf = kwargs.pop('pdfname', 'confi_hist_spec_params.pdf')

    try:
        l = isamples.shape[1]
    except IndexError:
        l = 1

    confidenceInterval = {}
    val_max = {}
    pKey = labels

    if l != len(pKey):
        raise ValueError("Labels length should match chain last dimension")

    if cfd == 100:
        cfd = 99.9

    #########################################
    ##  Determine the confidence interval  ##
    #########################################
    if gaussian_fit:
        mu = np.zeros(l)
        sigma = np.zeros_like(mu)
        fig, ax = plt.subplots(2, l, figsize=(int(4*l), 8))
    else:
        fig, ax = plt.subplots(1, l, figsize=(int(4*l), 4))

    for j in range(l):
        if gaussian_fit:
            n, bin_vertices, _ = ax[0][j].hist(isamples[:, j], bins=bins,
                                               weights=weights,
                                               histtype='step',
                                               edgecolor='gray')
        else:
            n, bin_vertices, _ = ax[j].hist(isamples[:, j], bins=bins,
                                            weights=weights, histtype='step',
                                            edgecolor='gray')
        bins_width = np.mean(np.diff(bin_vertices))
        surface_total = np.sum(np.ones_like(n)*bins_width*n)
        n_arg_sort = np.argsort(n)[::-1]

        # Add bins by decreasing count until the requested fraction of the
        # posterior is enclosed.
        test = 0
        pourcentage = 0
        for k, jj in enumerate(n_arg_sort):
            test = test + bins_width*n[int(jj)]
            pourcentage = test/surface_total*100
            if pourcentage > cfd:
                if verbose:
                    msg = 'percentage for {}: {}%'
                    print(msg.format(pKey[j], pourcentage))
                break
        n_arg_min = int(n_arg_sort[:k].min())
        n_arg_max = int(n_arg_sort[:k+1].max())

        if n_arg_min == 0:
            n_arg_min += 1
        if n_arg_max == bins:
            n_arg_max -= 1

        val_max[pKey[j]] = bin_vertices[int(n_arg_sort[0])]+bins_width/2.
        confidenceInterval[pKey[j]] = np.array([bin_vertices[n_arg_min-1],
                                                bin_vertices[n_arg_max+1]]
                                               - val_max[pKey[j]])

        arg = (isamples[:, j] >= bin_vertices[n_arg_min - 1]) * \
              (isamples[:, j] <= bin_vertices[n_arg_max + 1])
        if gaussian_fit:
            ax[0][j].hist(isamples[arg, j], bins=bin_vertices,
                          facecolor='gray', edgecolor='darkgray',
                          histtype='stepfilled', alpha=0.5)
            ax[0][j].vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])],
                            linestyles='dashed', color='red')
            ax[0][j].set_xlabel(pKey[j])
            if j == 0:
                ax[0][j].set_ylabel('Counts')

            mu[j], sigma[j] = norm.fit(isamples[:, j])
            n_fit, bins_fit = np.histogram(isamples[:, j], bins, density=1,
                                           weights=weights)
            ax[1][j].hist(isamples[:, j], bins, density=1, weights=weights,
                          facecolor='gray', edgecolor='darkgray',
                          histtype='step')
            y = norm.pdf(bins_fit, mu[j], sigma[j])
            ax[1][j].plot(bins_fit, y, 'r--', linewidth=2, alpha=0.7)
            ax[1][j].set_xlabel(pKey[j])
            if j == 0:
                ax[1][j].set_ylabel('Counts')

            if title is not None:
                msg = r"{} $\mu$ = {:.4f}, $\sigma$ = {:.4f}"
                ax[1][j].set_title(msg.format(title, mu[j], sigma[j]),
                                   fontsize=10)
        else:
            ax[j].hist(isamples[arg, j], bins=bin_vertices, facecolor='gray',
                       edgecolor='darkgray', histtype='stepfilled', alpha=0.5)
            ax[j].vlines(val_max[pKey[j]], 0, n[int(n_arg_sort[0])],
                         linestyles='dashed', color='red')
            ax[j].set_xlabel(pKey[j])
            if j == 0:
                ax[j].set_ylabel('Counts')

            if title is not None:
                msg = r"{} - {:.3f} {:.3f} +{:.3f}"
                ax[j].set_title(msg.format(title, val_max[pKey[j]],
                                           confidenceInterval[pKey[j]][0],
                                           confidenceInterval[pKey[j]][1]),
                                fontsize=10)

    plt.tight_layout(w_pad=0.1)

    if save:
        plt.savefig(output_dir+output_pdf)

    if verbose:
        for j in range(l):
            print("******* Results for {} ***** ".format(pKey[j]))
            print('\n\nConfidence intervals:')
            print('{}: {} [{},{}]'.format(pKey[j], val_max[pKey[j]],
                                          confidenceInterval[pKey[j]][0],
                                          confidenceInterval[pKey[j]][1]))
            if gaussian_fit:
                print('Gaussian fit results:')
                print('{}: {} +-{}'.format(pKey[j], mu[j], sigma[j]))

    ##############################################
    ##  Write inference results in a text file  ##
    ##############################################
    if save:
        with open(output_dir+output_file, "w") as f:
            f.write('######################################\n')
            f.write('####  MCMC results (confidence)  ####\n')
            f.write('######################################\n')
            f.write(' \n')
            f.write('Results of the MCMC fit\n')
            f.write('----------------------- \n')
            f.write(' \n')
            if bounds is not None:
                f.write('Bounds\n')
                f.write('------')
                for j in range(l):
                    f.write('\n{}: [{},{}]'.format(pKey[j],
                                                   bounds[pKey[j]][0],
                                                   bounds[pKey[j]][1]))
                f.write(' \n')
                f.write(' \n')
            f.write('Priors\n')
            f.write('------')
            if priors is not None:
                for key, prior in priors.items():
                    f.write('\n{}: {}+-{}'.format(key, prior[0], prior[1]))
            else:
                f.write('\n None')
            f.write(' \n')
            f.write(' \n')
            f.write('>> Spectral parameters for a ')
            f.write('{} % confidence interval:\n'.format(cfd))

            for j in range(l):
                f.write('\n{}: {} [{},{}]'.format(pKey[j], val_max[pKey[j]],
                                                  confidenceInterval[pKey[j]][0],
                                                  confidenceInterval[pKey[j]][1]))
                if gaussian_fit:
                    f.write('\nGaussian fit results: ')
                    f.write('{}: {} +-{}'.format(pKey[j], mu[j], sigma[j]))

    if gaussian_fit:
        return mu, sigma
    else:
        return val_max, confidenceInterval
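
# Illustrative usage of ``confidence`` with ``gaussian_fit=True`` (synthetic
# posterior samples): instead of the mode and asymmetric interval, a normal
# distribution is fit to each 1d posterior and (mu, sigma) are returned.
#
# >>> import numpy as np
# >>> rng = np.random.default_rng(1)
# >>> samples = np.column_stack((rng.normal(1500., 50., 10000),   # 'Teff'
# ...                            rng.normal(4.0, 0.2, 10000)))    # 'logg'
# >>> mu, sigma = confidence(samples, labels=('Teff', 'logg'),
# ...                        gaussian_fit=True, verbose=False)
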