Source code for special.model_resampling

#! /usr/bin/env python

"""
Functions useful for spectral fitting of companions, and model interpolation.

"""

__author__ = 'Valentin Christiaens'
__all__ = ['make_model_from_params',
           'make_resampled_models',
           'resample_model',
           'interpolate_model']

import numpy as np
import astropy.constants as con
from astropy.convolution import Gaussian1DKernel, convolve_fft
from astropy.stats import gaussian_fwhm_to_sigma
import itertools
from scipy.interpolate import InterpolatedUnivariateSpline
import pandas as pd
import pdb
from scipy.ndimage import map_coordinates
from .fits import open_fits
from .utils_spec import (convert_F_units, blackbody, find_nearest, extinction,
                         inject_em_line)


def make_model_from_params(params, labels, grid_param_list, dist, lbda_obs=None,
                           model_grid=None, model_reader=None, em_lines={},
                           em_grid={}, dlbda_obs=None, instru_res=None,
                           instru_idx=None, filter_reader=None, AV_bef_bb=False,
                           units_obs='si', units_mod='si', interp_order=1):
    """ Routine to make the model from input parameters.

    Parameters
    ----------
    params : tuple
        Set of model parameters for which the model grid has to be
        interpolated.
    labels : Tuple of strings
        Tuple of labels in the same order as initial_state:

        - first all parameters related to loaded models (e.g. 'Teff', 'logg');
        - then the planet photometric radius 'R', in Jupiter radius;
        - (optionally) the flux of emission lines (labels should match those
          in the em_lines dictionary), in units of the model spectrum
          (times mu);
        - (optionally) the optical extinction 'Av', in mag;
        - (optionally) the ratio of total to selective optical extinction
          'Rv';
        - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra
          bb contribution.
    grid_param_list : list of 1d numpy arrays/lists
        Should contain lists/numpy 1d arrays with the available grid of model
        parameters (should only contain the sampled parameters, not the
        models themselves). The models will be loaded with ``model_reader``.
    dist : float
        Distance in parsec, used for flux scaling of the models.
    lbda_obs : numpy 1d ndarray or list
        Wavelengths of the observed spectrum. If several instruments were
        used, the wavelengths should be ordered per instrument, not
        necessarily as monotonically increasing wavelength. Hereafter,
        :math:`n_{ch}` is the length of ``lbda_obs``.
    model_grid : numpy N-d array, optional
        If provided, should contain the grid of model spectra for each free
        parameter of the given grid. I.e. for a grid of :math:`n_T` values of
        :math:`T_{eff}` and :math:`n_g` values of log(:math:`g`), the numpy
        array should have a shape of :math:`(n_T, n_g, n_{ch}, 2)`, where the
        last 2 dimensions correspond to wavelength and fluxes respectively.
        If provided, ``model_grid`` takes precedence over ``model_name``/
        ``model_reader``.
    model_reader : python routine, opt
        External routine that reads a model file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains model values. See example routine in
        ``special.model_resampling.interpolate_model`` description.
    em_lines : dictionary, opt
        Dictionary of emission lines to be added on top of the model
        spectrum. Each dict entry should be the name of the line, assigned
        to a tuple of 4 values:

        1. the wavelength (in :math:`\mu` m);
        2. a string indicating whether line intensity is expressed in flux
           ('F'), luminosity ('L') or log(L/LSun) ("LogL");
        3. the FWHM of the gaussian (or None if to be set automatically);
        4. whether the FWHM is expressed in 'nm', 'mu' or 'km/s'.

        The third and fourth can also be set to None. In that case, the FWHM
        of the gaussian will automatically be set to the equivalent width of
        the line, calculated from the flux to be injected and the continuum
        level (measured in the grid model to which the line is injected).

        Examples:

        >>> em_lines = {'BrG': (2.1667, 'F', None, None)}
        >>> em_lines = {'BrG': (2.1667, 'LogL', 100, 'km/s')}
    em_grid : dictionary pointing to lists, opt
        Dictionary where each entry corresponds to an emission line and
        points to a list of values to inject for emission line fluxes. For
        computation efficiency, interpolation will be performed between the
        points of this grid during the MCMC sampling. Dictionary entries
        should match those in ``labels`` and ``em_lines``.

        Examples:

        >>> BrGmin, BrGmax = -5, 5
        >>> em_grid = {'BrG': np.arange(BrGmin, BrGmax, 20)}

        >>> BrGmin, BrGmax = -5, 5
        >>> PaBmin, PaBmax = -2, 7
        >>> em_grid = {'PaB': np.arange(PaBmin, PaBmax, 20),
        ...            'BrG': np.arange(BrGmin, BrGmax, 20)}
    dlbda_obs : numpy 1d ndarray or list, optional
        Respective spectral channel width or FWHM of the photometric filters
        used for each point of the observed spectrum. This vector is used to
        infer which part(s) of a combined spectro+photometric spectrum should
        involve convolution+subsampling (model resolution higher than
        measurements), interpolation (the opposite), or convolution by the
        transmission curve of a photometric filter. If not provided, it will
        be inferred from the difference between consecutive lbda_obs points
        (i.e. inaccurate for a combined spectrum). It must be provided IF one
        wants to weigh each measurement based on the spectral resolution of
        each instrument (as in [OLO16]_), through the ``use_weights``
        argument.
    instru_res : float or list of floats/strings, optional
        The mean instrumental resolving power(s) OR filter names. This is
        used to convolve the model spectrum. If several instruments are
        used, provide a list of resolving power values / filter names, one
        for each instrument used.
    instru_idx : numpy 1d array, optional
        1d array containing an index representing each instrument used to
        obtain the spectrum, labelled from 0 to the number of instruments
        (:math:`n_{ins}`): zero for points that don't correspond to any of
        the ``instru_res`` values provided, and i in :math:`[1,n_{ins}]` for
        points associated to instru_res[i-1]. This parameter must be
        provided if the spectrum consists of points obtained with different
        instruments.
    filter_reader : python routine, optional
        External routine that reads a filter file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains transmission values. Important: if not provided, but
        strings are detected in instru_res, the default file reader will be
        used. It assumes the following format for the files:

        - first row contains headers (titles of each column);
        - starting from 2nd row: 1st column: wavelength, 2nd col.:
          transmission;
        - the unit of wavelength can be provided in parentheses of the first
          header key name: e.g. "WL(AA)" for angstrom, "wavelength(mu)" for
          micrometer or "lambda(nm)" for nanometer. Note: only what is in
          parentheses matters for the units.
    AV_bef_bb : bool, optional
        If both extinction and an extra bb component are free parameters,
        whether to apply extinction before adding the BB component (e.g.
        extinction mostly from circumplanetary dust) or after the BB
        component (e.g. mostly interstellar extinction).
    units_obs : str, opt {'si','cgs','jy'}
        Units of the observed spectrum. 'si' for W/m^2/mu; 'cgs' for
        ergs/s/cm^2/mu; 'jy' for janskys.
    units_mod : str, opt {'si','cgs','jy'}
        Units of the model. 'si' for W/m^2/mu; 'cgs' for ergs/s/cm^2/mu;
        'jy' for janskys. If different from units_obs, the spectrum units
        will be converted.
    interp_order : int or tuple of int, optional, {-1,0,1}
        Interpolation mode for model interpolation. If a tuple of integers,
        the length should match the number of grid dimensions and will
        trigger a different interpolation mode for the different parameters.

        - -1: Order 1 spline interpolation in logspace for the parameter
        - 0: nearest neighbour model
        - 1: Order 1 spline interpolation

    Returns
    -------
    out : two 1d numpy arrays
        The model wavelengths and the model spectrum.

    Note
    ----
    ``grid_param_list`` and ``model_grid`` shouldn't contain grids on radius
    and Av. For a combined grid model + black body fit, just provide the
    list of parameters probed by the grid to ``grid_param_list``, and
    provide values for 'Tbbn' and 'Rbbn' to ``initial_state``, ``labels``
    and ``bounds``.
    """
    if 'Tbb1' in labels and grid_param_list is None and lbda_obs is None:
        raise ValueError("lbda_obs should be provided because there is no grid")

    if grid_param_list is None:
        lbda_mod = lbda_obs
        spec_mod = np.zeros_like(lbda_obs)
    else:
        npar_grid = len(grid_param_list)
        params_grid = [params[i] for i in range(npar_grid)]
        params_grid = tuple(params_grid)
        if len(em_grid) == 0:
            p_em_grid = None
        else:
            # first update units of params if needed (i.e. if model_grid is
            # None)
            em_params = list(params)
            if model_grid is None:
                idx_R = labels.index('R')
                for key, val in em_lines.items():
                    if val[1] == 'L':
                        idx_line = labels.index(key)
                        R_si = em_params[idx_R]*con.R_jup.value
                        conv_fac = 4*np.pi*R_si**2
                        em_params[idx_line] /= conv_fac
                    elif val[1] == 'LogL':
                        idx_line = labels.index(key)
                        R_si = em_params[idx_R]*con.R_jup.value
                        conv_fac = con.L_sun.value/(4*np.pi*R_si**2)
                        em_params[idx_line] = conv_fac*10**em_params[idx_line]
            # then build the emission line dictionary
            p_em_grid = {}
            for key, _ in em_grid.items():
                j = labels.index(key)
                p_em_grid[key] = em_params[j]

        # interpolate model to requested parameters
        lbda_mod, spec_mod = interpolate_model(params_grid, grid_param_list,
                                               p_em_grid, em_grid, em_lines,
                                               labels, model_grid,
                                               model_reader, interp_order)

        # resample to lbda_obs if needed
        if lbda_obs is not None:
            cond = False
            if len(lbda_obs) != len(lbda_mod):
                cond = True
            elif not np.allclose(lbda_obs, lbda_mod):
                cond = True
            if cond:
                lbda_mod, spec_mod = resample_model(lbda_obs, lbda_mod,
                                                    spec_mod, dlbda_obs,
                                                    instru_res, instru_idx,
                                                    filter_reader)

        # convert model to same units as observed spectrum if necessary
        if units_mod != units_obs:
            spec_mod = convert_F_units(spec_mod, lbda_mod, in_unit=units_mod,
                                       out_unit=units_obs)

        # scale by (R/dist)**2
        idx_R = labels.index("R")
        dilut_fac = ((params[idx_R]*con.R_jup.value)/(dist*con.pc.value))**2
        spec_mod *= dilut_fac

    # apply extinction if requested
    if 'Av' in labels and AV_bef_bb:
        # so far only assume Cardelli extinction law
        idx_AV = labels.index("Av")
        if 'Rv' in labels:
            idx_RV = labels.index("Rv")
            RV = params[idx_RV]
        else:
            RV = 3.1
        extinc_curve = extinction(lbda_mod, params[idx_AV], RV)
        flux_ratio_ext = np.power(10., -extinc_curve/2.5)
        spec_mod *= flux_ratio_ext
        # TBD: add more options

    # add n blackbody component(s) if requested
    if 'Tbb1' in labels:
        n_bb = 0
        for label in labels:
            if 'Tbb' in label:
                n_bb += 1
        idx_Tbb1 = labels.index("Tbb1")
        Rj = con.R_jup.value
        pc = con.pc.value
        for ii in range(n_bb):
            idx = ii*2
            Omega = np.pi*((params[idx_Tbb1+idx+1]*Rj)/(dist*pc))**2
            bb = Omega*blackbody(lbda_mod, params[idx_Tbb1+idx])
            spec_mod += bb

    # apply extinction if requested
    if 'Av' in labels and not AV_bef_bb:
        # so far only assume Cardelli extinction law
        idx_AV = labels.index("Av")
        if 'Rv' in labels:
            idx_RV = labels.index("Rv")
            RV = params[idx_RV]
        else:
            RV = 3.1
        extinc_curve = extinction(lbda_mod, params[idx_AV], RV)
        flux_ratio_ext = np.power(10., -extinc_curve/2.5)
        spec_mod *= flux_ratio_ext
        # TBD: add more options

    return lbda_mod, spec_mod
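
# Illustrative usage sketch for make_model_from_params (added example; all
# numeric values below are placeholder assumptions, not from the original
# module): evaluate a pure blackbody model, i.e. with no atmospheric grid
# (grid_param_list=None), on a hypothetical observed wavelength vector.
def _example_make_model_from_params():
    lbda_obs = np.linspace(1.0, 2.5, 100)  # observed wavelengths [mu]
    labels = ('Tbb1', 'Rbb1')              # a single blackbody component
    params = (1500., 1.3)                  # T [K] and R [R_Jup], assumed
    # with grid_param_list=None, the model is the sum of bb components only
    lbda_mod, spec_mod = make_model_from_params(params, labels, None,
                                                dist=100., lbda_obs=lbda_obs)
    return lbda_mod, spec_mod
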
def make_resampled_models(lbda_obs, grid_param_list, model_grid=None,
                          model_reader=None, em_lines={}, em_grid=None,
                          dlbda_obs=None, instru_res=None, instru_idx=None,
                          filter_reader=None, interp_nonexist=True):
    """ Returns a cube of models after convolution and resampling as in the
    observations.

    Parameters
    ----------
    lbda_obs : numpy 1d ndarray or list
        Wavelengths of the observed spectrum. If several instruments were
        used, the wavelengths should be ordered per instrument, not
        necessarily as monotonically increasing wavelength. Hereafter,
        :math:`n_{ch}` is the length of ``lbda_obs``.
    grid_param_list : list of 1d numpy arrays/lists
        Should contain lists/numpy 1d arrays with the available grid of model
        parameters (should only contain the sampled parameters, not the
        models themselves). The models will be loaded with ``model_reader``.
    model_grid : numpy N-d array, optional
        If provided, should contain the grid of model spectra for each free
        parameter of the given grid. I.e. for a grid of :math:`n_T` values of
        :math:`T_{eff}` and :math:`n_g` values of log(:math:`g`), the numpy
        array should have a shape of :math:`(n_T, n_g, n_{ch}, 2)`, where the
        last 2 dimensions correspond to wavelength and fluxes respectively.
        If provided, ``model_grid`` takes precedence over ``model_name``/
        ``model_reader``.
    model_reader : python routine
        External routine that reads a model file, converts it to required
        units and returns a 2D numpy array, where the first column
        corresponds to wavelengths, and the second contains model values.
        See example routine in
        ``special.model_resampling.interpolate_model`` description.
    em_lines : dictionary, opt
        Dictionary of emission lines to be added on top of the model
        spectrum. Each dict entry should be the name of the line, assigned
        to a tuple of 4 values:

        1. the wavelength (in :math:`\mu` m);
        2. a string indicating whether line intensity is expressed in flux
           ('F'), luminosity ('L') or log(L/LSun) ("LogL");
        3. the FWHM of the gaussian (or None if to be set automatically);
        4. whether the FWHM is expressed in 'nm', 'mu' or 'km/s'.

        The third and fourth can also be set to None. In that case, the FWHM
        of the gaussian will automatically be set to the equivalent width of
        the line, calculated from the flux to be injected and the continuum
        level (measured in the grid model to which the line is injected).

        Examples:

        >>> em_lines = {'BrG': (2.1667, 'F', None, None)}
        >>> em_lines = {'BrG': (2.1667, 'LogL', 100, 'km/s')}
    em_grid : dictionary pointing to lists, opt
        Dictionary where each entry corresponds to an emission line and
        points to a list of values to inject for emission line fluxes. For
        computation efficiency, interpolation will be performed between the
        points of this grid during the MCMC sampling. Dictionary entries
        should match those in ``labels`` and ``em_lines``.

        Examples:

        >>> BrGmin, BrGmax = -5, 5
        >>> em_grid = {'BrG': np.arange(BrGmin, BrGmax, 20)}

        >>> BrGmin, BrGmax = -5, 5
        >>> PaBmin, PaBmax = -2, 7
        >>> em_grid = {'PaB': np.arange(PaBmin, PaBmax, 20),
        ...            'BrG': np.arange(BrGmin, BrGmax, 20)}
    dlbda_obs : numpy 1d ndarray or list, optional
        Respective spectral channel width or FWHM of the photometric filters
        used for each point of the observed spectrum. This vector is used to
        infer which part(s) of a combined spectro+photometric spectrum should
        involve convolution+subsampling (model resolution higher than
        measurements), interpolation (the opposite), or convolution by the
        transmission curve of a photometric filter. If not provided, it will
        be inferred from the difference between consecutive lbda_obs points
        (i.e. inaccurate for a combined spectrum). It must be provided IF one
        wants to weigh each measurement based on the spectral resolution of
        each instrument (as in [OLO16]_), through the ``use_weights``
        argument.
    instru_res : float or list of floats/strings, optional
        The mean instrumental resolving power(s) OR filter names. This is
        used to convolve the model spectrum. If several instruments are
        used, provide a list of resolving power values / filter names, one
        for each instrument used.
    instru_idx : numpy 1d array, optional
        1d array containing an index representing each instrument used to
        obtain the spectrum, labelled from 0 to the number of instruments
        (:math:`n_{ins}`): zero for points that don't correspond to any of
        the ``instru_res`` values provided, and i in :math:`[1,n_{ins}]` for
        points associated to instru_res[i-1]. This parameter must be
        provided if the spectrum consists of points obtained with different
        instruments.
    filter_reader : python routine, optional
        External routine that reads a filter file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains transmission values. Important: if not provided, but
        strings are detected in instru_res, the default file reader will be
        used. It assumes the following format for the files:

        - first row contains headers (titles of each column);
        - starting from 2nd row: 1st column: wavelength, 2nd col.:
          transmission;
        - the unit of wavelength can be provided in parentheses of the first
          header key name: e.g. "WL(AA)" for angstrom, "wavelength(mu)" for
          micrometer or "lambda(nm)" for nanometer. Note: only what is in
          parentheses matters for the units.
    interp_nonexist : bool, opt
        Whether to interpolate if models do not exist, based on closest
        model(s).

    Returns
    -------
    resamp_mod : numpy N-d array
        Grid of model spectra resampled at wavelengths matching the observed
        spectrum.

    Note
    ----
    ``grid_param_list`` and ``model_grid`` shouldn't contain grids on radius
    and Av. For a combined grid model + black body fit, just provide the
    list of parameters probed by the grid to ``grid_param_list``, and
    provide values for 'Tbbn' and 'Rbbn' to ``initial_state``, ``labels``
    and ``bounds``.
    """
    n_params = len(grid_param_list)
    n_mods = len(grid_param_list[0])
    dims = [len(grid_param_list[0])]
    if n_params > 1:
        for pp in range(1, n_params):
            n_mods *= len(grid_param_list[pp])
            dims.append(len(grid_param_list[pp]))

    if em_grid is None:
        n_em = 0
        final_dims = dims+[len(lbda_obs)]+[2]
    else:
        n_em = len(em_grid)
        n_em_mods = 1
        dims_em = []
        for key, _ in em_grid.items():
            n_em_mods *= len(em_grid[key])
            dims_em.append(len(em_grid[key]))
        final_dims = dims+dims_em+[len(lbda_obs)]+[2]
        dims_em = tuple(dims_em)

    final_dims = tuple(final_dims)
    dims = tuple(dims)
    resamp_mod = []

    # Loop on all models whose parameters are provided in the model grid
    for nn in range(n_mods):
        if model_grid is not None:
            indices = []
            idx = np.unravel_index(nn, dims)
            for pp in range(n_params):
                indices.append(idx[pp])
            indices = tuple(indices)
            tmp = model_grid[indices]
            lbda_mod = tmp[:, 0]
            spec_mod = tmp[:, 1]
        else:
            params_tmp = []
            idx = np.unravel_index(nn, dims)
            for pp in range(n_params):
                params_tmp.append(grid_param_list[pp][idx[pp]])
            try:
                lbda_mod, spec_mod = model_reader(params_tmp)
                if np.sum(np.isnan(spec_mod)) > 0:
                    print("There are nan values in spec for params: "
                          "{}".format(params_tmp))
                    pdb.set_trace()
            except:
                msg = "Model does not exist for param combination ({})"
                print(msg.format(params_tmp))
                if interp_nonexist:
                    print("Press c if you wish to interpolate that model "
                          "from neighbours")
                    pdb.set_trace()
                    # find for which dimension the model doesn't exist
                    for qq in range(n_params):
                        interp_params1 = []
                        interp_params2 = []
                        for pp in range(n_params):
                            if pp == qq:
                                try:
                                    interp_params1.append(
                                        grid_param_list[pp][idx[pp]-1])
                                    interp_params2.append(
                                        grid_param_list[pp][idx[pp]+1])
                                except:
                                    continue
                            else:
                                interp_params1.append(
                                    grid_param_list[pp][idx[pp]])
                                interp_params2.append(
                                    grid_param_list[pp][idx[pp]])
                        try:
                            lbda_mod1, spec_mod1 = model_reader(interp_params1)
                            lbda_mod2, spec_mod2 = model_reader(interp_params2)
                            lbda_mod = np.mean([lbda_mod1, lbda_mod2], axis=0)
                            spec_mod = np.mean([spec_mod1, spec_mod2], axis=0)
                            msg = "Model was interpolated based on models: {} "
                            msg += "and {}"
                            print(msg.format(interp_params1, interp_params2))
                            break
                        except:
                            pass
                        if qq == n_params-1:
                            msg = "Impossible to interpolate model! "
                            msg += "Consider reducing bounds."
                            raise ValueError(msg)
                else:
                    msg = "Model interpolation not allowed for non-existing "
                    msg += "models in the grid."
                    raise ValueError(msg)

        # inject emission lines if any
        if n_em > 0:
            flux_grids = []
            wls = []
            widths = []
            for key, flux_grid in em_grid.items():
                flux_grids.append(flux_grid)
                wls.append(em_lines[key][0])
                widths.append(em_lines[key][2])
            # recursively inject em lines
            for fluxes in itertools.product(*flux_grids):
                for ff, flux in enumerate(fluxes):
                    spec_mod = inject_em_line(wls[ff], flux, lbda_mod,
                                              spec_mod, widths[ff])
                # interpolate OR convolve+bin model spectrum if required
                if len(lbda_obs) != len(lbda_mod):
                    res = resample_model(lbda_obs, lbda_mod, spec_mod,
                                         dlbda_obs, instru_res, instru_idx,
                                         filter_reader)
                elif not np.allclose(lbda_obs, lbda_mod):
                    res = resample_model(lbda_obs, lbda_mod, spec_mod,
                                         dlbda_obs, instru_res, instru_idx,
                                         filter_reader)
                else:
                    res = np.array([lbda_obs, spec_mod])
                resamp_mod.append(res)
        else:
            # interpolate OR convolve+bin model spectrum if not same sampling
            if len(lbda_obs) != len(lbda_mod):
                res = resample_model(lbda_obs, lbda_mod, spec_mod, dlbda_obs,
                                     instru_res, instru_idx, filter_reader)
            elif not np.allclose(lbda_obs, lbda_mod):
                res = resample_model(lbda_obs, lbda_mod, spec_mod, dlbda_obs,
                                     instru_res, instru_idx, filter_reader)
            else:
                res = np.array([lbda_obs, spec_mod])
            resamp_mod.append(res)

    resamp_mod = np.array(resamp_mod)
    resamp_mod = np.swapaxes(resamp_mod, -1, -2)

    return resamp_mod.reshape(final_dims)
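
# Illustrative usage sketch for make_resampled_models (added example; all
# values are assumptions for demonstration): resample a toy 2-parameter grid
# onto a coarse observed sampling. `_toy_reader` is a hypothetical stand-in
# for a real model reader (e.g. one opening BT-SETTL files); it returns the
# wavelengths and fluxes of a synthetic model.
def _toy_reader(params):
    teff, logg = params
    lbda = np.linspace(0.9, 2.6, 10000)            # model wavelengths [mu]
    spec = blackbody(lbda, teff)*10**(-0.1*logg)   # toy flux model
    return lbda, spec


def _example_make_resampled_models():
    lbda_obs = np.linspace(1.0, 2.5, 40)           # observed wavelengths [mu]
    grid_param_list = [np.array([1200., 1500.]),   # Teff values [K]
                       np.array([3.5, 4.0])]       # logg values
    resamp_mod = make_resampled_models(lbda_obs, grid_param_list,
                                       model_reader=_toy_reader,
                                       instru_res=200.)
    # resulting shape: (n_Teff, n_logg, n_ch, 2) = (2, 2, 40, 2)
    return resamp_mod
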
def resample_model(lbda_obs, lbda_mod, spec_mod, dlbda_obs=None,
                   instru_res=None, instru_idx=None, filter_reader=None,
                   no_constraint=False, verbose=False):
    """ Convolve or interpolate, and resample, a model spectrum to match the
    observed spectrum.

    Parameters
    ----------
    lbda_obs : numpy 1d ndarray or list
        Wavelengths of the observed spectrum. If several instruments were
        used, the wavelengths should be ordered per instrument, not
        necessarily as monotonically increasing wavelength. Hereafter,
        :math:`n_{ch}` is the length of ``lbda_obs``.
    lbda_mod : numpy 1d ndarray or list
        Wavelengths of the tested model. Should have a wider wavelength
        extent than the observed spectrum.
    spec_mod : numpy 1d ndarray
        Model spectrum. It does not require the same wavelength sampling as
        the observed spectrum. If higher spectral resolution, it will be
        convolved with the instrumental spectral PSF (if ``instru_res`` is
        provided) and then binned to the same sampling. If lower spectral
        resolution, a linear interpolation is performed to infer the value
        at the observed spectrum wavelength sampling.
    dlbda_obs : numpy 1d ndarray or list, optional
        Respective spectral channel width or FWHM of the photometric filters
        used for each point of the observed spectrum. This vector is used to
        infer which part(s) of a combined spectro+photometric spectrum
        should involve convolution+subsampling (model resolution higher than
        measurements), interpolation (the opposite), or convolution by the
        transmission curve of a photometric filter. If not provided, it will
        be inferred from the difference between consecutive lbda_obs points
        (i.e. inaccurate for a combined spectrum obtained with different
        instruments).
    instru_res : float or list of floats/strings, optional
        The mean instrumental resolving power(s) OR filter names. This is
        used to convolve the model spectrum. If several instruments/
        resolving powers are to be considered, provide a list of resolving
        power values or filter names.
    instru_idx : numpy 1d array, optional
        1d array containing an index representing each instrument/resolving
        power used to obtain the spectrum, labelled from 0 to the number of
        instruments (:math:`n_{ins}`): zero for points that don't correspond
        to any of the ``instru_res`` values provided, and i in
        :math:`[1,n_{ins}]` for points associated to instru_res[i-1]. This
        parameter must be provided if the spectrum consists of points
        obtained with different instruments/resolving powers.
    filter_reader : python routine, optional
        External routine that reads a filter file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains transmission values. Must be provided if instru_res
        contains strings (filter filenames). Important: if not provided, but
        strings are detected in instru_res, the default file reader will be
        used. It assumes the following format for the files:

        - first row contains headers (titles of each column);
        - starting from 2nd row: 1st column: wavelength, 2nd col.:
          transmission;
        - the unit of wavelength can be provided in parentheses of the first
          header key name: e.g. "WL(AA)" for angstrom, "wavelength(mu)" for
          micrometer or "lambda(nm)" for nanometer. Note: only what is in
          parentheses matters for the units.
    no_constraint : bool, optional
        If set to True, will not use 'floor' and 'ceil' constraints when
        cropping the model wavelength ranges, i.e. faces the risk of
        extrapolation. May be useful if the bounds of the wavelength ranges
        are known to match exactly.
    verbose : bool, optional
        Whether to print more information during resampling.

    Returns
    -------
    lbda_obs, spec_mod_res : 2d numpy array
        Observed wavelengths, and the resampled model spectrum at those
        wavelengths.
    """
    def _default_file_reader(filter_name):
        """ Default file reader if no filter file reader is provided.
        """
        filt_table = pd.read_csv(filter_name, sep=' ', header=0,
                                 skipinitialspace=True)
        keys = filt_table.keys()
        lbda_filt = np.array(filt_table[keys[0]])
        if '(AA)' in keys[0]:
            lbda_filt /= 10000
        elif '(mu)' in keys[0]:
            pass
        elif '(nm)' in keys[0]:
            lbda_filt /= 1000
        else:
            raise ValueError('Wavelength unit not recognised in filter file')
        trans = np.array(filt_table[keys[1]])
        return lbda_filt, trans

    n_ch = len(lbda_obs)
    spec_mod_res = np.zeros_like(lbda_obs)

    if dlbda_obs is None:
        # if dlbda_obs is not provided, estimate it to trim out useless WL
        # ranges from the model spectrum, hence significantly improving
        # speed for large (>1M pts) models (e.g. BT-SETTL).
        # 0.3 factor to consider possible broad-band filters.
        dlbda_obs1 = [min(0.3*lbda_obs[0], lbda_obs[1]-lbda_obs[0])]
        dlbda_obs2 = [(lbda_obs[i+2]-lbda_obs[i])/2 for i in range(n_ch-2)]
        dlbda_obs3 = [min(0.3*lbda_obs[-1], lbda_obs[-1]-lbda_obs[-2])]
        dlbda_obs = np.array(dlbda_obs1+dlbda_obs2+dlbda_obs3)

    if verbose:
        print("checking whether WL samplings are the same for obs and model")
    if np.isscalar(instru_res) and not isinstance(instru_res, str):
        instru_res = [instru_res]

    cond = False
    if len(lbda_obs) != len(lbda_mod):
        cond = True
    elif not np.allclose(lbda_obs, lbda_mod):
        cond = True
    if cond:
        lbda_min = lbda_obs[0]-2*dlbda_obs[0]
        if instru_res is not None:
            if np.isscalar(instru_res[0]) and not isinstance(instru_res[0],
                                                             str):
                if instru_idx is None:
                    instru_idx = np.array([1]*len(lbda_obs))
                lbda_instru = lbda_obs[np.where(instru_idx == 1)]
                instru_fwhm = np.mean(lbda_instru)/instru_res[0]
                lbda_min = max(lbda_obs[0]-3*instru_fwhm, lbda_mod[0])
        lbda_max = lbda_obs[-1]+2*dlbda_obs[-1]
        if instru_res is not None:
            if np.isscalar(instru_res[-1]) and not isinstance(instru_res[-1],
                                                              str):
                if instru_idx is None:
                    instru_idx = np.array([1]*len(lbda_obs))
                lbda_instru = lbda_obs[np.where(
                    instru_idx == np.amax(instru_idx))]
                instru_fwhm = np.mean(lbda_instru)/instru_res[-1]
                lbda_max = min(lbda_obs[-1]+3*instru_fwhm, lbda_mod[-1])
        if no_constraint:
            idx_ini = find_nearest(lbda_mod, lbda_min)
            idx_fin = find_nearest(lbda_mod, lbda_max)
        else:
            idx_ini = find_nearest(lbda_mod, lbda_min, constraint='floor')
            idx_fin = find_nearest(lbda_mod, lbda_max, constraint='ceil')
        lbda_mod = lbda_mod[idx_ini:idx_fin+1]
        spec_mod = spec_mod[idx_ini:idx_fin+1]

    nmod = lbda_mod.shape[0]
    # compute the wavelength sampling of the model
    dlbda_mod1 = [lbda_mod[1]-lbda_mod[0]]
    dlbda_mod2 = [(lbda_mod[i+1]-lbda_mod[i-1])/2 for i in range(1, nmod-1)]
    dlbda_mod3 = [lbda_mod[-1]-lbda_mod[-2]]
    dlbda_mod = np.array(dlbda_mod1+dlbda_mod2+dlbda_mod3)

    if verbose:
        print("testing whether observed spectral res could be > than model's ")
        print("(in at least parts of the spectrum)")
    dlbda_obs_min = np.amin(dlbda_obs)
    idx_obs_min = np.argmin(dlbda_obs)
    idx_near = find_nearest(lbda_mod, lbda_obs[idx_obs_min])
    dlbda_mod_tmp = (lbda_mod[idx_near+1]-lbda_mod[idx_near-1])/2
    do_interp = np.zeros(n_ch, dtype='int32')

    if dlbda_mod_tmp > dlbda_obs_min and dlbda_obs_min > 0:
        if verbose:
            print("checking where obs spec res is < or > than model's")
        # check where obs spec res is < or > than model's
        nchunks_i = 0
        for ll in range(n_ch):
            idx_near = find_nearest(lbda_mod, lbda_obs[ll])
            do_interp[ll] = (dlbda_obs[ll] < dlbda_mod[idx_near])
            if ll > 0:
                if do_interp[ll] and not do_interp[ll-1]:
                    nchunks_i += 1
            elif do_interp[ll]:
                nchunks_i = 1

    # interpolate model if the observed spectrum has higher resolution
    # and is monotonically increasing
    if np.sum(do_interp) and dlbda_obs_min > 0:
        if verbose:
            print("interpolating model where obs spectrum has higher res")
        idx_0 = 0
        for nc in range(nchunks_i):
            idx_1 = np.argmax(do_interp[idx_0:])+idx_0
            idx_0 = np.argmin(do_interp[idx_1:])+idx_1
            if idx_0 == idx_1:
                idx_0 = -1
                if nc != nchunks_i-1:
                    pdb.set_trace()  # should not happen
            idx_ini = find_nearest(lbda_mod, lbda_obs[idx_1],
                                   constraint='floor')
            idx_fin = find_nearest(lbda_mod, lbda_obs[idx_0],
                                   constraint='ceil')
            spl = InterpolatedUnivariateSpline(lbda_mod[idx_ini:idx_fin],
                                               spec_mod[idx_ini:idx_fin],
                                               k=min(3, idx_fin-idx_ini-1))
            spec_mod_res[idx_1:idx_0] = spl(lbda_obs[idx_1:idx_0])

    # convolve+bin where the model spectrum has higher resolution (most
    # likely)
    if np.sum(do_interp) < n_ch or dlbda_obs_min < 0:
        if instru_res is None:
            msg = "Warning! No resolving power nor filter file provided"
            msg += " => binning without convolution"
            print(msg)
            for ll, lbda in enumerate(lbda_obs):
                mid_lbda_f = lbda_obs-dlbda_obs/2.
                mid_lbda_l = lbda_obs+dlbda_obs/2.
                i_f = find_nearest(lbda_mod, mid_lbda_f[ll])
                i_l = find_nearest(lbda_mod, mid_lbda_l[ll])
                spec_mod_res[ll] = np.mean(spec_mod[i_f:i_l+1])
            if dlbda_obs_min < 0:
                msg = "instru_res not provided, but dlbda_obs_min < 0 means "
                msg += "several instruments are used with overlapping WL "
                msg += "ranges"
                raise ValueError(msg)
        else:
            if verbose:
                print("convolving+binning where model spectrum has higher res")
            if isinstance(instru_idx, list):
                instru_idx = np.array(instru_idx)
            elif not isinstance(instru_idx, np.ndarray):
                instru_idx = np.array([1]*n_ch)
            for i in range(1, len(instru_res)+1):
                if isinstance(instru_res[i-1], str):
                    if filter_reader is not None:
                        lbda_filt, trans = filter_reader(instru_res[i-1])
                    else:
                        lbda_filt, trans = _default_file_reader(
                            instru_res[i-1])
                    idx_ini = find_nearest(lbda_mod, lbda_filt[0],
                                           constraint='ceil')
                    idx_fin = find_nearest(lbda_mod, lbda_filt[-1],
                                           constraint='floor')
                    interp_trans = np.interp(lbda_mod[idx_ini:idx_fin],
                                             lbda_filt, trans)
                    num = np.sum(interp_trans*dlbda_mod[idx_ini:idx_fin]
                                 * spec_mod[idx_ini:idx_fin])
                    denom = np.sum(interp_trans*dlbda_mod[idx_ini:idx_fin])
                    spec_mod_res[np.where(instru_idx == i)] = num/denom
                elif np.isscalar(instru_res[i-1]):
                    lbda_instru = lbda_obs[np.where(instru_idx == i)]
                    instru_fwhm = np.mean(lbda_instru)/instru_res[i-1]
                    ifwhm = instru_fwhm/(np.mean(dlbda_mod))
                    stddev = ifwhm*gaussian_fwhm_to_sigma
                    gau_ker = Gaussian1DKernel(stddev=stddev)
                    idx0 = find_nearest(lbda_mod, lbda_instru[0])
                    idx1 = find_nearest(lbda_mod, lbda_instru[-1])
                    idx_ini = max(0, int(idx0-10*stddev))
                    # clamp the upper crop index within the model array
                    # ('min', so the crop actually takes effect)
                    idx_fin = min(len(spec_mod)-1, int(idx1+10*stddev))
                    spec_mod_conv = convolve_fft(spec_mod[idx_ini:idx_fin+1],
                                                 gau_ker, preserve_nan=True)
                    tmp = np.zeros_like(lbda_obs[np.where(instru_idx == i)])
                    for ll, lbda in enumerate(
                            lbda_obs[np.where(instru_idx == i)]):
                        mid_lbda_f = lbda_obs-dlbda_obs/2.
                        mid_lbda_l = lbda_obs+dlbda_obs/2.
                        i_f = find_nearest(
                            lbda_mod[idx_ini:idx_fin+1],
                            mid_lbda_f[np.where(instru_idx == i)][ll])
                        i_l = find_nearest(
                            lbda_mod[idx_ini:idx_fin+1],
                            mid_lbda_l[np.where(instru_idx == i)][ll])
                        tmp[ll] = np.mean(spec_mod_conv[i_f:i_l+1])
                    spec_mod_res[np.where(instru_idx == i)] = tmp
                else:
                    msg = "instru_res is a {}, while it should be either a"
                    msg += " scalar or a string"
                    raise TypeError(msg.format(type(instru_res[i-1])))

    return np.array([lbda_obs, spec_mod_res])
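
# Illustrative usage sketch for resample_model (added example; values are
# placeholder assumptions): convolve a finely sampled toy spectrum to a
# single assumed resolving power and bin it to a coarse observed sampling.
def _example_resample_model():
    lbda_mod = np.linspace(0.9, 2.6, 20000)   # high-res model grid [mu]
    spec_mod = blackbody(lbda_mod, 1600.)     # toy model spectrum
    lbda_obs = np.linspace(1.0, 2.5, 30)      # low-res observed grid [mu]
    lbda_out, spec_out = resample_model(lbda_obs, lbda_mod, spec_mod,
                                        instru_res=100.)
    return lbda_out, spec_out

# For the filter-name branch, a file accepted by the default reader would
# look like the following (assumed two-column, space-separated content; the
# wavelength unit is read from the parentheses in the header):
#
#     WL(AA) transmission
#     19000. 0.00
#     21900. 0.95
#     24000. 0.00
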
def interpolate_model(params, grid_param_list, params_em={}, em_grid={},
                      em_lines={}, labels=None, model_grid=None,
                      model_reader=None, interp_order=1, max_dlbda=2e-4,
                      verbose=False):
    """ Interpolate a model spectrum at requested parameter values, from a
    grid of models.

    Parameters
    ----------
    params : tuple
        Set of model parameters for which the model grid has to be
        interpolated.
    grid_param_list : list of 1d numpy arrays/lists
        Should contain lists/numpy 1d arrays with the available grid of model
        parameters (should only contain the sampled parameters, not the
        models themselves). The models will be loaded with ``model_reader``.
    params_em : dictionary, opt
        Set of emission line parameters (typically fluxes) for which the
        model grid has to be interpolated.
    em_grid : dictionary pointing to lists, opt
        Dictionary where each entry corresponds to an emission line and
        points to a list of values to inject for emission line fluxes. For
        computation efficiency, interpolation will be performed between the
        points of this grid during the MCMC sampling. Dictionary entries
        should match those in ``labels`` and ``em_lines``.

        Examples:

        >>> BrGmin, BrGmax = -5, 5
        >>> em_grid = {'BrG': np.arange(BrGmin, BrGmax, 20)}

        >>> BrGmin, BrGmax = -5, 5
        >>> PaBmin, PaBmax = -2, 7
        >>> em_grid = {'PaB': np.arange(PaBmin, PaBmax, 20),
        ...            'BrG': np.arange(BrGmin, BrGmax, 20)}
    em_lines : dictionary, opt
        Dictionary of emission lines to be added on top of the model
        spectrum. Each dict entry should be the name of the line, assigned
        to a tuple of 4 values:

        1. the wavelength (in :math:`\mu` m);
        2. a string indicating whether line intensity is expressed in flux
           ('F'), luminosity ('L') or log(L/LSun) ("LogL");
        3. the FWHM of the gaussian (or None if to be set automatically);
        4. whether the FWHM is expressed in 'nm', 'mu' or 'km/s'.

        The third and fourth can also be set to None. In that case, the FWHM
        of the gaussian will automatically be set to the equivalent width of
        the line, calculated from the flux to be injected and the continuum
        level (measured in the grid model to which the line is injected).

        Examples:

        >>> em_lines = {'BrG': (2.1667, 'F', None, None)}
        >>> em_lines = {'BrG': (2.1667, 'LogL', 100, 'km/s')}
    labels : Tuple of strings
        Tuple of labels in the same order as initial_state:

        - first all parameters related to loaded models (e.g. 'Teff', 'logg');
        - then the planet photometric radius 'R', in Jupiter radius;
        - (optionally) the flux of emission lines (labels should match those
          in the em_lines dictionary), in units of the model spectrum
          (times mu);
        - (optionally) the optical extinction 'Av', in mag;
        - (optionally) the ratio of total to selective optical extinction
          'Rv';
        - (optionally) 'Tbb1', 'Rbb1', 'Tbb2', 'Rbb2', etc. for each extra
          bb contribution.

        Note: only necessary if an emission line dictionary ``em_lines`` is
        provided.
    model_grid : numpy N-d array, optional
        If provided, should contain the grid of model spectra for each free
        parameter of the given grid. I.e. for a grid of :math:`n_T` values
        of :math:`T_{eff}` and :math:`n_g` values of log(:math:`g`), the
        numpy array should have a shape of :math:`(n_T, n_g, n_{ch}, 2)`,
        where the last 2 dimensions correspond to wavelength and fluxes
        respectively. If provided, ``model_grid`` takes precedence over
        ``model_name``/``model_reader``.
    model_reader : python routine, opt
        External routine that reads a model file and returns a 2D numpy
        array, where the first column corresponds to wavelengths, and the
        second contains model values. See the example routine
        ``_example_file_reader`` below.
    interp_order : int or tuple of int, opt, {-1,0,1}
        Interpolation mode for model interpolation.

        - -1: log interpolation (i.e. linear interpolation on log(Flux))
        - 0: nearest neighbour model
        - 1: Order 1 spline interpolation
    max_dlbda : float, opt
        Maximum delta lbda in mu allowed if binning of lbda_model is
        necessary. This is necessary for grids of models (e.g. BT-SETTL)
        where the wavelength sampling is not the same depending on
        parameters (e.g. between 4000K and 4100K models for BT-SETTL):
        resampling while preserving the original resolution would be
        computationally prohibitive.
    verbose : bool, optional
        Whether to print more information during resampling.

    Returns
    -------
    out : two 1d numpy arrays
        Interpolated model for the input parameters: first the wavelengths,
        then the corresponding model values.
    """
    def _example_file_reader(filename):
        """ This is a minimal example for the file_reader routine to be
        provided as argument to model_interpolation. The routine should take
        as input a template filename format with blanks and parameters, and
        return as output the wavelengths and model values as a 2D numpy
        array. This example assumes the model is in a fits file, that is
        already a 2D numpy array, where the first column is the wavelength,
        and the second column contains the corresponding model values.
        """
        model = open_fits(filename.format(params[0], params[1]))
        return model

    def _den_to_bin(denary, ndigits=3):
        """ Convert denary to binary number, keeping n digits for binary
        (i.e. padding with zeros if necessary).
        """
        binary = ""
        while denary > 0:
            # integer division by 2 shifts the binary representation right;
            # the remainder gives the next binary digit
            binary = str(denary % 2) + binary
            denary = denary//2
        if len(binary) < ndigits:
            pad = '0'*(ndigits-len(binary))
        else:
            pad = ''
        return pad+binary

    n_params = len(grid_param_list)
    n_em = len(em_grid)
    n_params_tot = n_params+n_em

    if isinstance(interp_order, (int, bool)):
        interp_order = [interp_order]*n_params_tot
    interp_order = tuple(interp_order)

    if np.sum(np.abs(interp_order)) == 0:
        if model_grid is None:
            params_tmp = np.zeros(n_params)
            for nn in range(n_params):
                params_tmp[nn] = find_nearest(grid_param_list[nn],
                                              params[nn], output='value')
            lbda, spec = model_reader(params_tmp)
            if n_em > 0:
                for ll in range(len(labels)):
                    if labels[ll] in em_grid.keys():
                        key = labels[ll]
                        spec = inject_em_line(em_lines[key][0],
                                              params_em[key], lbda, spec,
                                              em_lines[key][2])
            return lbda, spec
        else:
            idx_tmp = []
            counter = 0
            for nn in range(n_params_tot):
                if nn < n_params:
                    idx_tmp.append(find_nearest(grid_param_list[nn],
                                                params[nn], output='index'))
                else:
                    for ll in range(len(labels)):
                        if labels[counter+ll] in em_grid.keys():
                            key = labels[counter+ll]
                            counter += 1
                            break
                    idx_tmp.append(find_nearest(em_grid[key],
                                                params_em[key],
                                                output='index'))
            idx_tmp = tuple(idx_tmp)
            tmp = model_grid[idx_tmp]
            return tmp[:, 0], tmp[:, 1]

    else:
        if len(interp_order) != n_params_tot:
            msg = "if a tuple, interp_order should have same length as the "
            msg += "number of grid dimensions"
            raise TypeError(msg)
        else:
            for i in range(n_params_tot):
                if interp_order[i] not in [-1, 0, 1]:
                    msg = "interp_order values should be -1, 0, or 1"
                    raise TypeError(msg)

        # first compute new subgrid "coords" for interpolation
        if verbose:
            print("Computing new coords for interpolation")
        constraints = ['floor', 'ceil']
        new_coords = np.zeros([n_params_tot, 1])
        sub_grid_param = np.zeros([n_params_tot, 2])
        counter = 0
        for nn in range(n_params_tot):
            if nn < n_params:
                grid_tmp = grid_param_list[nn]
                params_tmp = params[nn]
            else:
                for ll in range(len(labels)):
                    if labels[counter+ll] in em_grid.keys():
                        key = labels[counter+ll]
                        grid_tmp = em_grid[key]
                        params_tmp = params_em[key]
                        counter += 1
                        break
            for ii in range(2):
                try:
                    sub_grid_param[nn, ii] = find_nearest(
                        grid_tmp, params_tmp, constraint=constraints[ii],
                        output='value')
                except:
                    pdb.set_trace()
            if interp_order[nn] == -1:
                num = np.log(params_tmp/sub_grid_param[nn, 0])
                denom = np.log(sub_grid_param[nn, 1]/sub_grid_param[nn, 0])
            else:
                num = (params_tmp-sub_grid_param[nn, 0])
                denom = (sub_grid_param[nn, 1]-sub_grid_param[nn, 0])
            new_coords[nn, 0] = num/denom

        if verbose:
            print("Making sub-grid of models")
        sub_grid = []
        sub_grid_lbda = []

        if model_grid is None:
            ntot_subgrid = 2**n_params_tot
            for dd in range(ntot_subgrid):
                str_indices = _den_to_bin(dd, n_params_tot)
                params_tmp = []
                for nn in range(n_params):
                    params_tmp.append(sub_grid_param[nn,
                                                     int(str_indices[nn])])
                params_tmp = np.array(params_tmp)
                lbda, spec = model_reader(params_tmp)
                if n_em > 0:
                    for nn in range(len(labels)):
                        if labels[nn] in em_grid.keys():
                            key = labels[nn]
                            spec = inject_em_line(em_lines[key][0],
                                                  params_em[key], lbda,
                                                  spec, em_lines[key][2])
                sub_grid.append(spec)
                sub_grid_lbda.append(lbda)

            # resample to match sparser sampling if required
            nch = np.amin([len(sub_grid_lbda[i])
                           for i in range(ntot_subgrid)])
            nch_max = np.amax([len(sub_grid_lbda[i])
                               for i in range(ntot_subgrid)])
            if nch_max != nch:
                min_i = np.argmin([len(sub_grid_lbda[i])
                                   for i in range(ntot_subgrid)])
                min_dlbda = np.amin(sub_grid_lbda[min_i][1:]
                                    - sub_grid_lbda[min_i][:-1])
                if min_dlbda < max_dlbda:
                    bin_fac = int(max_dlbda/min_dlbda)
                    if verbose:
                        msg = "Models will be binned in WL by a factor {} to "
                        msg += "min dlbda = {}mu"
                        print(msg.format(bin_fac, max_dlbda))
                    nch = int(len(sub_grid_lbda[min_i])/bin_fac)
                    tmp_spec = []
                    tmp_lbda = []
                    for bb in range(nch):
                        idx_ini = bb*bin_fac
                        idx_fin = (bb+1)*bin_fac
                        tmp_spec.append(
                            np.mean(sub_grid[min_i][idx_ini:idx_fin]))
                        tmp_lbda.append(
                            np.mean(sub_grid_lbda[min_i][idx_ini:idx_fin]))
                    sub_grid[min_i] = np.array(tmp_spec)
                    sub_grid_lbda[min_i] = np.array(tmp_lbda)
                for dd in range(ntot_subgrid):
                    cond = False
                    if len(sub_grid_lbda[dd]) != nch:
                        cond = True
                    else:
                        # np.allclose() or np.array_equal are TOO slow
                        dlbda = (sub_grid_lbda[min_i][-1]
                                 - sub_grid_lbda[min_i][0])
                        dlbda /= nch
                        if np.sum(sub_grid_lbda[dd]
                                  - sub_grid_lbda[min_i]) > dlbda:
                            cond = True
                    if cond:
                        if verbose:
                            msg = "Resampling model of different WL sampling. "
                            msg += "This may take a while for high-res/large "
                            msg += "WL ranges..."
                            print(msg)
                        res = resample_model(sub_grid_lbda[min_i],
                                             sub_grid_lbda[dd],
                                             sub_grid[dd],
                                             no_constraint=True,
                                             verbose=verbose)
                        sub_grid_lbda[dd], sub_grid[dd] = res

            # Create array with dimensions 'dims' for each wavelength
            final_dims = tuple([nch]+[2]*n_params_tot)
            sub_grid = np.array(sub_grid)
            sub_grid_lbda = np.array(sub_grid_lbda)
            sub_grid = np.swapaxes(sub_grid, 0, 1)
            sub_grid_lbda = np.swapaxes(sub_grid_lbda, 0, 1)
            sub_grid = sub_grid.reshape(final_dims)
            sub_grid_lbda = sub_grid_lbda.reshape(final_dims)
        else:
            constraints = ['floor', 'ceil']
            sub_grid_idx = np.zeros([n_params_tot, 2], dtype=np.int32)
            counter = 0
            for nn in range(n_params_tot):
                if nn < n_params:
                    grid_tmp = grid_param_list[nn]
                    params_tmp = params[nn]
                else:
                    for ll in range(len(labels)):
                        if labels[counter+ll] in em_grid.keys():
                            key = labels[counter+ll]
                            grid_tmp = em_grid[key]
                            params_tmp = params_em[key]
                            counter += 1
                            break
                for ii in range(2):
                    sub_grid_idx[nn, ii] = find_nearest(
                        grid_tmp, params_tmp, constraint=constraints[ii],
                        output='index')
            for dd in range(2**n_params_tot):
                str_indices = _den_to_bin(dd, n_params_tot)
                idx_tmp = []
                for nn in range(n_params_tot):
                    idx_tmp.append(sub_grid_idx[nn, int(str_indices[nn])])
                sub_grid.append(model_grid[tuple(idx_tmp)])

            # first reshape
            sub_grid = np.array(sub_grid)
            dims = tuple([2]*n_params_tot+[sub_grid.shape[-2]]
                         + [sub_grid.shape[-1]])
            sub_grid = sub_grid.reshape(dims)
            # make last dim (lbda vs flux) come first
            sub_grid = np.moveaxis(sub_grid, -1, 0)
            sub_grid_lbda = sub_grid[0]
            sub_grid = sub_grid[1]
            # move again axis to have nch as first axis
            sub_grid = np.moveaxis(sub_grid, -1, 0)
            sub_grid_lbda = np.moveaxis(sub_grid_lbda, -1, 0)

        nch = sub_grid.shape[0]
        interp_model = np.zeros(nch)
        interp_lbdas = np.zeros(nch)
        for cc in range(nch):
            interp_model[cc] = map_coordinates(sub_grid[cc], new_coords,
                                               order=1)
            interp_lbdas[cc] = map_coordinates(sub_grid_lbda[cc], new_coords,
                                               order=1)

        return interp_lbdas, interp_model
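
# Illustrative usage sketch for interpolate_model (added example; the 2x2
# toy grid is an assumption built on the fly for demonstration): order-1
# interpolation between the four corners of a pre-loaded model grid.
def _example_interpolate_model():
    teffs = np.array([1200., 1400.])
    loggs = np.array([3.5, 4.0])
    lbda = np.linspace(1.0, 2.5, 50)
    model_grid = np.zeros((len(teffs), len(loggs), len(lbda), 2))
    for i, teff in enumerate(teffs):
        for j, logg in enumerate(loggs):
            model_grid[i, j, :, 0] = lbda                           # WL [mu]
            model_grid[i, j, :, 1] = blackbody(lbda, teff)*10**(-0.1*logg)
    # request parameters lying between the grid points
    interp_lbdas, interp_model = interpolate_model((1300., 3.75),
                                                   [teffs, loggs],
                                                   model_grid=model_grid,
                                                   interp_order=1)
    return interp_lbdas, interp_model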