Source code for infobs.util.ism_lines_helpers

"""
Implements helpers to process spectral lines formatted according to Meudon PDR code standards.
"""

import re
from typing import List, Optional, Tuple, Union
from warnings import warn

__all__ = [
    "Settings",
    "molecule_and_transition",
    "molecule",
    "transition",
    "is_line_of",
    "filter_molecules",
    "molecules_among_lines",
    "molecule_to_latex",
    "transition_to_latex",
    "line_to_latex",
    "remove_hyperfine",
    "is_hyperfine",
]


## Global settings


class Settings:
    """
    Global, class-level switches controlling how lines are rendered in LaTeX.

    The attributes are read directly on the class (``Settings.math_mode``),
    so changing them affects every subsequent call of the helpers of this module.
    """

    # Controls whether latex code will be embedded in a math mode
    math_mode: bool = True

    # Defines transitions to ignore (see _energy_to_latex)
    ignored_transitions: List[str] = []

    # Choose whether to ignore electronic configurations
    ignore_electronic: bool = False

    # Choose whether to ignore other configurations
    ignore_litterals: bool = False

    # Choose a simplified description with only the rotational transition (J)
    only_rotational: bool = False
## Dicts

# Molecular names to latex
# Keys are the lower-case species names used as line prefixes,
# values are the LaTeX bodies (without math delimiters).
_molecules_to_latex = {
    # Hydrogen and isotopologues
    "h": "H",
    "h2": "H_2",
    "hd": "HD",
    # Carbon monoxide and isotopologues
    "co": "^{12}CO",
    "13c_o": "^{13}CO",
    "c_18o": "C^{18}O",
    "13c_18o": "^{13}C^{18}O",
    # Atoms
    "c": "C",
    "n": "N",
    "o": "O",
    "s": "S",
    "si": "Si",
    # Other neutral molecules
    "cs": "^{12}CS",
    "cn": "^{12}CN",
    "hcn": "HCN",
    "hnc": "HNC",
    "oh": "OH",
    "h2o": "H_2O",
    "h2_18o": "H_2^{18}O",
    "c2h": "C_2H",
    "c_c3h2": "c-C_3H_2",
    "so": "SO",
    # Ions (trailing "p" stands for "+")
    "cp": "C^+",
    "sp": "S^+",
    "hcop": "HCO^+",
    "chp": "CH^+",
    "ohp": "OH^+",
    "shp": "SH^+",
}

# Molecular names aliases
# Maps alternative spellings to the canonical key of _molecules_to_latex.
_molecules_aliases = {
    "13co": "13c_o",
    "c18o": "c_18o",
    "13c18o": "13c_18o",
    "cc3h2": "c_c3h2",
}

# Energy to LaTeX
# Each value is a format string receiving the (already converted) level value.
_energy_to_latex = {
    "j": "J={}",
    "v": "\\nu={}",
    "f": "F={}",
    "n": "n={}",
    "ka": "k_a={}",
    "kc": "k_c={}",
    "fif": "F_i=F_{}",
}

# Electronic state to LaTeX
# Each value is a format string receiving the leading number of the state.
_elstate_to_latex = {
    "s": "{}s",
    "p": "{}p",
    "d": "{}d",
    "f": "{}f",
    "so": "{}S_o",
    "po": "{}P_o",
    "do": "{}D_o",
    "fo": "{}F_o",
}

# Literal to LaTeX
_literal_to_latex = {
    "pp": "p=+",
    "pm": "p=-",
}


## Public functions
def molecule_and_transition(line_name: str) -> Tuple[str, str]:
    """
    Returns the raw strings of the molecule name and the transition.

    The line is lower-cased and stripped first. The molecule is the longest
    known name (key of `_molecules_to_latex`) that is immediately followed by
    an underscore; known names may themselves contain underscores
    (e.g. ``c_18o``). When no known name matches, the line is split at its
    first underscore.

    Parameters
    ----------
    line_name : str
        Formatted line.

    Returns
    -------
    str
        Raw string representing the molecule.
    str
        Raw string representing the transition.

    Raises
    ------
    ValueError
        If `line_name` does not contain any underscore.
    """
    # Check if the input is in the good format
    if "_" not in line_name:
        raise ValueError(
            f"line_name {line_name} is not in the appropriate format molecule_transition"
        )

    line_name = line_name.lower().strip()

    # Search for all known molecules followed by the "_" separator.
    # Requiring the separator prevents an unknown species that merely starts
    # with a known name (e.g. "h2s_..." vs "h2") from being cut in the middle.
    prefixes = [s for s in _molecules_to_latex if line_name.startswith(s + "_")]
    if not prefixes:
        mol, trans = line_name.split("_", maxsplit=1)
        return mol, trans

    # Select the longest prefix, then skip the separator to get the suffix
    prefix = max(prefixes, key=len)
    suffix = line_name[len(prefix) + 1 :]

    return prefix, suffix
def molecule(line_name: str) -> str:
    """
    Returns the raw string of the molecule name.

    Thin wrapper around `molecule_and_transition` keeping only its first output.

    Parameters
    ----------
    line_name : str
        Formatted line.

    Returns
    -------
    str
        Raw string representing the molecule.
    """
    mol, _ = molecule_and_transition(line_name)
    return mol
def transition(line_name: str) -> str:
    """
    Returns the raw string of the transition.

    Thin wrapper around `molecule_and_transition` keeping only its second output.

    Parameters
    ----------
    line_name : str
        Formatted line.

    Returns
    -------
    str
        Raw string representing the transition.
    """
    _, trans = molecule_and_transition(line_name)
    return trans
def molecules_among_lines(names: List[str]) -> List[str]:
    """
    Returns the list of molecules (without duplicates) of lines in `names`.

    The order of first appearance in `names` is preserved.

    Parameters
    ----------
    names : list of str
        List of formatted lines.

    Returns
    -------
    List of str
        List of formatted molecules without duplicates.
    """
    # A dict is used as an insertion-ordered set
    seen = {}
    for name in names:
        mol, _ = molecule_and_transition(name)
        seen.setdefault(mol, None)
    return list(seen)
def is_line_of(name: str, mol: str) -> bool:
    """
    Returns True if `name` is a line of the chemical species `mol`, else False.

    `mol` is stripped and lower-cased before being compared to the molecule
    extracted from `name`.

    Parameters
    ----------
    name: str
        Formatted line.
    mol: str
        Molecule.

    Returns
    -------
    bool
        Whether `name` is a line of `mol`.
    """
    target = mol.strip().lower()
    line_mol = molecule(name)
    return line_mol == target
def filter_molecules(names: List[str], mols: Union[str, List[str], None]) -> List[str]:
    """
    Returns a sublist of `names` with only lines of molecules contained in `mols`.

    Molecule names in `mols` are stripped, lower-cased and resolved through
    `_molecules_aliases` before the comparison. The argument `mols` itself is
    never modified.

    Parameters
    ----------
    names : list of str
        List of formatted lines.
    mols : str or List of str or None
        Molecule or list of molecules that you want to select.
        If None, the function just returns the input list `names`.

    Returns
    -------
    List of str
        Sublist of `names`, in the original order.
    """
    if mols is None:
        return names
    if isinstance(mols, str):
        mols = [mols]

    # Build a private set of canonical names instead of normalizing `mols`
    # in place: the caller's list must stay untouched (and may be immutable).
    wanted = set()
    for mol in mols:
        key = mol.strip().lower()
        wanted.add(_molecules_aliases.get(key, key))

    return [name for name in names if molecule(name) in wanted]
def molecule_to_latex(molecule: str) -> str:
    """
    Returns a well displayed version of the formatted molecule or radical `molecule`.

    Known molecules (keys of `_molecules_to_latex`) are rendered inside
    ``\\mathrm{...}``. Unknown ones are returned as is, with the characters
    ``_`` and ``^`` removed so that they cannot be interpreted as LaTeX
    subscripts or superscripts.

    Parameters
    ----------
    molecule : str
        Formatted molecule or radical.

    Returns
    -------
    str
        LaTeX string representing `molecule`, wrapped in ``$...$`` when
        `Settings.math_mode` is True.
    """
    if molecule in _molecules_to_latex:
        latex_molecule = "\\mathrm{{{}}}".format(_molecules_to_latex[molecule])
    else:
        # Python 3 `str.translate` takes a single table argument; the
        # characters to delete are given as third argument of `str.maketrans`.
        latex_molecule = molecule.translate(str.maketrans("", "", "_^"))

    if Settings.math_mode:
        return "$" + latex_molecule + "$"
    return latex_molecule
def transition_to_latex(trans: str) -> str:
    """
    Returns a well displayed version of the formatted transition `trans`.

    The transition is first parsed by `_list_transitions`. Depending on
    `Settings.only_rotational`, it is then rendered either in the simplified
    rotational form or in the complete form.

    Parameters
    ----------
    trans : str
        Formatted transition.

    Returns
    -------
    str
        LaTeX string representing `trans`, wrapped in ``$...$`` when
        `Settings.math_mode` is True. Empty string if no level is kept.
    """
    names, high_lvls, low_lvls = _list_transitions(trans)

    # Nothing to display (every level has been filtered out)
    if not names:
        return ""

    render = _simplified_transition if Settings.only_rotational else _sort_transitions
    latex_transition = render(names, high_lvls, low_lvls)

    return "$" + latex_transition + "$" if Settings.math_mode else latex_transition
def line_to_latex(line_name: str) -> str:
    """
    Returns a well displayed version of the formatted line `line_name`.

    The molecule and the transition are converted separately, then joined
    with a thin space (``\\,``).

    Parameters
    ----------
    line_name : str
        Formatted line.

    Returns
    -------
    str
        LaTeX string representing `line_name`, wrapped in ``$...$`` when
        `Settings.math_mode` is True.
    """
    mol, trans = molecule_and_transition(line_name)

    # Convert both parts and strip the math delimiters they may carry,
    # so that the whole line ends up in a single math environment
    latex_parts = (
        molecule_to_latex(mol).replace("$", ""),
        transition_to_latex(trans).replace("$", ""),
    )
    out = "\\,".join(latex_parts)

    if not Settings.math_mode:
        return out
    return "$" + out + "$"
def remove_hyperfine(line_name: str) -> str:
    """
    Returns the formatted line `line_name` without the hyperfine energy levels.

    Every level whose name starts with ``f`` is dropped from both the upper
    and the lower state. The returned name is rebuilt from the output of
    `molecule_and_transition`, so it is lower-cased and stripped even when
    there is no such level.

    Parameters
    ----------
    line_name : str
        Formatted line.

    Returns
    -------
    str
        New formatted line name without hyperfine energy levels.

    Raises
    ------
    ValueError
        If the transition does not contain exactly one double underscore.
    """
    mol, trans = molecule_and_transition(line_name)

    if trans.count("__") != 1:
        # NOTE: the message reports the offending transition string `trans`
        raise ValueError(
            f"{trans} is not a valid transition because it does not contain one occurence of the double underscore"
        )

    hyperfine_prefixes = ("f",)

    def _drop_hyperfine(levels: str) -> str:
        """Returns `levels` without the tokens describing hyperfine levels."""
        kept = []
        previous_dropped = False
        for token in levels.split("_"):
            if token.startswith(hyperfine_prefixes):
                previous_dropped = True
                continue
            if previous_dropped and token.isdigit():
                # Denominator of a dropped fractional level (e.g. "f5_2"):
                # it belongs to the hyperfine level and must go with it.
                previous_dropped = False
                continue
            previous_dropped = False
            kept.append(token)
        return "_".join(kept)

    trans_high, trans_low = trans.split("__")
    return f"{mol}_{_drop_hyperfine(trans_high)}__{_drop_hyperfine(trans_low)}"
def is_hyperfine(line: str, other: Optional[str] = None) -> bool:
    """
    Returns whether the formatted line `line` contains hyperfine levels.

    If `other` is not None, returns whether the two lines correspond to the
    same hyperfine structure, i.e. whether they are identical once their
    hyperfine levels are removed. If `line` and `other` are the exact same
    line, returns True.

    Parameters
    ----------
    line : str
        Formatted line.
    other : str, optional
        Other formatted line. Default: None.

    Returns
    -------
    bool
        Whether `line` contains hyperfine levels or whether line and other
        belongs to the same hyperfine structure.
    """
    _line = remove_hyperfine(line)

    if other is None:
        # `remove_hyperfine` returns a lower-cased and stripped name: apply the
        # same normalization to the input, otherwise a difference in case or
        # padding alone would be reported as a hyperfine structure.
        return line.lower().strip() != _line

    _other = remove_hyperfine(other)
    return _line == _other
# Local functions

# Patterns matching the next item at the start of a level description.
# Raw strings are required: "\A" and "\d" are not valid string escapes.
# A level value is either a fraction "a_b", a decimal "adb" or an integer "a".
_ENERGY_LEVEL_RE = re.compile(r"\A(fif|j|v|n|f|ka|kc)(\d*_\d\d*|\d*d\d*|\d*)")
_ELECTRONIC_STATE_RE = re.compile(r"\Ael\d*(po|so|do|p|s|d)")
_LITERAL_RE = re.compile(r"\A(pp|pm)")
_FRACTIONAL_RE = re.compile(r"\A\d*[/.]\d*\Z")


def _list_transitions(trans: str) -> Tuple[List[str], List[str], List[str]]:
    """
    Returns the lists of energy level names, high energy level and low energy level.

    The upper and lower descriptions (separated by a double underscore) are
    consumed in lockstep, one item at a time. An item is either an energy
    level, an electronic state (reported under the name ``"el"``) or a
    literal (reported under the name ``"lit"``). Items are kept or skipped
    according to `Settings`.

    Parameters
    ----------
    trans : str
        Formatted transition.

    Returns
    -------
    list of str
        Names of the kept items.
    list of str
        Values of the kept items in the upper state.
    list of str
        Values of the kept items in the lower state.

    Raises
    ------
    ValueError
        If `trans` does not contain exactly one double underscore, if the
        items are not in the same order in both states, or if an item cannot
        be parsed.
    RuntimeError
        If the two states do not contain the same number of items.
    """
    if trans.count("__") != 1:
        raise ValueError(
            f"{trans} is not a valid transition because it does not contain one occurence of the double underscore"
        )

    high, low = trans.split("__")

    names: List[str] = []
    high_lvls: List[str] = []
    low_lvls: List[str] = []

    while high != "" and low != "":
        # Match energy levels
        res_high = _ENERGY_LEVEL_RE.match(high)
        res_low = _ENERGY_LEVEL_RE.match(low)
        if res_high is not None and res_low is not None:
            e_high, e_low = res_high.group(), res_low.group()
            n_high, n_low = res_high.group(1), res_low.group(1)
            if n_high != n_low:
                raise ValueError(
                    f"{trans} is not a valid transition because the energy levels are not in the same order in the description of the high and low levels"
                )
            if Settings.only_rotational:
                keep = n_high == "j"
            else:
                keep = n_high not in Settings.ignored_transitions
            if keep:
                names.append(n_high)
                # "a_b" encodes the fraction a/b and "adb" the decimal a.b
                high_lvls.append(
                    res_high.group(2).replace("_", "/").replace("d", ".")
                )
                low_lvls.append(res_low.group(2).replace("_", "/").replace("d", "."))
            high = _removeprefixes(high, e_high, "_")
            low = _removeprefixes(low, e_low, "_")
            continue

        # Match electronic state
        res_high = _ELECTRONIC_STATE_RE.match(high)
        res_low = _ELECTRONIC_STATE_RE.match(low)
        if res_high is not None and res_low is not None:
            e_high, e_low = res_high.group(), res_low.group()
            if not Settings.ignore_electronic and not Settings.only_rotational:
                names.append("el")
                high_lvls.append(_removeprefixes(e_high, "el"))
                low_lvls.append(_removeprefixes(e_low, "el"))
            high = _removeprefixes(high, e_high, "_")
            low = _removeprefixes(low, e_low, "_")
            continue

        # Match literals
        res_high = _LITERAL_RE.match(high)
        res_low = _LITERAL_RE.match(low)
        if res_high is not None and res_low is not None:
            e_high, e_low = res_high.group(), res_low.group()
            if not Settings.ignore_litterals and not Settings.only_rotational:
                names.append("lit")
                high_lvls.append(e_high)
                low_lvls.append(e_low)
            high = _removeprefixes(high, e_high, "_")
            low = _removeprefixes(low, e_low, "_")
            continue

        # Nothing matched at the current position
        raise ValueError(f"transition {trans} is not a valid formatted line")

    # The loop stops as soon as one description is exhausted: if the other one
    # still holds items, the two states do not describe the same variables.
    # (This check has to be done here; inside the loop both are non-empty.)
    if high != "" or low != "":
        raise RuntimeError(
            "high and low levels does not contain the same number of variables"
        )

    return names, high_lvls, low_lvls


def _removeprefixes(string: str, *prefixes: str) -> str:
    """
    Returns a str with the given prefix string removed if present.

    Return a copy of `string` with the prefixes `prefixes` removed iteratively
    if they exists: each prefix is tested, in order, against the result of
    the previous removals.

    Note
    ----
    This method doesn't use the builtin method `removeprefix` to ensure the
    code to be available to users with `Python < 3.9`.
    """
    for prefix in prefixes:
        if string.startswith(prefix):
            string = string[len(prefix) :]
    return string


def _numerical_to_latex(num: str) -> str:
    """
    Returns a LaTeX string representing a numerical value `num`.

    This value can be formatted in several ways: `'a'`, `'a/b'` or `'a.b'`
    where a and b are integers, potentially over several digits. Values that
    reduce to an integer are displayed as such, the other ones as a fraction.
    For decimals, only the fractional parts ``0`` and ``5`` are supported;
    any other one is ignored with a warning.

    Parameters
    ----------
    num : str
        Formatted number.

    Returns
    -------
    str
        LaTeX representation of the number. Inputs that are not fractions or
        decimals, or that cannot be evaluated (missing numerator or
        denominator, zero denominator), are returned unchanged.
    """
    if _FRACTIONAL_RE.match(num) is None:
        return num

    if "/" in num:
        a, b = num.split("/")
        # Degenerate fraction: nothing sensible to compute
        if a == "" or b == "" or int(b) == 0:
            return num
        n, d = int(a), int(b)
    else:
        a, b = num.split(".")
        # Missing integer part: nothing sensible to compute
        if a == "":
            return num
        if b == "0":
            n, d = int(a), 1
        elif b == "5":
            # a.5 is the half-integer (2a + 1) / 2
            n, d = 2 * int(a) + 1, 2
        else:
            warn(f"x.{b} floats has not been implemented. Ignoring the floating part.")
            n, d = int(a), 1  # Default behavior

    if n % d == 0:
        return f"{n // d}"
    return r"\frac{" + str(n) + r"}{" + str(d) + r"}"


def _transition(name: str, high_lvl: str, low_lvl: str) -> Tuple[str, str]:
    """
    Returns the LaTeX strings representing a non electronic transition.

    Parameters
    ----------
    name : str
        Energy name.
    high_lvl : str
        Higher energy level.
    low_lvl : str
        Lower energy level. Can be the same as `high_lvl`.

    Returns
    -------
    str
        Higher energy level formatted in LaTeX.
    str
        Lower energy level formatted in LaTeX. May be the same as the higher level.
    """
    # Unknown names are displayed as "name=value"
    name_latex = _energy_to_latex.get(name, name + "={}")

    high_lvl_latex = _numerical_to_latex(high_lvl)
    low_lvl_latex = _numerical_to_latex(low_lvl)

    return (name_latex.format(high_lvl_latex), name_latex.format(low_lvl_latex))


def _eltransition(high: str, low: str) -> Tuple[str, str]:
    """
    Returns the LaTeX strings representing an electronic transition.

    A configuration is made of leading digits followed by an orbital name.
    Configurations whose orbital is not a key of `_elstate_to_latex` are
    returned unchanged.

    Parameters
    ----------
    high : str
        Higher energy electronic configuration.
    low : str
        Lower energy electronic configuration. Can be the same as `high`.

    Returns
    -------
    str
        Higher energy electronic configuration formatted in LaTeX.
    str
        Lower energy electronic configuration formatted in LaTeX. May be the same as the higher configuration.
    """

    def _convert(state: str) -> str:
        # Split on the whole run of leading digits: the number may have
        # several digits (or none at all).
        orbital = state.lstrip("0123456789")
        number = state[: len(state) - len(orbital)]
        if orbital in _elstate_to_latex:
            return _elstate_to_latex[orbital].format(number)
        return state

    return (_convert(high), _convert(low))


def _littransition(high: str, low: str) -> Tuple[str, str]:
    """
    Returns the LaTeX strings representing whatever transition.

    Parameters
    ----------
    high : str
        Higher configuration.
    low : str
        Lower configuration. Can be the same as `high`.

    Returns
    -------
    str
        Higher configuration formatted in LaTeX.
    str
        Lower configuration formatted in LaTeX.
    """
    return (_literal_to_latex.get(high, high), _literal_to_latex.get(low, low))


def _sort_transitions(
    names: List[str], high_lvls: List[str], low_lvls: List[str]
) -> str:
    """
    Returns a LaTeX string representing the energy transitions.

    The result has the form ``(upper state \\to lower state)`` where each
    state is the comma separated list of its items, in the order of `names`.

    Parameters
    ----------
    names : list of str
        Energies names.
    high_lvls : list of str
        List of higher level for each energy.
    low_lvls : List of str.
        List of lower level for each energy.

    Returns
    -------
    str
        String representing the upper state, then the lower state.

    Raises
    ------
    ValueError
        If the three lists do not have the same length.
    """
    if len(high_lvls) != len(names) or len(low_lvls) != len(names):
        raise ValueError("names, high_lvls and low_lvls must have the same length")

    if len(names) == 0:
        return ""

    descrs_high, descrs_low = [], []
    for name, high, low in zip(names, high_lvls, low_lvls):
        if name == "lit":
            descr = _littransition(high, low)
        elif name == "el":
            descr = _eltransition(high, low)
        else:
            descr = _transition(name, high, low)
        descrs_high.append(descr[0])
        descrs_low.append(descr[1])

    separator = ",\\,"
    return "({}\\,\\to\\,{})".format(
        separator.join(descrs_high), separator.join(descrs_low)
    )


def _simplified_transition(
    names: List[str], high_lvls: List[str], low_lvls: List[str]
) -> str:
    """
    Returns a LaTeX string representing only the rotational level transitions.

    Parameters
    ----------
    names : list of str
        Energies names with a single element, which must be ``"j"``.
    high_lvls : list of str
        List of higher level with a single element.
    low_lvls : List of str.
        List of lower level with a single element.

    Returns
    -------
    str
        String representing the rotation level transitions.
    """
    assert len(names) == len(high_lvls) == len(low_lvls) == 1
    assert names[0] == "j"

    h, l = high_lvls[0], low_lvls[0]
    return "({}-{})".format(_numerical_to_latex(h), _numerical_to_latex(l))