# Source code for astroquery.linelists.jplspec.core

# Licensed under a 3-clause BSD style license - see LICENSE.rst
import os
import warnings

import astropy.units as u
import numpy as np
from astropy.io import ascii
from astropy import table
from astroquery.query import BaseQuery
from astroquery.linelists.core import parse_letternumber, parse_molid
# import configurable items declared in __init__.py
from astroquery.linelists.jplspec import conf, lookup_table
from astroquery.exceptions import EmptyResponseError, InvalidQueryError
from astroquery.utils import process_asyncs
from urllib.parse import parse_qs


__all__ = ['JPLSpec', 'JPLSpecClass']


def data_path(filename):
    """Return the path of ``filename`` inside the ``data`` directory next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, 'data', filename)



# [docs]
class JPLSpecClass(BaseQuery):
    """
    Query class for the JPL spectral-line catalog.

    Line searches are POSTed to ``URL`` and whole-molecule catalog files are
    fetched by `get_molecule`; both use ``TIMEOUT`` for their requests.
    """

    # use the Configuration Items imported from __init__.py
    URL = conf.server
    TIMEOUT = conf.timeout

    def __init__(self):
        """Initialise the query object; all set-up is delegated to the base class."""
        super().__init__()


    # [docs]
    def query_lines_async(self, min_frequency, max_frequency, *,
                          min_strength=-500,
                          max_lines=2000, molecule='All', flags=0,
                          parse_name_locally=False,
                          get_query_payload=False, cache=True
                          ):
        """
        Creates an HTTP POST request based on the desired parameters and
        returns a response.

        Parameters
        ----------
        min_frequency : `astropy.units`
            Minimum frequency (or any spectral() equivalent)
        max_frequency : `astropy.units`
            Maximum frequency (or any spectral() equivalent)
        min_strength : int, optional
            Minimum strength in catalog units, the default is -500
        max_lines :  int, optional
            Maximum number of lines to query, the default is 2000.
            The most the query allows is 100000

        molecule : list, string of regex if parse_name_locally=True, optional
            Identifiers of the molecules to search for. If this parameter
            is not provided the search will match any species. Default is 'All'.

        flags : int, optional
            Regular expression flags. Default is set to 0

        parse_name_locally : bool, optional
            When set to True it allows the method to parse through catdir.cat
            in order to match the regex inputted in the molecule parameter
            and request the corresponding tags of the matches instead. Default
            is set to False

        get_query_payload : bool, optional
            When set to `True` the method should return the HTTP request
            parameters as a dict. Default value is set to False
        cache : bool
            Defaults to True. If set overrides global caching behavior.
            See :ref:`caching documentation <astroquery_cache>`.

        Returns
        -------
        response : `requests.Response`
            The HTTP response returned from the service.

        Raises
        ------
        InvalidQueryError
            If ``parse_name_locally`` is True and ``molecule`` matches no
            species in the local catalog directory.

        Examples
        --------
        >>> table = JPLSpec.query_lines(min_frequency=100*u.GHz,
        ...                             max_frequency=200*u.GHz,
        ...                             min_strength=-500, molecule=18003) # doctest: +REMOTE_DATA
        >>> print(table) # doctest: +SKIP
            FREQ     ERR    LGINT    DR    ELO    GUP  TAG   QNFMT   QN'      QN"
        ----------- ------ -------- --- --------- --- ------ ----- -------- --------
        115542.5692 0.6588 -13.2595   3 4606.1683  35  18003  1404 17 810 0 18 513 0
         139614.293   0.15  -9.3636   3 3080.1788  87 -18003  1404 14 6 9 0 15 312 0
         177317.068   0.15 -10.3413   3 3437.2774  31 -18003  1404 15 610 0 16 313 0
         183310.087  0.001  -3.6463   3  136.1639   7 -18003  1404  3 1 3 0  2 2 0 0
        """
        # first initialize the dictionary of HTTP request parameters
        payload = {}

        if min_frequency is not None and max_frequency is not None:
            # allow setting payload without having *ANY* valid frequencies set
            min_frequency = min_frequency.to(u.GHz, u.spectral())
            max_frequency = max_frequency.to(u.GHz, u.spectral())
            # wavelength-like inputs invert the ordering once converted to
            # frequency, so make sure the bounds are sorted
            if min_frequency > max_frequency:
                min_frequency, max_frequency = max_frequency, min_frequency

            payload['MinNu'] = min_frequency.value
            payload['MaxNu'] = max_frequency.value

        if max_lines is not None:
            payload['MaxLines'] = max_lines

        payload['UnitNu'] = 'GHz'
        payload['StrLim'] = min_strength

        if molecule is not None:
            if parse_name_locally:
                self.lookup_ids = build_lookup()
                matches = tuple(self.lookup_ids.find(molecule, flags).values())
                # The check must be on the resolved matches, not on the
                # user-supplied pattern: a non-empty regex can match nothing.
                if len(matches) == 0:
                    raise InvalidQueryError('No matching species found. Please '
                                            'refine your search or read the Docs '
                                            'for pointers on how to search.')
                payload['Mol'] = matches
            else:
                payload['Mol'] = molecule

        # remembered so that _parse_result can warn about truncated results
        self.maxlines = max_lines

        payload = list(payload.items())

        if get_query_payload:
            return payload
        # BaseQuery classes come with a _request method that includes a
        # built-in caching system
        response = self._request(method='POST', url=self.URL, data=payload,
                                 timeout=self.TIMEOUT, cache=cache)
        response.raise_for_status()

        return response



    # [docs]
    def query_lines(self, min_frequency, max_frequency, *,
                    min_strength=-500,
                    max_lines=2000, molecule='All', flags=0,
                    parse_name_locally=False,
                    get_query_payload=False,
                    fallback_to_getmolecule=True,
                    cache=True):
        """
        Synchronous counterpart of `query_lines_async`.

        Every argument except ``fallback_to_getmolecule`` is forwarded
        unchanged to `query_lines_async`; see that method for their meaning.

        ``fallback_to_getmolecule`` exists only on this method.  It is handed
        to the result parser and decides whether `get_molecule` is used when
        the query service returns no lines (a workaround needed while
        JPLSpec's query tool is broken).
        """
        request_kwargs = dict(
            min_frequency=min_frequency,
            max_frequency=max_frequency,
            min_strength=min_strength,
            max_lines=max_lines,
            molecule=molecule,
            flags=flags,
            parse_name_locally=parse_name_locally,
            get_query_payload=get_query_payload,
            cache=cache,
        )
        outcome = self.query_lines_async(**request_kwargs)

        # When only the payload was requested there is nothing to parse.
        if get_query_payload:
            return outcome
        return self._parse_result(outcome, fallback_to_getmolecule=fallback_to_getmolecule)


    # NOTE: this assignment replaces the docstring written inside
    # ``query_lines`` with one generated from ``query_lines_async.__doc__``,
    # so text that exists only in the hand-written docstring is not what
    # ``help(query_lines)`` shows at runtime.
    query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__)

    def _parse_result(self, response, *, verbose=False, fallback_to_getmolecule=False):
        """
        Parse a response into an `~astropy.table.Table`

        The catalog data files are composed of 80-character card images, with
        one card image per spectral line.  The format of each card image is:
        FREQ, ERR, LGINT, DR,  ELO, GUP, TAG, QNFMT,  QN',  QN"
        (F13.4,F8.4, F8.4,  I2,F10.4,  I3,  I7,    I4,  6I2,  6I2)

        FREQ:  Frequency of the line in MHz.
        ERR:   Estimated or experimental error of FREQ in MHz.
        LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at
            300 K.

        DR:    Degrees of freedom in the rotational partition function (0 for atoms,
            2 for linear molecules, and 3 for nonlinear molecules).

        ELO:   Lower state energy in cm^{-1} relative to the ground state.
        GUP:   Upper state degeneracy.
        TAG:   Species tag or molecular identifier.
            A negative value flags that the line frequency has
            been measured in the laboratory.  The absolute value of TAG is then the
            species tag and ERR is the reported experimental error.  The three most
            significant digits of the species tag are coded as the mass number of
            the species.

        QNFMT: Identifies the format of the quantum numbers
        QN':   Quantum numbers for the upper state.
        QN":   Quantum numbers for the lower state.

        Parameters
        ----------
        response : `requests.Response`
            Response of the line query; its ``text`` is parsed and, for the
            fallback path, its ``request.body`` is read to recover the
            requested molecule identifiers.
        verbose : bool, optional
            Not used currently.
        fallback_to_getmolecule : bool, optional
            If the service reports zero lines, fetch the full catalog of each
            requested molecule with `get_molecule` instead of raising.

        Returns
        -------
        Table : `~astropy.table.Table`
            The parsed lines.

        Raises
        ------
        EmptyResponseError
            If the service reports zero lines and ``fallback_to_getmolecule``
            is False.
        """

        if 'Zero lines were found' in response.text:
            if fallback_to_getmolecule:
                self.lookup_ids = build_lookup()
                # recover the molecule identifiers from the form-encoded body
                # of the request that produced this response
                payload = parse_qs(response.request.body)
                tbs = [self.get_molecule(mol) for mol in payload['Mol']]
                if len(tbs) > 1:
                    mols = []
                    for tb, mol in zip(tbs, payload['Mol']):
                        tb['Name'] = self.lookup_ids.find(mol, flags=0)
                        # prefix the per-molecule metadata keys so they do not
                        # collide when the tables are stacked
                        for key in list(tb.meta.keys()):
                            tb.meta[f'{mol}_{key}'] = tb.meta.pop(key)
                        mols.append(mol)
                    tb = table.vstack(tbs)
                    tb.meta['molecule_list'] = mols
                else:
                    tb = tbs[0]
                    tb.meta['molecule_id'] = payload['Mol'][0]
                    tb.meta['molecule_name'] = self.lookup_ids.find(payload['Mol'][0], flags=0)

                return tb
            else:
                raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")

        # data starts at 0 since regex was applied
        # Warning for a result with more than 1000 lines:
        # THIS form is currently limited to 1000 lines.
        result = ascii.read(response.text, header_start=None, data_start=0,
                            comment=r'THIS|^\s{12,14}\d{4,6}.*|CADDIR CATDIR',
                            names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP',
                                   'TAG', 'QNFMT', 'QN\'', 'QN"'),
                            col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67),
                            format='fixed_width', fast_reader=False)

        # ``maxlines`` is set by query_lines_async and is None when the caller
        # passed max_lines=None; it is absent if no query has been issued on
        # this instance.  In both cases there is no limit to compare against.
        maxlines = getattr(self, 'maxlines', None)
        if maxlines is not None and len(result) > maxlines:
            warnings.warn("This form is currently limited to {0} lines. "
                          "Please limit your search.".format(maxlines))

        result['FREQ'].unit = u.MHz
        result['ERR'].unit = u.MHz
        result['LGINT'].unit = u.nm**2 * u.MHz
        result['ELO'].unit = u.cm**(-1)

        return result


    # [docs]
    def get_species_table(self, *, catfile='catdir.cat'):
        """
        A directory of the catalog is found in a file called 'catdir.cat.'
        Each element of this directory is an 80-character record with the
        following format:

        | TAG,  NAME, NLINE,  QLOG,  VER
        | (I6,X, A13, I6, 7F7.4,  I2)

        Parameters
        ----------
        catfile : str, name of file, default 'catdir.cat'
            The catalog file, installed locally along with the package

        Returns
        -------
        Table: `~astropy.table.Table`
            | TAG : The species tag or molecular identifier.
            | NAME : An ASCII name for the species.
            | NLINE : The number of lines in the catalog.
            | QLOG : A seven-element vector containing the base 10 logarithm of
                the partition function for temperatures of 300 K, 225 K, 150 K,
                75 K, 37.5 K, 18.75 K, and 9.375 K, respectively.
            | VER : The version of the calculation for this species in the catalog.
                The version number is followed by * if the entry is newer than the
                last edition of the catalog.

        """

        result = ascii.read(data_path(catfile), header_start=None, data_start=0,
                            names=('TAG', 'NAME', 'NLINE', 'QLOG1', 'QLOG2',
                                   'QLOG3', 'QLOG4', 'QLOG5', 'QLOG6',
                                   'QLOG7', 'VER'),
                            col_starts=(0, 6, 20, 26, 33, 40, 47, 54, 61,
                                        68, 75),
                            format='fixed_width', fast_reader=False)

        # Temperatures (K) of QLOG1..QLOG7, in column order.  Using a single
        # list for both the per-column and the table-level metadata keeps the
        # two from drifting apart (the table-level list used to say 18.5
        # where the columns and the docstring say 18.75).
        temperatures = [300, 225, 150, 75, 37.5, 18.75, 9.375]

        # store the corresponding temperatures as metadata
        for index, temperature in enumerate(temperatures, start=1):
            result[f'QLOG{index}'].meta = {'Temperature (K)': temperature}
        result.meta = {'Temperature (K)': temperatures}

        return result



    # [docs]
    def get_molecule(self, molecule_id, *, cache=True):
        """
        Download and parse the complete JPL catalog file of one molecule.

        Parameters
        ----------
        molecule_id : int or str
            The molecule tag/identifier. Can be an integer (e.g., 18003 for H2O)
            or a zero-padded 6-character string (e.g., '018003').
        cache : bool
            Defaults to True. If set overrides global caching behavior.

        Returns
        -------
        Table : `~astropy.table.Table`
            All spectral lines of the requested molecule.  When the tag is
            listed in the local species table, that row is stored as the
            table's ``meta``.

        Raises
        ------
        EmptyResponseError
            If the server answers with its "not found" page.

        Examples
        --------
        >>> table = JPLSpec.get_molecule(18003)  # doctest: +SKIP
        >>> print(table)  # doctest: +SKIP
        """
        tag_str = parse_molid(molecule_id)
        catalog_url = f'https://spec.jpl.nasa.gov/ftp/pub/catalog/c{tag_str}.cat'

        # Fetch the per-molecule catalog file
        reply = self._request(method='GET', url=catalog_url,
                              timeout=self.TIMEOUT, cache=cache)
        reply.raise_for_status()

        # A missing file is detected from the text of the returned page
        if 'The requested URL was not found on this server.' in reply.text:
            raise EmptyResponseError(f"No data found for molecule ID {molecule_id}.")

        lines = self._parse_cat(reply)

        # Attach the matching species-table row (if any) as table metadata
        catalog = self.get_species_table()
        tag_matches = catalog[catalog['TAG'] == int(tag_str)]
        if len(tag_matches) > 0:
            first_match = tag_matches[0]
            lines.meta = dict(zip(tag_matches.colnames, first_match))

        return lines


    def _parse_cat(self, response, *, verbose=False):
        """
        Parse a JPL-format catalog file into an `~astropy.table.Table`.

        The catalog data files are composed of 80-character card images, with
        one card image per spectral line.  The format of each card image is:
        FREQ, ERR, LGINT, DR,  ELO, GUP, TAG, QNFMT,  QN',  QN"
        (F13.4,F8.4, F8.4,  I2,F10.4,  I3,  I7,    I4,  6I2,  6I2)

        https://spec.jpl.nasa.gov/ftp/pub/catalog/doc/catintro.pdf

        Parameters
        ----------
        response : `requests.Response`
            Response whose ``text`` attribute holds the catalog file content.
        verbose : bool, optional
            Not used currently.

        Returns
        -------
        Table : `~astropy.table.Table`
            Parsed catalog data.  The quantum-number columns are split into
            one column per quantum number (``QN'1``, ``QN"1``, ...) when the
            format declares more than one; ``TAG`` holds the absolute species
            tag and the boolean ``Lab`` column records whether the original
            tag was negative.

        Raises
        ------
        EmptyResponseError
            If the response text is blank or reports zero lines.
        NotImplementedError
            For molecule 32001 (O2), which does not follow the format standard.
        ValueError
            If a quantum-number field cannot be interpreted.
        """
        text = response.text
        if 'Zero lines were found' in text or len(text.strip()) == 0:
            raise EmptyResponseError(f"Response was empty; message was '{text}'.")

        # Parse the catalog file with fixed-width format
        # Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12)
        result = ascii.read(text, header_start=None, data_start=0,
                            comment=r'THIS|^\s{12,14}\d{4,6}.*',
                            names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP',
                                   'TAG', 'QNFMT', 'QN\'', 'QN"'),
                            col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67),
                            format='fixed_width', fast_reader=False)

        # Ensure TAG is integer type
        result['TAG'] = result['TAG'].astype(int)

        # Add units
        result['FREQ'].unit = u.MHz
        result['ERR'].unit = u.MHz
        result['LGINT'].unit = u.nm**2 * u.MHz
        result['ELO'].unit = u.cm**(-1)

        # split table by qnfmt; each chunk must be separately parsed.
        qnfmts = np.unique(result['QNFMT'])
        tables = [result[result['QNFMT'] == qq] for qq in qnfmts]

        # some tables have +/-/blank entries in QNs
        # pm_is_ok should be True when the QN columns contain '+' or '-'.
        # (can't do a str check on np.integer dtype so have to filter that out first)
        pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer))
                    and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"]))

        # the only non-numeric tokens int_or_pm may hand back
        pm_tokens = ('', '+', '-')

        def int_or_pm(st):
            try:
                return int(st)
            except ValueError:
                try:
                    return parse_letternumber(st)
                except ValueError:
                    if pm_is_ok and (st.strip() == '' or st.strip() == '+' or st.strip() == '-'):
                        return st.strip()
                    else:
                        raise ValueError(f'"{st}" is not a valid +/-/blank entry')

        def is_pm_token(value):
            # isinstance guard avoids comparing numeric scalars with strings
            return isinstance(value, str) and value in pm_tokens

        # At least this molecule, NH, claims 5 QNs but has only 4
        bad_qnfmt_dict = {
            15001: 1234,
        }
        # TAG is negative for laboratory-measured lines, and abs() is only
        # applied to the column at the very end; take the absolute value here
        # so the species lookups below do not depend on the first row's sign.
        mol_tag = abs(int(result['TAG'][0]))

        if mol_tag in (32001,):
            raise NotImplementedError("Molecule O2 (32001) does not follow the format standard.")

        for tbl in tables:
            # the last decimal digit of QNFMT is used as the number of QNs
            if mol_tag in bad_qnfmt_dict:
                n_qns = bad_qnfmt_dict[mol_tag] % 10
            else:
                n_qns = tbl['QNFMT'][0] % 10
            if n_qns > 1:
                qnlen = 2 * n_qns
                for ii in range(n_qns):
                    if tbl["QN'"].dtype in (int, np.int32, np.int64):
                        # for the case where it was already parsed as int
                        # (53005 is an example)
                        tbl[f"QN'{ii+1}"] = tbl["QN'"]
                        tbl[f'QN"{ii+1}'] = tbl['QN"']
                    else:
                        # string parsing can truncate to length=2n or 2n-1 depending
                        # on whether there are any two-digit QNs in the column
                        ind1 = ii * 2
                        ind2 = ii * 2 + 2
                        # rjust(qnlen) is needed to enforce that all strings retain their exact original shape
                        qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']]
                        qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']]
                        # Keep the pair of columns as strings whenever either
                        # state holds a '+', '-' or blank token; converting
                        # such a column to int would raise.  (Previously only
                        # '+' in the upper state was checked.)
                        has_pm = (any(is_pm_token(x) for x in qnp)
                                  or any(is_pm_token(x) for x in qnpp))
                        dtype = str if has_pm else int
                        tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype)
                        tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype)
                del tbl['QN\'']
                del tbl['QN"']
            else:
                tbl['QN\''] = np.array(list(map(parse_letternumber, tbl['QN\''])), dtype=int)
                tbl['QN"'] = np.array(list(map(parse_letternumber, tbl['QN"'])), dtype=int)

        result = table.vstack(tables)

        # Add laboratory measurement flag
        # A negative TAG value indicates laboratory-measured frequency
        result['Lab'] = result['TAG'] < 0
        # Convert TAG to absolute value
        result['TAG'] = abs(result['TAG'])

        return result



# Module-level instance exported through ``__all__``; ``build_lookup`` below
# reads the species table through it.
JPLSpec = JPLSpecClass()


def build_lookup():
    """
    Build a species-name to tag lookup table from the local species catalog.

    Returns
    -------
    lookup_table.Lookuptable
        Lookup object wrapping a ``{NAME: TAG}`` dictionary built from
        `JPLSpecClass.get_species_table`.
    """
    species = JPLSpec.get_species_table()
    names = list(species['NAME'])
    tags = list(species['TAG'])
    name_to_tag = dict(zip(names, tags))
    return lookup_table.Lookuptable(name_to_tag)
# Navigation
#
# Source code for astroquery.linelists.jplspec.core