# Licensed under a 3-clause BSD style license - see LICENSE.rst
import os
import warnings
import astropy.units as u
import numpy as np
from astropy.io import ascii
from astropy import table
from astroquery.query import BaseQuery
from astroquery.linelists.core import parse_letternumber, parse_molid
# import configurable items declared in __init__.py
from astroquery.linelists.jplspec import conf, lookup_table
from astroquery.exceptions import EmptyResponseError, InvalidQueryError
from astroquery.utils import process_asyncs
from urllib.parse import parse_qs
__all__ = ['JPLSpec', 'JPLSpecClass']
def data_path(filename):
data_dir = os.path.join(os.path.dirname(__file__), 'data')
return os.path.join(data_dir, filename)
[docs]
class JPLSpecClass(BaseQuery):
# use the Configuration Items imported from __init__.py
URL = conf.server
TIMEOUT = conf.timeout
def __init__(self):
super().__init__()
[docs]
def query_lines_async(self, min_frequency, max_frequency, *,
min_strength=-500,
max_lines=2000, molecule='All', flags=0,
parse_name_locally=False,
get_query_payload=False, cache=True
):
"""
Creates an HTTP POST request based on the desired parameters and
returns a response.
Parameters
----------
min_frequency : `astropy.units`
Minimum frequency (or any spectral() equivalent)
max_frequency : `astropy.units`
Maximum frequency (or any spectral() equivalent)
min_strength : int, optional
Minimum strength in catalog units, the default is -500
max_lines : int, optional
Maximum number of lines to query, the default is 2000.
The most the query allows is 100000
molecule : list, string of regex if parse_name_locally=True, optional
Identifiers of the molecules to search for. If this parameter
is not provided the search will match any species. Default is 'All'.
flags : int, optional
Regular expression flags. Default is set to 0
parse_name_locally : bool, optional
When set to True it allows the method to parse through catdir.cat
in order to match the regex inputted in the molecule parameter
and request the corresponding tags of the matches instead. Default
is set to False
get_query_payload : bool, optional
When set to `True` the method should return the HTTP request
parameters as a dict. Default value is set to False
cache : bool
Defaults to True. If set overrides global caching behavior.
See :ref:`caching documentation <astroquery_cache>`.
Returns
-------
response : `requests.Response`
The HTTP response returned from the service.
Examples
--------
>>> table = JPLSpec.query_lines(min_frequency=100*u.GHz,
... max_frequency=200*u.GHz,
... min_strength=-500, molecule=18003) # doctest: +REMOTE_DATA
>>> print(table) # doctest: +SKIP
FREQ ERR LGINT DR ELO GUP TAG QNFMT QN' QN"
----------- ------ -------- --- --------- --- ------ ----- -------- --------
115542.5692 0.6588 -13.2595 3 4606.1683 35 18003 1404 17 810 0 18 513 0
139614.293 0.15 -9.3636 3 3080.1788 87 -18003 1404 14 6 9 0 15 312 0
177317.068 0.15 -10.3413 3 3437.2774 31 -18003 1404 15 610 0 16 313 0
183310.087 0.001 -3.6463 3 136.1639 7 -18003 1404 3 1 3 0 2 2 0 0
"""
# first initialize the dictionary of HTTP request parameters
payload = dict()
if min_frequency is not None and max_frequency is not None:
# allow setting payload without having *ANY* valid frequencies set
min_frequency = min_frequency.to(u.GHz, u.spectral())
max_frequency = max_frequency.to(u.GHz, u.spectral())
if min_frequency > max_frequency:
min_frequency, max_frequency = max_frequency, min_frequency
payload['MinNu'] = min_frequency.value
payload['MaxNu'] = max_frequency.value
if max_lines is not None:
payload['MaxLines'] = max_lines
payload['UnitNu'] = 'GHz'
payload['StrLim'] = min_strength
if molecule is not None:
if parse_name_locally:
self.lookup_ids = build_lookup()
payload['Mol'] = tuple(self.lookup_ids.find(molecule, flags).values())
if len(molecule) == 0:
raise InvalidQueryError('No matching species found. Please '
'refine your search or read the Docs '
'for pointers on how to search.')
else:
payload['Mol'] = molecule
self.maxlines = max_lines
payload = list(payload.items())
if get_query_payload:
return payload
# BaseQuery classes come with a _request method that includes a
# built-in caching system
response = self._request(method='POST', url=self.URL, data=payload,
timeout=self.TIMEOUT, cache=cache)
response.raise_for_status()
return response
[docs]
def query_lines(self, min_frequency, max_frequency, *,
min_strength=-500,
max_lines=2000, molecule='All', flags=0,
parse_name_locally=False,
get_query_payload=False,
fallback_to_getmolecule=True,
cache=True):
"""
Query the JPLSpec service for spectral lines.
This is a synchronous version of `query_lines_async`.
See `query_lines_async` for full parameter documentation.
fallback_to_getmolecule is a unique parameter to this method that
governs whether `get_molecule` will be used when no results are returned
by the query service. This workaround is needed while JPLSpec's query
tool is broken.
"""
response = self.query_lines_async(min_frequency=min_frequency,
max_frequency=max_frequency,
min_strength=min_strength,
max_lines=max_lines,
molecule=molecule,
flags=flags,
parse_name_locally=parse_name_locally,
get_query_payload=get_query_payload,
cache=cache)
if get_query_payload:
return response
else:
return self._parse_result(response, fallback_to_getmolecule=fallback_to_getmolecule)
query_lines.__doc__ = process_asyncs.async_to_sync_docstr(query_lines_async.__doc__)
def _parse_result(self, response, *, verbose=False, fallback_to_getmolecule=False):
"""
Parse a response into an `~astropy.table.Table`
The catalog data files are composed of 80-character card images, with
one card image per spectral line. The format of each card image is:
FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN"
(F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2)
FREQ: Frequency of the line in MHz.
ERR: Estimated or experimental error of FREQ in MHz.
LGINT: Base 10 logarithm of the integrated intensity in units of nm^2 MHz at
300 K.
DR: Degrees of freedom in the rotational partition function (0 for atoms,
2 for linear molecules, and 3 for nonlinear molecules).
ELO: Lower state energy in cm^{-1} relative to the ground state.
GUP: Upper state degeneracy.
TAG: Species tag or molecular identifier.
A negative value flags that the line frequency has
been measured in the laboratory. The absolute value of TAG is then the
species tag and ERR is the reported experimental error. The three most
significant digits of the species tag are coded as the mass number of
the species.
QNFMT: Identifies the format of the quantum numbers
QN': Quantum numbers for the upper state.
QN": Quantum numbers for the lower state.
"""
if 'Zero lines were found' in response.text:
if fallback_to_getmolecule:
self.lookup_ids = build_lookup()
payload = parse_qs(response.request.body)
tbs = [self.get_molecule(mol) for mol in payload['Mol']]
if len(tbs) > 1:
mols = []
for tb, mol in zip(tbs, payload['Mol']):
tb['Name'] = self.lookup_ids.find(mol, flags=0)
for key in list(tb.meta.keys()):
tb.meta[f'{mol}_{key}'] = tb.meta.pop(key)
mols.append(mol)
tb = table.vstack(tbs)
tb.meta['molecule_list'] = mols
else:
tb = tbs[0]
tb.meta['molecule_id'] = payload['Mol'][0]
tb.meta['molecule_name'] = self.lookup_ids.find(payload['Mol'][0], flags=0)
return tb
else:
raise EmptyResponseError(f"Response was empty; message was '{response.text}'.")
# data starts at 0 since regex was applied
# Warning for a result with more than 1000 lines:
# THIS form is currently limited to 1000 lines.
result = ascii.read(response.text, header_start=None, data_start=0,
comment=r'THIS|^\s{12,14}\d{4,6}.*|CADDIR CATDIR',
names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP',
'TAG', 'QNFMT', 'QN\'', 'QN"'),
col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67),
format='fixed_width', fast_reader=False)
if len(result) > self.maxlines:
warnings.warn("This form is currently limited to {0} lines."
"Please limit your search.".format(self.maxlines))
result['FREQ'].unit = u.MHz
result['ERR'].unit = u.MHz
result['LGINT'].unit = u.nm**2 * u.MHz
result['ELO'].unit = u.cm**(-1)
return result
[docs]
def get_species_table(self, *, catfile='catdir.cat'):
"""
A directory of the catalog is found in a file called 'catdir.cat.'
Each element of this directory is an 80-character record with the
following format:
| TAG, NAME, NLINE, QLOG, VER
| (I6,X, A13, I6, 7F7.4, I2)
Parameters
----------
catfile : str, name of file, default 'catdir.cat'
The catalog file, installed locally along with the package
Returns
-------
Table: `~astropy.table.Table`
| TAG : The species tag or molecular identifier.
| NAME : An ASCII name for the species.
| NLINE : The number of lines in the catalog.
| QLOG : A seven-element vector containing the base 10 logarithm of
the partition function for temperatures of 300 K, 225 K, 150 K,
75 K, 37.5 K, 18.75 K, and 9.375 K, respectively.
| VER : The version of the calculation for this species in the catalog.
The version number is followed by * if the entry is newer than the
last edition of the catalog.
"""
result = ascii.read(data_path(catfile), header_start=None, data_start=0,
names=('TAG', 'NAME', 'NLINE', 'QLOG1', 'QLOG2',
'QLOG3', 'QLOG4', 'QLOG5', 'QLOG6',
'QLOG7', 'VER'),
col_starts=(0, 6, 20, 26, 33, 40, 47, 54, 61,
68, 75),
format='fixed_width', fast_reader=False)
# store the corresponding temperatures as metadata
result['QLOG1'].meta = {'Temperature (K)': 300}
result['QLOG2'].meta = {'Temperature (K)': 225}
result['QLOG3'].meta = {'Temperature (K)': 150}
result['QLOG4'].meta = {'Temperature (K)': 75}
result['QLOG5'].meta = {'Temperature (K)': 37.5}
result['QLOG6'].meta = {'Temperature (K)': 18.75}
result['QLOG7'].meta = {'Temperature (K)': 9.375}
result.meta = {'Temperature (K)': [300, 225, 150, 75, 37.5, 18.5,
9.375]}
return result
[docs]
def get_molecule(self, molecule_id, *, cache=True):
"""
Retrieve the whole molecule table for a given molecule id from the JPL catalog.
Parameters
----------
molecule_id : int or str
The molecule tag/identifier. Can be an integer (e.g., 18003 for H2O)
or a zero-padded 6-character string (e.g., '018003').
cache : bool
Defaults to True. If set overrides global caching behavior.
Returns
-------
Table : `~astropy.table.Table`
Table containing all spectral lines for the requested molecule.
Examples
--------
>>> table = JPLSpec.get_molecule(18003) # doctest: +SKIP
>>> print(table) # doctest: +SKIP
"""
molecule_str = parse_molid(molecule_id)
# Construct the URL to the catalog file
url = f'https://spec.jpl.nasa.gov/ftp/pub/catalog/c{molecule_str}.cat'
# Request the catalog file
response = self._request(method='GET', url=url,
timeout=self.TIMEOUT, cache=cache)
response.raise_for_status()
if 'The requested URL was not found on this server.' in response.text:
raise EmptyResponseError(f"No data found for molecule ID {molecule_id}.")
# Parse the catalog file
result = self._parse_cat(response)
# Add metadata from species table
species_table = self.get_species_table()
# Find the row matching this molecule_id
int_molecule_id = int(molecule_str)
matching_rows = species_table[species_table['TAG'] == int_molecule_id]
if len(matching_rows) > 0:
# Add metadata as a dictionary
result.meta = dict(zip(matching_rows.colnames, matching_rows[0]))
return result
def _parse_cat(self, response, *, verbose=False):
"""
Parse a JPL-format catalog file into an `~astropy.table.Table`.
The catalog data files are composed of 80-character card images, with
one card image per spectral line. The format of each card image is:
FREQ, ERR, LGINT, DR, ELO, GUP, TAG, QNFMT, QN', QN"
(F13.4,F8.4, F8.4, I2,F10.4, I3, I7, I4, 6I2, 6I2)
https://spec.jpl.nasa.gov/ftp/pub/catalog/doc/catintro.pdf
Parameters
----------
text : str
The catalog file text content.
verbose : bool, optional
Not used currently.
Returns
-------
Table : `~astropy.table.Table`
Parsed catalog data.
"""
text = response.text
if 'Zero lines were found' in text or len(text.strip()) == 0:
raise EmptyResponseError(f"Response was empty; message was '{text}'.")
# Parse the catalog file with fixed-width format
# Format: FREQ(13.4), ERR(8.4), LGINT(8.4), DR(2), ELO(10.4), GUP(3), TAG(7), QNFMT(4), QN'(12), QN"(12)
result = ascii.read(text, header_start=None, data_start=0,
comment=r'THIS|^\s{12,14}\d{4,6}.*',
names=('FREQ', 'ERR', 'LGINT', 'DR', 'ELO', 'GUP',
'TAG', 'QNFMT', 'QN\'', 'QN"'),
col_starts=(0, 13, 21, 29, 31, 41, 44, 51, 55, 67),
format='fixed_width', fast_reader=False)
# Ensure TAG is integer type
result['TAG'] = result['TAG'].astype(int)
# Add units
result['FREQ'].unit = u.MHz
result['ERR'].unit = u.MHz
result['LGINT'].unit = u.nm**2 * u.MHz
result['ELO'].unit = u.cm**(-1)
# split table by qnfmt; each chunk must be separately parsed.
qnfmts = np.unique(result['QNFMT'])
tables = [result[result['QNFMT'] == qq] for qq in qnfmts]
# some tables have +/-/blank entries in QNs
# pm_is_ok should be True when the QN columns contain '+' or '-'.
# (can't do a str check on np.integer dtype so have to filter that out first)
pm_is_ok = ((not np.issubdtype(result["QN'"].dtype, np.integer))
and any(('+' in str(line) or '-' in str(line)) for line in result["QN'"]))
def int_or_pm(st):
try:
return int(st)
except ValueError:
try:
return parse_letternumber(st)
except ValueError:
if pm_is_ok and (st.strip() == '' or st.strip() == '+' or st.strip() == '-'):
return st.strip()
else:
raise ValueError(f'"{st}" is not a valid +/-/blank entry')
# At least this molecule, NH, claims 5 QNs but has only 4
bad_qnfmt_dict = {
15001: 1234,
}
mol_tag = result['TAG'][0]
if mol_tag in (32001,):
raise NotImplementedError("Molecule O2 (32001) does not follow the format standard.")
for tbl in tables:
if mol_tag in bad_qnfmt_dict:
n_qns = bad_qnfmt_dict[mol_tag] % 10
else:
n_qns = tbl['QNFMT'][0] % 10
if n_qns > 1:
qnlen = 2 * n_qns
for ii in range(n_qns):
if tbl["QN'"].dtype in (int, np.int32, np.int64):
# for the case where it was already parsed as int
# (53005 is an example)
tbl[f"QN'{ii+1}"] = tbl["QN'"]
tbl[f'QN"{ii+1}'] = tbl['QN"']
else:
# string parsing can truncate to length=2n or 2n-1 depending
# on whether there are any two-digit QNs in the column
ind1 = ii * 2
ind2 = ii * 2 + 2
# rjust(qnlen) is needed to enforce that all strings retain their exact original shape
qnp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN\'']]
qnpp = [int_or_pm(line.rjust(qnlen)[ind1: ind2].strip()) for line in tbl['QN"']]
dtype = str if any('+' in str(x) for x in qnp) else int
tbl[f"QN'{ii+1}"] = np.array(qnp, dtype=dtype)
tbl[f'QN"{ii+1}'] = np.array(qnpp, dtype=dtype)
del tbl['QN\'']
del tbl['QN"']
else:
tbl['QN\''] = np.array(list(map(parse_letternumber, tbl['QN\''])), dtype=int)
tbl['QN"'] = np.array(list(map(parse_letternumber, tbl['QN"'])), dtype=int)
result = table.vstack(tables)
# Add laboratory measurement flag
# A negative TAG value indicates laboratory-measured frequency
result['Lab'] = result['TAG'] < 0
# Convert TAG to absolute value
result['TAG'] = abs(result['TAG'])
return result
JPLSpec = JPLSpecClass()
def build_lookup():
result = JPLSpec.get_species_table()
keys = list(result['NAME'])
values = list(result['TAG'])
dictionary = dict(zip(keys, values))
lookuptable = lookup_table.Lookuptable(dictionary) # apply the class above
return lookuptable