# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
MAST Missions
=================
This module contains methods for searching MAST missions.
"""
import difflib
import warnings
from json import JSONDecodeError
from pathlib import Path
from urllib.parse import quote
import astropy.units as u
import astropy.coordinates as coord
import numpy as np
from astropy.table import Table, Row, Column, vstack
from requests import HTTPError, RequestException
from astroquery import log
from astroquery.utils import commons, async_to_sync
from astroquery.utils.class_or_instance import class_or_instance
from astroquery.exceptions import InvalidQueryError, MaxResultsWarning, NoResultsWarning
from astroquery.mast import utils
from astroquery.mast.core import MastQueryWithLogin
from . import conf
__all__ = ['MastMissionsClass', 'MastMissions']
[docs]
@async_to_sync
class MastMissionsClass(MastQueryWithLogin):
"""
MastMissions search class.

Class that allows direct programmatic access to retrieve metadata via the MAST search API for a given mission.
"""
# Static class variables
# Service names. They are used as the keys of ``service_dict`` and are compared against
# ``self.service`` in ``_parse_result`` to decide how a response is parsed.
_search = 'search'
_list_products = 'post_list_products'
# Workaround so that observation_id is returned in ULLYSES queries that do not specify columns.
# Used as ``select_cols`` when the mission is 'ullyses' and the caller selects no columns.
_default_ullyses_cols = ['target_name_ulysses', 'target_classification', 'targ_ra', 'targ_dec', 'host_galaxy_name',
'spectral_type', 'bmv0_mag', 'u_mag', 'b_mag', 'v_mag', 'gaia_g_mean_mag', 'star_mass',
'instrument', 'grating', 'filter', 'observation_id']
# maximum supported query radius; larger radii are rejected with InvalidQueryError by the query methods
_max_query_radius = 30 * u.arcmin
def __init__(self, *, mission='hst', mast_token=None):
    """
    Set up the query object for a single MAST mission.

    Parameters
    ----------
    mission : str
        Default is 'hst'. Mission to search. Stored lower-cased by the ``mission`` setter.
    mast_token : str, optional
        Default is None. Forwarded unchanged to the parent class initializer.
    """
    super().__init__(mast_token=mast_token)

    # Column keyword that holds the dataset ID, per mission
    self.dataset_kwds = dict(
        hst='sci_data_set_name',
        jwst='fileSetName',
        roman='fileSetName',
        classy='Target',
        ullyses='observation_id',
    )

    # Service attributes: the active API service and the path registered for each service
    self.service = self._search
    self.service_dict = {name: {'path': name} for name in (self._search, self._list_products)}

    # Keywords that are search options rather than column filters
    self._search_option_fields = [
        'limit', 'offset', 'sort_by', 'search_key',
        'sort_desc', 'select_cols', 'skip_count', 'user_fields',
    ]

    # Assigning the mission goes through the property setter, which needs ``service_dict`` to exist
    self.mission = mission
    self.limit = 5000  # maximum number of results
    self.columns = {}  # cached column table for each mission
@property
def mission(self):
    """str: Name of the mission currently being queried, as stored by the setter (lower case)."""
    return self._mission

@mission.setter
def mission(self, value):
    """Store the mission name and update the service parameters to point at it."""
    # Mission names are case-insensitive
    lowered = value.lower()
    self._mission = lowered
    # Re-register the service parameters under the path of the new mission
    mission_path = f'search/{self.mission}'
    self._service_api_connection.set_service_params(self.service_dict, mission_path)
def _extract_products(self, response):
"""
Extract products from the response of a `~requests.Response` object.
Parameters
----------
response : `~requests.Response`
The response object containing the products data.
Returns
-------
list
A list of products extracted from the response.
"""
combined = []
for resp in response:
products = resp.json().get('products', [])
# Flatten if nested
if products and isinstance(products[0], list):
products = products[0]
combined.extend(products)
return combined
def _parse_result(self, response, *, verbose=False): # Used by the async_to_sync decorator functionality
"""
Parse the results of a `~requests.Response` objects and return an `~astropy.table.Table` of results.
Parameters
----------
response : `~requests.Response`
`~requests.Response` objects.
verbose : bool
(presently does nothing - there is no output with verbose set to
True or False)
Default False. Setting to True provides more extensive output.
Returns
-------
response : `~astropy.table.Table`
"""
if self.service == self._search:
results = self._service_api_connection._parse_result(response, verbose, data_key='results')
# Warn if maximum results are returned
if len(results) >= self.limit:
warnings.warn("Maximum results returned, may not include all sources within radius.",
MaxResultsWarning)
return results
elif self.service == self._list_products:
products = self._extract_products(response)
return Table(products)
def _validate_criteria(self, **criteria):
"""
Check that criteria keyword arguments are valid column names for the mission.
Raises InvalidQueryError if a criteria argument is invalid.
Parameters
----------
**criteria
Keyword arguments representing criteria filters to apply.
Raises
-------
InvalidQueryError
If a keyword does not match any valid column names, an error is raised that suggests the closest
matching column name, if available.
"""
# Ensure that self.columns is populated
self.get_column_list()
# Check each criteria argument for validity
valid_cols = list(self.columns[self.mission]['name']) + self._search_option_fields
for kwd in criteria.keys():
col = next((name for name in valid_cols if name == kwd), None)
if not col:
closest_match = difflib.get_close_matches(kwd, valid_cols, n=1)
error_msg = (
f"Filter '{kwd}' does not exist. Did you mean '{closest_match[0]}'?"
if closest_match
else f"Filter '{kwd}' does not exist."
)
raise InvalidQueryError(error_msg)
def _build_params_from_criteria(self, params, **criteria):
"""
Build the parameters for the API request based on the provided criteria.
Parameters
----------
params : dict
Dictionary to store the parameters for the API request.
**criteria
Keyword arguments representing criteria filters to apply.
"""
# Add each criterion to the params dictionary
params['conditions'] = []
for prop, value in criteria.items():
if prop not in self._search_option_fields:
if isinstance(value, list):
# Convert to comma-separated string if passed as a list
value = ','.join(str(item) for item in value)
params['conditions'].append({prop: value})
else:
if prop == 'sort_by' and isinstance(value, str):
# Convert to list if passed as a string
value = [value]
if prop == 'sort_desc' and isinstance(value, bool):
# Convert to list if passed as a boolean
value = [value]
params[prop] = value
[docs]
@class_or_instance
def query_region_async(self, coordinates, *, radius=3*u.arcmin, limit=5000, offset=0,
                       select_cols=None, **criteria):
    """
    Given a sky position and radius, returns a list of matching dataset IDs.

    Parameters
    ----------
    coordinates : str or `~astropy.coordinates` object
        The target around which to search. It may be specified as a
        string or as the appropriate `~astropy.coordinates` object.
    radius : str or `~astropy.units.Quantity` object
        Default is 3 arcminutes. The radius around the coordinates to search within.
        The string must be parsable by `~astropy.coordinates.Angle`. The
        appropriate `~astropy.units.Quantity` object from `~astropy.units` may also be used.
        A bare number is interpreted as arcminutes.
        The maximum supported query radius is 30 arcminutes.
    limit : int
        Default is 5000. The maximum number of dataset IDs in the results.
    offset : int
        Default is 0. The number of records you wish to skip before selecting records.
    select_cols: list, optional
        Default is None. Names of columns that will be included in the result table.
        If None, a default set of columns will be returned. When columns are given, the dataset ID
        column of the current mission is added to the request if it is known and not already
        listed. The list passed in is not modified.
    **criteria
        Other mission-specific criteria arguments.
        All valid filters can be found using `~astroquery.mast.missions.MastMissionsClass.get_column_list`
        function.
        For example, one can specify the output columns(select_cols) or use other filters(conditions).
        To filter by multiple values for a single column, pass in a list of values or
        a comma-separated string of values.

    Returns
    -------
    response : list of `~requests.Response`

    Raises
    ------
    InvalidQueryError
        If the query radius is larger than the limit (30 arcminutes), or if a criteria
        keyword is not a valid column name or search option.
    """
    self.limit = limit
    self.service = self._search

    # Check that criteria arguments are valid
    self._validate_criteria(**criteria)

    # Put coordinates and radius into consistent format
    coordinates = commons.parse_coordinates(coordinates, return_frame='icrs')

    # If radius is just a number, assume arcminutes
    radius = coord.Angle(radius, u.arcmin)
    if radius > self._max_query_radius:
        raise InvalidQueryError(
            f"Query radius too large. Must be ≤{self._max_query_radius}, got {radius}."
        )

    # Dataset ID column should always be returned
    if select_cols:
        if isinstance(select_cols, str):
            # A single column name; wrap it so that it is not split into characters below
            select_cols = [select_cols]
        # Work on a copy so that the caller's list is never mutated
        select_cols = list(select_cols)
        dataset_kwd = self.dataset_kwds.get(self.mission)
        # Missions without a known dataset keyword get nothing appended (rather than None)
        if dataset_kwd and dataset_kwd not in select_cols:
            select_cols.append(dataset_kwd)
    elif self.mission == 'ullyses':
        # Copy so that the shared class-level default cannot be altered through the request
        select_cols = list(self._default_ullyses_cols)

    # Basic params
    params = {'target': [f"{coordinates.ra.deg} {coordinates.dec.deg}"],
              'radius': radius.arcsec,
              'radius_units': 'arcseconds',
              'limit': limit,
              'offset': offset,
              'select_cols': select_cols}

    self._build_params_from_criteria(params, **criteria)

    return self._service_api_connection.missions_request_async(self.service, params)
[docs]
@class_or_instance
def query_criteria_async(self, *, coordinates=None, objectname=None, radius=3*u.arcmin,
                         limit=5000, offset=0, select_cols=None, resolver=None, **criteria):
    """
    Given a set of search criteria, returns a list of mission metadata.

    Parameters
    ----------
    coordinates : str or `~astropy.coordinates` object
        The target around which to search. It may be specified as a
        string or as the appropriate `~astropy.coordinates` object.
    objectname : str
        The name of the target around which to search.
    radius : str or `~astropy.units.Quantity` object
        Default is 3 arcminutes. The radius around the coordinates to search within.
        The string must be parsable by `~astropy.coordinates.Angle`. The
        appropriate `~astropy.units.Quantity` object from `~astropy.units` may also be used.
        A bare number is interpreted as arcminutes.
        The maximum supported query radius is 30 arcminutes.
    limit : int
        Default is 5000. The maximum number of dataset IDs in the results.
    offset : int
        Default is 0. The number of records you wish to skip before selecting records.
    select_cols: list, optional
        Default is None. Names of columns that will be included in the result table.
        If None, a default set of columns will be returned. When columns are given, the dataset ID
        column of the current mission is added to the request if it is known and not already
        listed. The list passed in is not modified.
    resolver : str, optional
        Default is None. The resolver to use when resolving a named target into coordinates. Valid options are
        "SIMBAD" and "NED". If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information.
    **criteria
        Criteria to apply. At least one non-positional criterion must be supplied.
        Valid criteria are coordinates, objectname, radius (as in
        `~astroquery.mast.missions.MastMissionsClass.query_region` and
        `~astroquery.mast.missions.MastMissionsClass.query_object` functions),
        and all fields listed in the column documentation for the mission being queried.
        List of all valid fields that can be used to match results on criteria can be retrieved by calling
        `~astroquery.mast.missions.MastMissionsClass.get_column_list` function.
        To filter by multiple values for a single column, pass in a list of values or
        a comma-separated string of values.

    Returns
    -------
    response : list of `~requests.Response`

    Raises
    ------
    InvalidQueryError
        If the query radius is larger than the limit (30 arcminutes), if a criteria keyword is not
        a valid column name or search option, or if no non-positional criterion is supplied.
    """
    self.limit = limit
    self.service = self._search

    # Check that criteria arguments are valid
    self._validate_criteria(**criteria)

    # Parse user input location
    if objectname or coordinates:
        coordinates = utils.parse_input_location(coordinates=coordinates,
                                                 objectname=objectname,
                                                 resolver=resolver)

    # If radius is just a number, assume arcminutes
    radius = coord.Angle(radius, u.arcmin)
    if radius > self._max_query_radius:
        raise InvalidQueryError(
            f"Query radius too large. Must be ≤{self._max_query_radius}, got {radius}."
        )

    # Dataset ID column should always be returned
    if select_cols:
        if isinstance(select_cols, str):
            # A single column name; wrap it so that it is not split into characters below
            select_cols = [select_cols]
        # Work on a copy so that the caller's list is never mutated
        select_cols = list(select_cols)
        dataset_kwd = self.dataset_kwds.get(self.mission)
        # Missions without a known dataset keyword get nothing appended (rather than None)
        if dataset_kwd and dataset_kwd not in select_cols:
            select_cols.append(dataset_kwd)
    elif self.mission == 'ullyses':
        # Copy so that the shared class-level default cannot be altered through the request
        select_cols = list(self._default_ullyses_cols)

    # Build query
    params = {"limit": self.limit, "offset": offset, 'select_cols': select_cols}
    if coordinates:
        params["target"] = [f"{coordinates.ra.deg} {coordinates.dec.deg}"]
        params["radius"] = radius.arcsec
        params["radius_units"] = 'arcseconds'

    if not self._service_api_connection.check_catalogs_criteria_params(criteria):
        raise InvalidQueryError("At least one non-positional criterion must be supplied.")

    self._build_params_from_criteria(params, **criteria)

    return self._service_api_connection.missions_request_async(self.service, params)
[docs]
@class_or_instance
def query_object_async(self, objectname, *, radius=3*u.arcmin, limit=5000, offset=0,
                       select_cols=None, resolver=None, **criteria):
    """
    Resolve an object name to coordinates and run a region query around them.

    Parameters
    ----------
    objectname : str
        The name of the target around which to search.
    radius : str or `~astropy.units.Quantity` object, optional
        Default is 3 arcminutes. The radius around the coordinates to search within.
        The string must be parsable by `~astropy.coordinates.Angle`. The
        appropriate `~astropy.units.Quantity` object from `~astropy.units` may also be used.
    limit : int
        Default is 5000. The maximum number of dataset IDs in the results.
    offset : int
        Default is 0. The number of records you wish to skip before selecting records.
    select_cols: list, optional
        Default is None. Names of columns that will be included in the result table.
        If None, a default set of columns will be returned.
    resolver : str, optional
        Default is None. The resolver to use when resolving a named target into coordinates. Valid options are
        "SIMBAD" and "NED". If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information.
    **criteria
        Other mission-specific criteria arguments.
        All valid filters can be found using `~astroquery.mast.missions.MastMissionsClass.get_column_list`
        function.
        For example, one can specify the output columns(select_cols) or use other filters(conditions).
        To filter by multiple values for a single column, pass in a list of values or
        a comma-separated string of values.

    Returns
    -------
    response : list of `~requests.Response`
    """
    # Everything except the position is handed to the region query unchanged
    region_options = dict(radius=radius, limit=limit, offset=offset, select_cols=select_cols)

    resolved_position = utils.resolve_object(objectname, resolver=resolver)

    return self.query_region_async(resolved_position, **region_options, **criteria)
[docs]
@class_or_instance
def get_product_list_async(self, datasets, *, batch_size=1000):
    """
    Given a dataset ID or list of dataset IDs, returns a list of associated data products.

    To return unique data products, use ``MastMissions.get_unique_product_list``.

    Parameters
    ----------
    datasets : str, list, `~astropy.table.Row`, `~astropy.table.Column`, `~astropy.table.Table`
        Row/Table of MastMissions query results (e.g. output from `query_object`)
        or single/list of dataset ID(s). Blank IDs are dropped, surrounding whitespace is
        stripped, and duplicate IDs are requested only once (first occurrence order is kept).
    batch_size : int, optional
        Default 1000. Number of dataset IDs to include in each batch request to the server.
        If you experience timeouts or connection errors, consider lowering this value.

    Returns
    -------
    response : list of `~requests.Response`

    Raises
    ------
    InvalidQueryError
        If a Row/Table is given for a mission without a known dataset keyword, or if no
        non-blank dataset ID remains.
    TypeError
        If ``datasets`` is not one of the supported types.
    """
    self.service = self._list_products

    if isinstance(datasets, (Table, Row)):
        dataset_kwd = self.get_dataset_kwd()
        if not dataset_kwd:
            raise InvalidQueryError(f'Dataset keyword not found for mission "{self.mission}". Please input '
                                    'dataset IDs as a string, list of strings, or `~astropy.table.Column`.')

    # Extract dataset IDs based on input type and mission
    if isinstance(datasets, Table):
        datasets = datasets[dataset_kwd].tolist()
    elif isinstance(datasets, Row):
        datasets = [datasets[dataset_kwd]]
    elif isinstance(datasets, Column):
        datasets = datasets.tolist()
    elif isinstance(datasets, str):
        datasets = [datasets]
    elif not isinstance(datasets, list):
        raise TypeError('Unsupported data type for `datasets`. Expected string, '
                        'list of strings, Astropy Row, Astropy Column, or Astropy Table.')

    # Filter out empty strings from IDs
    datasets = [item.strip() for item in datasets if item and item.strip()]
    if not datasets:
        raise InvalidQueryError("Dataset list is empty, no associated products.")

    # Filter out duplicates. dict.fromkeys keeps first-occurrence order, so the batches sent
    # to the server are the same from run to run (a set would scramble the order).
    datasets = list(dict.fromkeys(datasets))

    results = utils._batched_request(
        datasets,
        params={},
        max_batch=batch_size,
        param_key="dataset_ids",
        request_func=lambda p: self._service_api_connection.missions_request_async(self.service, p),
        extract_func=lambda r: [r],  # missions_request_async already returns one result
        desc=f"Fetching products for {len(datasets)} unique datasets"
    )

    # Return a list of responses
    return results
[docs]
def get_unique_product_list(self, datasets, *, batch_size=1000):
    """
    Look up the data products of the given dataset(s) and keep one entry per filename.

    Parameters
    ----------
    datasets : str, list, `~astropy.table.Row`, `~astropy.table.Column`, `~astropy.table.Table`
        Row/Table of MastMissions query results (e.g. output from `query_object`)
        or single/list of dataset ID(s).
    batch_size : int, optional
        Default 1000. Number of dataset IDs to include in each batch request to the server.
        If you experience timeouts or connection errors, consider lowering this value.

    Returns
    -------
    unique_products : `~astropy.table.Table`
        Table of products after duplicate removal on the ``filename`` column.
    """
    all_products = self.get_product_list(datasets, batch_size=batch_size)
    deduplicated = utils.remove_duplicate_products(all_products, 'filename')

    # Point the user at the full listing when entries were dropped
    dropped_some = len(deduplicated) < len(all_products)
    if dropped_some:
        log.info("To return all products, use `MastMissions.get_product_list`")

    return deduplicated
[docs]
def filter_products(self, products, *, extension=None, **filters):
    """
    Return the rows of a product table that pass an extension filter and column filters.

    Parameters
    ----------
    products : `~astropy.table.Table`
        Table containing data products to be filtered.
    extension : string or array, optional
        Default is None. Filters by file extension(s), matching any specified extensions.
    **filters :
        Column-based filters to apply to the products table.

        Each keyword corresponds to a column name in the table, with the argument being one or more
        acceptable values for that column. AND logic is applied between filters.

        Within each column's filter set:

        - Positive (non-negated) values are combined with OR logic.
        - Any negated values (prefixed with "!") are combined with AND logic against the ORed positives.
          This results in: (NOT any_negatives) AND (any_positives)

        Examples:
        ``file_suffix=['A', 'B', '!C']`` → (file_suffix != C) AND (file_suffix == A OR file_suffix == B)
        ``size=['!14400', '<20000']`` → (size != 14400) AND (size < 20000)

        For columns with numeric data types (int or float), filter values can be expressed
        in several ways:

        - A single number: ``size=100``
        - A range in the form "start..end": ``size="100..1000"``
        - A comparison operator followed by a number: ``size=">=1000"``
        - A list of expressions (OR logic): ``size=[100, "500..1000", ">=1500"]``

    Returns
    -------
    response : `~astropy.table.Table`
        Filtered Table of data products.
    """
    # Every row is kept until a filter rules it out
    keep = np.ones(len(products), dtype=bool)

    # Extension filter is matched against the 'filename' column
    if extension:
        keep &= utils.apply_extension_filter(products, extension, 'filename')

    # Column filters are always applied, including when none were given
    keep &= utils.apply_column_filters(products, filters)

    return products[keep]
[docs]
def download_file(self, uri, *, local_path=None, cache=True, verbose=True):
    """
    Downloads a single file based on the data URI.

    Parameters
    ----------
    uri : str
        The product dataURI
    local_path : str
        Directory or filename to which the file will be downloaded. Defaults to current working directory.
        A path without a file suffix is treated as a directory and the file name taken from
        ``uri`` is appended to it. Missing parent directories are created.
    cache : bool
        Default is True. If file is found on disk, it will not be downloaded again.
    verbose : bool, optional
        Default is True. Whether to show download progress in the console.

    Returns
    -------
    status: str
        Download status message. Either COMPLETE or ERROR.
    msg : str
        An error status message, if any. None when the status is COMPLETE.
    url : str
        The full URL download path. Only set when the status is ERROR; otherwise None.
    """
    # Construct the full data URL based on mission
    if self.mission in ['hst', 'jwst', 'roman']:
        # HST, JWST, and RST have a dedicated endpoint for retrieving products
        base_url = self._service_api_connection.MISSIONS_DOWNLOAD_URL + self.mission + '/api/v0.1/retrieve_product'
        keyword = 'product_name'
    else:
        # HLSPs use MAST download URL
        base_url = self._service_api_connection.MAST_DOWNLOAD_URL
        keyword = 'uri'

    # The unescaped URL is what gets reported; the percent-escaped one is what gets requested
    data_url = base_url + f'?{keyword}=' + uri
    escaped_url = base_url + f'?{keyword}=' + quote(uri, safe='')

    # Determine local file path. Use current directory as default.
    filename = Path(uri).name
    local_path = Path(local_path or filename)
    if not local_path.suffix:  # Append filename if local path is directory
        local_path = local_path / filename
    local_path.parent.mkdir(parents=True, exist_ok=True)

    status = 'COMPLETE'
    msg = None
    url = None

    try:
        # Attempt file download
        self._download_file(escaped_url, local_path, cache=cache, verbose=verbose)

        # Check if file exists
        if not local_path.is_file():
            status = 'ERROR'
            msg = 'File was not downloaded'
            url = data_url

    except HTTPError as err:
        # An HTTPError is not guaranteed to carry a response object
        response = err.response
        if response is not None and response.status_code == 401:
            no_auth_msg = f'You are not authorized to download from {data_url}.'
            if self._authenticated:
                no_auth_msg += ('\nYou do not have access to download this data, or your authentication '
                                'token may be expired. You can generate a new token at '
                                'https://auth.mast.stsci.edu/token?suggested_name=Astroquery&'
                                'suggested_scope=mast:exclusive_access')
            else:
                no_auth_msg += ('\nPlease authenticate yourself using the `~astroquery.mast.MastMissions.login` '
                                'function or initialize `~astroquery.mast.MastMissions` with an authentication '
                                'token.')
            log.warning(no_auth_msg)

        # Any HTTP error is reported through the return values rather than raised
        status = 'ERROR'
        msg = f'HTTPError: {err}'
        url = data_url

    return status, msg, url
def _download_files(self, products, base_dir, *, flat=False, cache=True, verbose=True):
    """
    Download every product in a table and report the outcome of each download.

    Parameters
    ----------
    products : `~astropy.table.Table`
        Table containing products to be downloaded. The ``dataset``, ``filename`` and ``uri``
        columns are read (``dataset`` only when ``flat`` is False).
    base_dir : str
        Directory in which files will be downloaded.
    flat : bool
        Default is False. If True, all files are downloaded directly to `base_dir`, and no subdirectories
        will be created. Otherwise each file goes into a subdirectory named after its dataset.
    cache : bool
        Default is True. If file is found on disk, it will not be downloaded again.
    verbose : bool, optional
        Default is True. Whether to show download progress in the console.

    Returns
    -------
    response : `~astropy.table.Table`
        Table containing download results for each data product file.
    """
    root = Path(base_dir)
    rows = []

    for product in products:
        # Pick and create the directory that receives this file
        target_dir = root if flat else root / product['dataset']
        target_dir.mkdir(parents=True, exist_ok=True)
        destination = target_dir / Path(product['filename']).name

        # Download the file and keep its outcome for the manifest
        status, msg, url = self.download_file(product['uri'],
                                              local_path=destination,
                                              cache=cache,
                                              verbose=verbose)
        rows.append([destination, status, msg, url])

    # One manifest row per product
    return Table(rows=rows, names=('Local Path', 'Status', 'Message', 'URL'))
[docs]
def download_products(self, products, *, download_dir=None, flat=False,
                      cache=True, extension=None, verbose=True, **filters):
    """
    Download specified data products.

    Parameters
    ----------
    products : str, list, `~astropy.table.Table`
        Either a single or list of dataset IDs (e.g., as input for `get_product_list`),
        or a Table of products (e.g., as output from `get_product_list`)
    download_dir : str or Path, optional
        Directory for file downloads. Defaults to current directory.
    flat : bool, optional
        Default is False. If False, puts files into the standard
        directory structure of "mastDownload/<mission>/<dataset ID>/".
        If True, places files directly in ``download_dir`` without subdirectories.
    cache : bool, optional
        Default is True. If file is found on disc, it will not be downloaded again.
    extension : string or list, optional
        Default is None. Filter by file extension.
    verbose : bool, optional
        Default is True. Whether to show download progress in the console.
    **filters :
        Column-based filters to be applied.
        Each keyword corresponds to a column name in the table, with the argument being one or more
        acceptable values for that column. AND logic is applied between filters, OR logic within
        each filter set.

        For example: type="science", extension=["fits","jpg"]

    Returns
    -------
    manifest : `~astropy.table.Table` or None
        A table manifest showing downloaded file locations and statuses.
        None is returned (after a `NoResultsWarning`) when no product is left to download
        once filters and duplicate removal have been applied.
    """
    # Ensure `products` is a Table, collecting products if necessary
    if isinstance(products, str):
        products = [products]
    if isinstance(products, list):
        # Look up all dataset IDs in one batched call rather than one request per ID
        products = self.get_product_list(products)
    elif isinstance(products, Row):
        products = Table(products, masked=True)

    # Apply filters
    products = self.filter_products(products, extension=extension, **filters)

    # Remove duplicates
    products = utils.remove_duplicate_products(products, 'filename')

    if not len(products):
        warnings.warn("No products to download.", NoResultsWarning)
        return None

    # Set up base directory for downloads
    download_dir = Path(download_dir or '.')
    base_dir = download_dir if flat else download_dir / 'mastDownload' / self.mission

    # Download files
    manifest = self._download_files(products,
                                    base_dir=base_dir,
                                    flat=flat,
                                    cache=cache,
                                    verbose=verbose)

    return manifest
[docs]
@class_or_instance
def get_column_list(self):
    """
    For a mission, return a list of all searchable columns and their descriptions.

    The column table is requested from the server the first time it is needed for a mission
    and stored in ``self.columns``; later calls for the same mission return the stored table.

    Returns
    -------
    response : `~astropy.table.Table` that contains columns names, types, and descriptions

    Raises
    ------
    JSONDecodeError
        If the server response cannot be decoded as JSON.
    ConnectionError
        If the request to the server fails.
    KeyError
        If an expected key is missing from the response data.
    RuntimeError
        If any other error occurs while retrieving the column list.
    """
    if not self.columns.get(self.mission):
        try:
            # Send server request to get column list for current mission
            params = {'mission': self.mission}
            resp = utils._simple_request(f'{conf.server}/search/util/api/v0.1/column_list', params)

            # Parse JSON and extract necessary info
            results = resp.json()
            rows = [
                (result['column_name'], result['qual_type'], result['description'])
                for result in results
            ]

            # Create Table with parsed data
            col_table = Table(rows=rows, names=('name', 'data_type', 'description'))
            self.columns[self.mission] = col_table
        except JSONDecodeError as ex:
            # JSONDecodeError requires the document and position in addition to the message;
            # constructing it from the message alone would itself fail with a TypeError.
            raise JSONDecodeError(f'Failed to decode JSON response while attempting to get column list'
                                  f' for mission {self.mission}: {ex}', ex.doc, ex.pos) from ex
        except RequestException as ex:
            raise ConnectionError(f'Failed to connect to the server while attempting to get column list'
                                  f' for mission {self.mission}: {ex}') from ex
        except KeyError as ex:
            raise KeyError(f'Expected key not found in response data while attempting to get column list'
                           f' for mission {self.mission}: {ex}') from ex
        except Exception as ex:
            raise RuntimeError(f'An unexpected error occurred while attempting to get column list'
                               f' for mission {self.mission}: {ex}') from ex

    return self.columns[self.mission]
[docs]
def get_dataset_kwd(self):
    """
    Look up the column keyword that holds the dataset ID for the current mission.

    Returns
    -------
    keyword : str or None
        Dataset ID keyword, or None (after logging a warning) if the mission has no known keyword.
    """
    try:
        return self.dataset_kwds[self.mission]
    except KeyError:
        # Unknown mission: warn and let the caller decide how to proceed
        log.warning('The mission "%s" does not have a known dataset ID keyword.', self.mission)
        return None
# Module-level instance exported through ``__all__``. It is created with the initializer
# defaults, i.e. mission 'hst' and no MAST token.
MastMissions = MastMissionsClass()