Source code for astroquery.mast.observations

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
MAST Observations
=================

This module contains various methods for querying MAST observations.
"""

from pathlib import Path
import warnings
import time
import os
from urllib.parse import quote

import numpy as np

from requests import HTTPError

import astropy.units as u
import astropy.coordinates as coord

from astropy.table import Table, Row, vstack
from astroquery import log
from astroquery.mast.cloud import CloudAccess
from astroquery.utils import commons

from ..utils import async_to_sync
from ..utils.class_or_instance import class_or_instance
from ..exceptions import (InvalidQueryError, RemoteServiceError,
                          NoResultsWarning, InputWarning)

from . import utils
from .core import MastQueryWithLogin

# Public names exported by a star-import of this module.
# NOTE(review): only 'ObservationsClass' is defined in the visible part of this
# file; 'Observations', 'MastClass' and 'Mast' are presumably defined further
# down -- confirm before relying on this list.
__all__ = ['Observations', 'ObservationsClass',
           'MastClass', 'Mast']


@async_to_sync
class ObservationsClass(MastQueryWithLogin):
    """
    MAST Observations query class.

    Class for querying MAST observational data.
    """

    # Service names passed to the portal API connection
    _caom_all = 'Mast.Caom.All'
    _caom_cone = 'Mast.Caom.Cone'
    _caom_filtered_position = 'Mast.Caom.Filtered.Position'
    _caom_filtered = 'Mast.Caom.Filtered'
    _caom_products = 'Mast.Caom.Products'

    def _parse_result(self, responses, *, verbose=False):  # Used by the async_to_sync decorator functionality
        """
        Convert a list of `~requests.Response` objects into an
        `~astropy.table.Table` of results.

        The work is delegated to the portal API connection's own
        ``_parse_result``.

        Parameters
        ----------
        responses : list of `~requests.Response`
            List of `~requests.Response` objects.
        verbose : bool
            Default False. Forwarded unchanged to the connection's parser.
            (Documented upstream as presently having no effect on output.)

        Returns
        -------
        response : `~astropy.table.Table`
        """
        connection = self._portal_api_connection
        return connection._parse_result(responses, verbose)
def list_missions(self):
    """
    Lists data missions archived by MAST and available through `astroquery.mast`.

    The list is read from the histogram information attached to the
    ``obs_collection`` column of an unfiltered ``Mast.Caom.All`` request.

    Returns
    -------
    response : list
        Sorted list of available missions.
    """
    # Request the histogram information for every column
    raw_responses = self._portal_api_connection.service_request_async(self._caom_all, {}, format='extjs')
    payload = raw_responses[0].json()
    columns = payload['data']['Tables'][0]['Columns']

    # Pick out the histogram attached to the obs_collection column
    for column in columns:
        if column['text'] != "obs_collection":
            continue
        mission_info = column['ExtendedProperties']['histObj']

    # 'hist' is an entry of the histogram object, not a mission name
    missions = sorted(mission_info)
    missions.remove('hist')
    return missions
def get_metadata(self, query_type):
    """
    Returns metadata about the requested query type.

    Parameters
    ----------
    query_type : str
        The query to get metadata for (case-insensitive).
        Options are observations, and products.

    Returns
    -------
    response : `~astropy.table.Table`
        The metadata table.

    Raises
    ------
    InvalidQueryError
        If ``query_type`` is neither "observations" nor "products".
    """
    # Map each supported query type onto the service whose column config describes it
    colconf_by_type = {
        "observations": self._caom_cone,
        "products": self._caom_products,
    }

    key = query_type.lower()
    if key not in colconf_by_type:
        raise InvalidQueryError("Unknown query type.")

    return self._portal_api_connection._get_columnsconfig_metadata(colconf_by_type[key])
def _parse_caom_criteria(self, *, resolver=None, **criteria):
    """
    Split a dictionary of criteria into a position string (None when no
    coordinates/object name were given) and a filter set.

    Parameters
    ----------
    resolver : str, optional
        The resolver to use when resolving a named target into coordinates. Valid options are "SIMBAD" and "NED".
        If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information. Default is None.
    **criteria
        Criteria to apply.
        Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
        and all observation fields returned by the ``get_metadata("observations")``.
        The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
        except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
        For non-float type criteria wildcards maybe used (both * and % are considered wildcards), however
        only one wildcarded value can be processed per criterion.
        RA and Dec must be given in decimal degrees, and datetimes in MJD.
        For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]

    Returns
    -------
    response : tuple
        Tuple of the form (position, filter_set), where position is either None (coordinates and objectname not given)
        or a string of the form "ra, dec, radius" (all in degrees), and filter_set is list of filters dictionaries.
    """
    # Pull the positional keywords out so only column filters remain in ``criteria``
    coordinates = criteria.pop('coordinates', None)
    objectname = criteria.pop('objectname', None)
    radius = criteria.pop('radius', 0.2*u.deg)

    has_position = bool(coordinates or objectname)

    # The filter set is built against the positional service only when a position was supplied
    filtered_service = self._caom_filtered_position if has_position else self._caom_filtered
    mashup_filters = self._portal_api_connection.build_filter_set(self._caom_cone,
                                                                  filtered_service,
                                                                  **criteria)

    if not has_position:
        return None, mashup_filters

    coordinates = utils.parse_input_location(coordinates=coordinates,
                                             objectname=objectname,
                                             resolver=resolver)
    if not coordinates:
        return None, mashup_filters

    # if radius is just a number we assume degrees
    radius = coord.Angle(radius, u.deg)

    # build the coordinates string needed by ObservationsClass._caom_filtered_position
    position = ', '.join(map(str, (coordinates.ra.deg, coordinates.dec.deg, radius.deg)))
    return position, mashup_filters
def enable_cloud_dataset(self, provider="AWS", profile=None, verbose=True):
    """
    Enable downloading public files from S3 instead of MAST.
    Requires the boto3 library to function.

    Parameters
    ----------
    provider : str
        Which cloud data provider to use. We may in the future support multiple providers,
        though at the moment this argument is ignored.
    profile : str
        Profile to use to identify yourself to the cloud provider (usually in ~/.aws/config).
    verbose : bool
        Default True.
        Logger to display extra info and warning.
    """
    # Keep the connection on the instance so later downloads can consult it
    cloud_connection = CloudAccess(provider, profile, verbose)
    self._cloud_connection = cloud_connection
def disable_cloud_dataset(self):
    """
    Disables downloading public files from S3 instead of MAST.

    Clears the stored cloud connection, so subsequent downloads no longer
    consult it.
    """
    self._cloud_connection = None
@class_or_instance
def query_region_async(self, coordinates, *, radius=0.2*u.deg, pagesize=None, page=None):
    """
    Given a sky position and radius, returns a list of MAST observations.
    See column documentation `here <https://mast.stsci.edu/api/v0/_c_a_o_mfields.html>`__.

    Parameters
    ----------
    coordinates : str or `~astropy.coordinates` object
        The target around which to search. It may be specified as a
        string or as the appropriate `~astropy.coordinates` object.
    radius : str or `~astropy.units.Quantity` object, optional
        Default 0.2 degrees.
        The string must be parsable by `~astropy.coordinates.Angle`. The
        appropriate `~astropy.units.Quantity` object from
        `~astropy.units` may also be used. A bare number is taken to be in degrees.
    pagesize : int, optional
        Default None.
        Can be used to override the default pagesize for (set in configs) this query only.
        E.g. when using a slow internet connection.
    page : int, optional
        Default None.
        Can be used to override the default behavior of all results being returned to obtain a
        specific page of results.

    Returns
    -------
    response : list of `~requests.Response`
    """
    # Normalise the target into an ICRS coordinate object
    target = commons.parse_coordinates(coordinates, return_frame='icrs')

    # if radius is just a number we assume degrees
    search_radius = coord.Angle(radius, u.deg)

    cone_params = {
        'ra': target.ra.deg,
        'dec': target.dec.deg,
        'radius': search_radius.deg,
    }

    return self._portal_api_connection.service_request_async(self._caom_cone, cone_params,
                                                              pagesize=pagesize, page=page)
@class_or_instance
def query_object_async(self, objectname, *, radius=0.2*u.deg, pagesize=None, page=None, resolver=None):
    """
    Given an object name, returns a list of MAST observations.
    See column documentation `here <https://mast.stsci.edu/api/v0/_c_a_o_mfields.html>`__.

    The name is first resolved into coordinates, then the search is handed
    to `query_region_async`.

    Parameters
    ----------
    objectname : str
        The name of the target around which to search.
    radius : str or `~astropy.units.Quantity` object, optional
        Default 0.2 degrees.
        The string must be parsable by `~astropy.coordinates.Angle`.
        The appropriate `~astropy.units.Quantity` object from
        `~astropy.units` may also be used.
    pagesize : int, optional
        Default None.
        Can be used to override the default pagesize for (set in configs) this query only.
        E.g. when using a slow internet connection.
    page : int, optional
        Default None.
        Can be used to override the default behavior of all results being returned
        to obtain a specific page of results.
    resolver : str, optional
        The resolver to use when resolving a named target into coordinates. Valid options are "SIMBAD" and "NED".
        If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information. Default is None.

    Returns
    -------
    response : list of `~requests.Response`
    """
    resolved_position = utils.resolve_object(objectname, resolver=resolver)

    return self.query_region_async(resolved_position, radius=radius, pagesize=pagesize, page=page)
@class_or_instance
def query_criteria_async(self, *, pagesize=None, page=None, resolver=None, **criteria):
    """
    Given a set of criteria, returns a list of MAST observations.
    Valid criteria are returned by ``get_metadata("observations")``

    Parameters
    ----------
    pagesize : int, optional
        Can be used to override the default pagesize.
        E.g. when using a slow internet connection.
    page : int, optional
        Can be used to override the default behavior of all results being returned to obtain
        one specific page of results.
    resolver : str, optional
        The resolver to use when resolving a named target into coordinates. Valid options are "SIMBAD" and "NED".
        If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information. Default is None.
    **criteria
        Criteria to apply. At least one non-positional criteria must be supplied.
        Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
        and all observation fields returned by the ``get_metadata("observations")``.
        The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
        except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
        For non-float type criteria wildcards maybe used (both * and % are considered wildcards), however
        only one wildcarded value can be processed per criterion.
        RA and Dec must be given in decimal degrees, and datetimes in MJD.
        For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]

    Returns
    -------
    response : list of `~requests.Response`

    Raises
    ------
    InvalidQueryError
        If no non-positional criterion was supplied.
    """
    position, mashup_filters = self._parse_caom_criteria(resolver=resolver, **criteria)

    if not mashup_filters:
        raise InvalidQueryError("At least one non-positional criterion must be supplied.")

    # Both services take the same base parameters; the positional one also needs the position
    params = {"columns": "*",
              "filters": mashup_filters}
    if position:
        service = self._caom_filtered_position
        params["position"] = position
    else:
        service = self._caom_filtered

    return self._portal_api_connection.service_request_async(service, params, pagesize=pagesize, page=page)
def query_region_count(self, coordinates, *, radius=0.2*u.deg, pagesize=None, page=None):
    """
    Given a sky position and radius, returns the number of MAST observations in that region.

    Parameters
    ----------
    coordinates : str or `~astropy.coordinates` object
        The target around which to search. It may be specified as a
        string or as the appropriate `~astropy.coordinates` object.
    radius : str or `~astropy.units.Quantity` object, optional
        The string must be parsable by `~astropy.coordinates.Angle`. The
        appropriate `~astropy.units.Quantity` object from
        `~astropy.units` may also be used. Defaults to 0.2 deg.
    pagesize : int, optional
        Can be used to override the default pagesize.
        E.g. when using a slow internet connection.
    page : int, optional
        Can be used to override the default behavior of all results being returned to
        obtain a specific page of results.

    Returns
    -------
    response : int
    """
    # Normalise the target into an ICRS coordinate object
    target = commons.parse_coordinates(coordinates, return_frame='icrs')

    # if radius is just a number we assume degrees
    search_radius = coord.Angle(radius, u.deg)

    # build the "ra, dec, radius" string needed by ObservationsClass._caom_filtered_position
    position = ', '.join(map(str, (target.ra.deg, target.dec.deg, search_radius.deg)))

    count_params = {"columns": "COUNT_BIG(*)",
                    "filters": [],
                    "position": position}

    result = self._portal_api_connection.service_request(self._caom_filtered_position, count_params,
                                                         pagesize, page)
    # The count is the single cell of the returned result
    return int(result[0][0])
def query_object_count(self, objectname, *, radius=0.2*u.deg, pagesize=None, page=None, resolver=None):
    """
    Given an object name, returns the number of MAST observations.

    The name is first resolved into coordinates, then the count is obtained
    from `query_region_count`.

    Parameters
    ----------
    objectname : str
        The name of the target around which to search.
    radius : str or `~astropy.units.Quantity` object, optional
        The string must be parsable by `~astropy.coordinates.Angle`.
        The appropriate `~astropy.units.Quantity` object from
        `~astropy.units` may also be used. Defaults to 0.2 deg.
    pagesize : int, optional
        Can be used to override the default pagesize.
        E.g. when using a slow internet connection.
    page : int, optional
        Can be used to override the default behavior of all results being returned to obtain
        one specific page of results.
    resolver : str, optional
        The resolver to use when resolving a named target into coordinates. Valid options are "SIMBAD" and "NED".
        If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information. Default is None.

    Returns
    -------
    response : int
    """
    resolved_position = utils.resolve_object(objectname, resolver=resolver)

    return self.query_region_count(resolved_position, radius=radius, pagesize=pagesize, page=page)
def query_criteria_count(self, *, pagesize=None, page=None, resolver=None, **criteria):
    """
    Given a set of filters, returns the number of MAST observations meeting those criteria.

    Parameters
    ----------
    pagesize : int, optional
        Can be used to override the default pagesize.
        E.g. when using a slow internet connection.
        Forwarded to the service request, as in `query_region_count`.
    page : int, optional
        Can be used to override the default behavior of all results being returned to obtain
        one specific page of results.
        Forwarded to the service request, as in `query_region_count`.
    resolver : str, optional
        The resolver to use when resolving a named target into coordinates. Valid options are "SIMBAD" and "NED".
        If not specified, the default resolver order will be used. Please see the
        `STScI Archive Name Translation Application (SANTA) <https://mastresolver.stsci.edu/Santa-war/>`__
        for more information. Default is None.
    **criteria
        Criteria to apply. At least one non-positional criterion must be supplied.
        Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
        and all observation fields listed `here <https://mast.stsci.edu/api/v0/_c_a_o_mfields.html>`__.
        The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
        except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
        For non-float type criteria wildcards maybe used (both * and % are considered wildcards), however
        only one wildcarded value can be processed per criterion.
        RA and Dec must be given in decimal degrees, and datetimes in MJD.
        For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]

    Returns
    -------
    response : int
    """
    position, mashup_filters = self._parse_caom_criteria(resolver=resolver, **criteria)

    # Both services take the same base parameters; the positional one also needs the position
    params = {"columns": "COUNT_BIG(*)",
              "filters": mashup_filters}
    if position:
        service = self._caom_filtered_position
        params["position"] = position
    else:
        service = self._caom_filtered

    # pagesize/page were previously accepted but dropped; pass them on the same
    # way query_region_count does so the documented overrides take effect.
    result = self._portal_api_connection.service_request(service, params, pagesize, page)

    # The count is the single cell of the returned result
    return result[0][0].astype(int)
def _filter_ffi_observations(self, observations):
    """
    Drop TESS and TICA full-frame image (FFI) observations from a
    `~astropy.table.Row` or `~astropy.table.Table` of observations,
    logging a warning that lists the ``obs_id`` of everything dropped.

    Parameters
    ----------
    observations : `~astropy.table.Row` or `~astropy.table.Table`
        Row/Table of MAST query results (e.g. output from `query_object`)

    Returns
    -------
    filtered_obs_table : `~astropy.table.Table`
        The observations whose ``target_name`` is neither 'TESS FFI' nor 'TICA FFI'.
    """
    obs_table = Table(observations)
    target_names = obs_table['target_name']

    tess_ffis = obs_table[target_names == 'TESS FFI']['obs_id']
    tica_ffis = obs_table[target_names == 'TICA FFI']['obs_id']

    # Warn user if TESS FFIs exist
    if tess_ffis.size:
        tess_message = ("Because of their large size, Astroquery should not be used to "
                        "download TESS FFI products.\n"
                        "If you are looking for TESS image data for a specific target, "
                        "please use TESScut at https://mast.stsci.edu/tesscut/.\n"
                        "If you need a TESS image for an entire field, please see our "
                        "dedicated page for downloading larger quantities of TESS data at \n"
                        "https://archive.stsci.edu/tess/. Data products will not be fetched "
                        "for the following observations IDs: \n" + "\n".join(tess_ffis))
        log.warning(tess_message)

    # Warn user if TICA FFIs exist
    if tica_ffis.size:
        tica_message = ("Because of their large size, Astroquery should not be used to "
                        "download TICA FFI products.\n"
                        "Please see our dedicated page for downloading larger quantities of "
                        "TICA data: https://archive.stsci.edu/hlsp/tica.\n"
                        "Data products will not be fetched for the following "
                        "observation IDs: \n" + "\n".join(tica_ffis))
        log.warning(tica_message)

    # Keep only the rows that are not FFIs
    keep = (target_names != 'TESS FFI') & (target_names != 'TICA FFI')
    return obs_table[keep]
@class_or_instance
def get_product_list_async(self, observations, *, batch_size=500):
    """
    Given a "Product Group Id" (column name obsid) returns a list of associated data products.
    Note that obsid is NOT the same as obs_id, and inputting obs_id values will result in
    an error. See column documentation `here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.

    To return unique data products, use ``Observations.get_unique_product_list``.

    Parameters
    ----------
    observations : str or `~astropy.table.Row` or list/Table of same
        Row/Table of MAST query results (e.g. output from `query_object`)
        or single/list of MAST Product Group Id(s) (obsid).
        See description `here <https://masttest.stsci.edu/api/v0/_c_a_o_mfields.html>`__.
    batch_size : int, optional
        Default 500. Number of obsids to include in each batch request to the server.
        If you experience timeouts or connection errors, consider lowering this value.

    Returns
    -------
    response : list of `~requests.Response`
        A list of asynchronous response objects for each batch request.

    Raises
    ------
    InvalidQueryError
        If no obsid remains after cleaning the input.
    """
    # Reduce the input to a plain list of obsids
    if isinstance(observations, (Row, Table)):
        # FFI filtering needs the `target_name` field, so it is only possible for Row/Table input
        observations = self._filter_ffi_observations(observations)
        observations = observations['obsid'].tolist()
    elif np.isscalar(observations):
        observations = [observations]

    # Stringify, strip whitespace and discard blank entries
    stripped = (str(obs).strip() for obs in observations)
    observations = [obsid for obsid in stripped if obsid]
    if not observations:
        raise InvalidQueryError('Observation list is empty, no associated products.')

    def _request_joined_obsid(params):
        """Join batched obsid list into comma-separated string and send async request."""
        payload = dict(params)
        payload['obsid'] = ','.join(str(value) for value in payload.get('obsid', []))
        return self._portal_api_connection.service_request_async(self._caom_products, payload)[0]

    def _as_single_item_list(response):
        """Wrap one response so each batch contributes exactly one entry."""
        return [response]

    # Perform batched requests
    return utils._batched_request(
        items=observations,
        params={},
        max_batch=batch_size,
        param_key='obsid',
        request_func=_request_joined_obsid,
        extract_func=_as_single_item_list,
        desc=f'Fetching products for {len(observations)} unique observations'
    )
def filter_products(self, products, *, mrp_only=False, extension=None, **filters):
    """
    Filters an `~astropy.table.Table` of data products based on given filters.

    Parameters
    ----------
    products : `~astropy.table.Table`
        Table containing data products to be filtered.
    mrp_only : bool, optional
        Default False. When set to true only "Minimum Recommended Products" will be returned.
    extension : string or array, optional
        Default None. Option to filter by file extension.
    **filters :
        Column-based filters to apply to the products table. Valid filters are all products fields listed
        `here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.

        Each keyword corresponds to a column name in the table, with the argument being one or more
        acceptable values for that column. AND logic is applied between filters.

        Within each column's filter set:

        - Positive (non-negated) values are combined with OR logic.
        - Any negated values (prefixed with "!") are combined with AND logic against the ORed positives.
          This results in: (NOT any_negatives) AND (any_positives)

        Examples:
        ``productType=['A', 'B', '!C']`` → (productType != C) AND (productType == A OR productType == B)
        ``size=['!14400', '<20000']`` → (size != 14400) AND (size < 20000)

        For columns with numeric data types (int or float), filter values can be expressed
        in several ways:

        - A single number: ``size=100``
        - A range in the form "start..end": ``size="100..1000"``
        - A comparison operator followed by a number: ``size=">=1000"``
        - A list of expressions (OR logic): ``size=[100, "500..1000", ">=1500"]``

    Returns
    -------
    response : `~astropy.table.Table`
        Filtered table of data products.
    """
    # Start by keeping every row, then narrow the selection one filter at a time
    keep = np.full(len(products), True, dtype=bool)

    if mrp_only:
        is_mrp = products['productGroupDescription'] == "Minimum Recommended Products"
        keep &= is_mrp

    if extension:
        keep &= utils.apply_extension_filter(products, extension, 'productFilename')

    # Column-based filters are always applied (an empty filter dict is passed through as-is)
    keep &= utils.apply_column_filters(products, filters)

    return products[keep]
def download_file(self, uri, *, local_path=None, base_url=None, cache=True, cloud_only=False, verbose=True):
    """
    Downloads a single file based on the data URI.

    Parameters
    ----------
    uri : str
        The product dataURI, e.g. mast:JWST/product/jw00736-o039_t001_miri_ch1-long_x1d.fits
    local_path : str
        Directory or filename to which the file will be downloaded. Defaults to current working directory.
        A path is treated as a directory when it already exists as one, or when it has no file
        suffix; in that case the file name taken from ``uri`` is appended, and the directory is
        created if it does not exist yet.
    base_url : str
        A base url to use when downloading. Default is the MAST Portal API
    cache : bool
        Default is True. If file is found on disk it will not be downloaded again.
    cloud_only : bool, optional
        Default False. If set to True and cloud data access is enabled (see `enable_cloud_dataset`)
        files that are not found in the cloud will be skipped rather than downloaded from MAST
        as is the default behavior. If cloud access is not enabled this argument has no effect.
    verbose : bool, optional
        Default True. Whether to show download progress in the console.

    Returns
    -------
    status : str
        download status message. Either COMPLETE, SKIPPED, or ERROR.
    msg : str
        An error status message, if any.
    url : str
        The full url download path (only set when the status is ERROR).
    """
    # create the full data URL; the escaped form is what is actually requested,
    # the unescaped form is what is reported back on error
    base_url = base_url if base_url else self._portal_api_connection.MAST_DOWNLOAD_URL
    data_url = base_url + "?uri=" + uri
    escaped_url = base_url + "?uri=" + quote(uri, safe=":/")

    # parse a local file path from local_path parameter. Use current directory as default.
    filename = os.path.basename(uri)
    if not local_path:  # local file path is not defined
        local_path = filename
    else:
        path = Path(local_path)
        # An existing directory is a directory whatever its name looks like
        # (e.g. "data.v1"); otherwise fall back to the "no suffix" heuristic.
        if path.is_dir() or not path.suffix:
            local_path = path / filename  # append filename
            if not path.exists():  # create directory if it doesn't exist
                path.mkdir(parents=True, exist_ok=True)

    # recreate the data_product key for cloud connection check
    data_product = {'dataURI': uri}

    status = "COMPLETE"
    msg = None
    url = None

    try:
        if self._cloud_connection is not None and self._cloud_connection.is_supported(data_product):
            try:
                self._cloud_connection.download_file(data_product, local_path, cache, verbose)
            except Exception as ex:
                # Best effort: any cloud failure either skips the file or falls back to MAST
                log.exception("Error pulling from S3 bucket: {}".format(ex))
                if cloud_only:
                    log.warning("Skipping file...")
                    local_path = ""
                    status = "SKIPPED"
                else:
                    log.warning("Falling back to mast download...")
                    self._download_file(escaped_url, local_path,
                                        cache=cache, head_safe=True, verbose=verbose)
        else:
            self._download_file(escaped_url, local_path,
                                cache=cache, head_safe=True, verbose=verbose)

        # check if file exists also this is where would perform md5,
        # and also check the filesize if the database reliably reported file sizes
        if (not os.path.isfile(local_path)) and (status != "SKIPPED"):
            status = "ERROR"
            msg = "File was not downloaded"
            url = data_url

    except HTTPError as err:
        status = "ERROR"
        msg = "HTTPError: {0}".format(err)
        url = data_url

    return status, msg, url
def _download_files(self, products, base_dir, *, flat=False, cache=True, cloud_only=False, verbose=True):
    """
    Takes an `~astropy.table.Table` of data products and downloads them into the directory given by base_dir.

    Parameters
    ----------
    products : `~astropy.table.Table`
        Table containing products to be downloaded.
    base_dir : str
        Directory in which files will be downloaded.  It is created if it does not exist.
    flat : bool
        Default is False.  If set to True, no subdirectories will be made for the
        downloaded files.
    cache : bool
        Default is True. If file is found on disk it will not be downloaded again.
    cloud_only : bool, optional
        Default False. If set to True and cloud data access is enabled (see `enable_cloud_dataset`)
        files that are not found in the cloud will be skipped rather than downloaded from MAST
        as is the default behavior. If cloud access is not enabled this argument has no effect.
    verbose : bool, optional
        Default True. Whether to show download progress in the console.

    Returns
    -------
    response : `~astropy.table.Table`
        Manifest with one row per product and the columns
        'Local Path', 'Status', 'Message' and 'URL'.
    """

    manifest_array = []
    for data_product in products:

        # create the local file download path
        if flat:
            target_dir = base_dir
        else:
            target_dir = os.path.join(base_dir, data_product['obs_collection'], data_product['obs_id'])

        # exist_ok avoids the check-then-create race and also covers flat mode,
        # where a missing base_dir would otherwise make the download fail.
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        local_path = os.path.join(target_dir, os.path.basename(data_product['productFilename']))

        # download the files
        status, msg, url = self.download_file(data_product["dataURI"], local_path=local_path,
                                              cache=cache, cloud_only=cloud_only, verbose=verbose)

        manifest_array.append([local_path, status, msg, url])

    manifest = Table(rows=manifest_array, names=('Local Path', 'Status', 'Message', "URL"))

    return manifest


def _download_curl_script(self, products, out_dir, verbose=True):
    """
    Takes an `~astropy.table.Table` of data products and downloads a curl script to pull the datafiles.

    Parameters
    ----------
    products : `~astropy.table.Table`
        Table containing products to be included in the curl script.
    out_dir : str
        Directory in which the curl script will be saved.  It is created if it does not exist.
    verbose : bool, optional
        Default True. Whether to show download progress in the console.

    Returns
    -------
    response : `~astropy.table.Table`
        Single-row manifest with the columns 'Local Path', 'Status' and 'Message'.
    """

    url_list = [("uri", url) for url in products['dataURI']]
    # The timestamp keeps script names from successive calls apart.
    download_file = "mastDownload_" + time.strftime("%Y%m%d%H%M%S") + ".sh"
    local_path = os.path.join(out_dir, download_file)

    # Make sure the destination exists before the script is written to it.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    self._download_file(self._portal_api_connection.MAST_BUNDLE_URL + ".sh",
                        local_path, data=url_list, method="POST", verbose=verbose)

    status = "COMPLETE"
    msg = None

    if not os.path.isfile(local_path):
        status = "ERROR"
        msg = "Curl could not be downloaded"

    manifest = Table({'Local Path': [local_path],
                      'Status': [status],
                      'Message': [msg]})
    return manifest
def download_products(self, products, *, download_dir=None, flat=False,
                      cache=True, curl_flag=False, mrp_only=False, cloud_only=False, verbose=True,
                      **filters):
    """
    Download data products.
    If cloud access is enabled, files will be downloaded from the cloud if possible.

    Parameters
    ----------
    products : str, list, `~astropy.table.Table`
        Either a single or list of obsids (as can be given to `get_product_list`),
        or a Table of products (as is returned by `get_product_list`)
    download_dir : str, optional
        Optional.  Directory to download files to.  Defaults to current directory.
    flat : bool, optional
        Default is False.  If set to True, and download_dir is specified, it will put
        all files into download_dir without subdirectories.  Or if set to True and
        download_dir is not specified, it will put files in the current directory,
        again with no subdirs.  The default of False puts files into the standard
        directory structure of "mastDownload/<obs_collection>/<obs_id>/".  If
        curl_flag=True, the flat flag has no effect, as astroquery does not control
        how MAST generates the curl download script.
    cache : bool, optional
        Default is True. If file is found on disk it will not be downloaded again.
        Note: has no effect when downloading curl script.
    curl_flag : bool, optional
        Default is False.  If true instead of downloading files directly, a curl script
        will be downloaded that can be used to download the data files at a later time.
    mrp_only : bool, optional
        Default False. When set to true only "Minimum Recommended Products" will be returned.
    cloud_only : bool, optional
        Default False. If set to True and cloud data access is enabled (see `enable_cloud_dataset`)
        files that are not found in the cloud will be skipped rather than downloaded from MAST
        as is the default behavior. If cloud access is not enabled this argument has no effect.
    verbose : bool, optional
        Default True. Whether to show download progress in the console.
    **filters :
        Filters to be applied.  Valid filters are all products fields returned by
        ``get_metadata("products")`` and 'extension' which is the desired file extension.
        The Column Name (or 'extension') is the keyword, with the argument being one or
        more acceptable values for that parameter.
        Filter behavior is AND between the filters and OR within a filter set.
        For example: productType="SCIENCE",extension=["fits","jpg"]

    Returns
    -------
    response : `~astropy.table.Table` or None
        The manifest of files downloaded, or status of files on disk if curl option chosen.
        None (after a `NoResultsWarning`) when no products remain after filtering.
    """
    # If the products list is a row we need to cast it as a table
    if isinstance(products, Row):
        products = Table(products, masked=True)

    # If the products list is not already a table of products we need to
    # get the products and filter them appropriately
    if not isinstance(products, Table):

        if isinstance(products, str):
            products = [products]

        # collect list of products
        product_lists = [self.get_product_list(oid) for oid in products]

        products = vstack(product_lists)

    # apply filters
    products = self.filter_products(products, mrp_only=mrp_only, **filters)

    # remove duplicate products
    products = utils.remove_duplicate_products(products, 'dataURI')

    if not len(products):
        warnings.warn("No products to download.", NoResultsWarning)
        return

    # set up the download directory and paths
    if not download_dir:
        download_dir = '.'

    if curl_flag:  # don't want to download the files now, just the curl script
        if flat:
            # flat=True doesn't work with curl_flag=True, so issue a warning
            warnings.warn("flat=True has no effect on curl downloads.", InputWarning)
        # Forward ``verbose`` so that verbose=False also silences the script download.
        manifest = self._download_curl_script(products, download_dir, verbose=verbose)

    else:
        if flat:
            base_dir = download_dir
        else:
            base_dir = os.path.join(download_dir, "mastDownload")
        manifest = self._download_files(products,
                                        base_dir=base_dir, flat=flat,
                                        cache=cache,
                                        cloud_only=cloud_only,
                                        verbose=verbose)

    return manifest
def get_cloud_uris(self, data_products=None, *, include_bucket=True, full_url=False, pagesize=None, page=None,
                   mrp_only=False, extension=None, filter_products={}, return_uri_map=False, verbose=True,
                   **criteria):
    """
    Return the cloud data URIs associated with a set of data products.

    The products are either given directly (``data_products``) or looked up from
    query ``criteria``.

    Parameters
    ----------
    data_products : `~astropy.table.Table`, list
        Table containing products or list of MAST uris to be converted into cloud data uris.
        If provided, this will supersede pagesize, page, or any keyword arguments passed in as criteria.
    include_bucket : bool
        Default True. When False, returns the path of the file relative to the
        top level cloud storage location.
        Must be set to False when using the full_url argument.
    full_url : bool
        Default False. Return an HTTP fetchable url instead of a cloud uri.
        Must set include_bucket to False to use this option.
    pagesize : int, optional
        Default None. Can be used to override the default pagesize when making a query.
        E.g. when using a slow internet connection. Query criteria must also be provided.
    page : int, optional
        Default None. Can be used to override the default behavior of all results being returned for a query
        to obtain one specific page of results. Query criteria must also be provided.
    mrp_only : bool, optional
        Default False. When set to True, only "Minimum Recommended Products" will be returned.
    extension : string or array, optional
        Default None. Option to filter by file extension.
    filter_products : dict, optional
        Filters to be applied to data products.  Valid filters are all products fields listed
        `here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.
        The column name as a string is the key. The corresponding value is one
        or more acceptable values for that parameter.
        Filter behavior is AND between the filters and OR within a filter set.
        For example: {"productType": "SCIENCE", "extension": ["fits","jpg"]}
    return_uri_map : bool, optional
        Default False. If set to True, returns a dictionary mapping the original data product
        URIs to their corresponding cloud URIs.
    verbose : bool, optional
        Default True. Whether to issue warnings if a product cannot be found in the cloud.
    **criteria
        Criteria to apply. At least one non-positional criteria must be supplied.
        Valid criteria are coordinates, objectname, radius (as in `query_region` and `query_object`),
        and all observation fields returned by the ``get_metadata("observations")``.
        The Column Name is the keyword, with the argument being one or more acceptable values for that parameter,
        except for fields with a float datatype where the argument should be in the form [minVal, maxVal].
        For non-float type criteria wildcards maybe used (both * and % are considered wildcards), however
        only one wildcarded value can be processed per criterion.
        RA and Dec must be given in decimal degrees, and datetimes in MJD.
        For example: filters=["FUV","NUV"],proposal_pi="Ost*",t_max=[52264.4586,54452.8914]

    Returns
    -------
    response : list, dict or None
        List of cloud URIs, with entries that came back as None removed.
        If ``return_uri_map`` is True, a dict mapping each data URI to its cloud URI
        (values may be None).  None when the query or the filtering yields nothing.

    Raises
    ------
    RemoteServiceError
        If cloud access has not been enabled.
    InvalidQueryError
        If neither ``data_products`` nor any criteria are given.
    """

    if self._cloud_connection is None:
        raise RemoteServiceError(
            'Please enable anonymous cloud access by calling `enable_cloud_dataset` method. '
            'Refer to `~astroquery.mast.ObservationsClass.enable_cloud_dataset` documentation for more info.')

    if data_products is None:
        if not criteria:
            raise InvalidQueryError(
                'Please provide either a `~astropy.table.Table` of data products or query criteria.'
            )

        # Look up the observations matching the criteria, then their products
        observations = self.query_criteria(pagesize=pagesize, page=page, **criteria)
        if len(observations) == 0:
            # query_criteria has already warned about the empty result
            return None
        data_products = self.get_product_list(observations)

    if not isinstance(data_products, Table):
        # A plain list of MAST URIs carries no columns to filter on
        if filter_products or extension or mrp_only:
            warnings.warn('Filtering is not supported when providing a list of MAST URIs. '
                          'To apply filters, please provide query criteria or a table of data products '
                          'as returned by `Observations.get_product_list`', InputWarning)
        data_uris = data_products
    else:
        data_products = self.filter_products(data_products, mrp_only=mrp_only, extension=extension,
                                             **filter_products)
        data_uris = data_products['dataURI']

    if len(data_uris) == 0:
        warnings.warn('No matching products to fetch associated cloud URIs.', NoResultsWarning)
        return None

    # Each distinct product only needs to be resolved once
    data_uris = utils.remove_duplicate_products(data_uris, 'dataURI')

    cloud_uris = self._cloud_connection.get_cloud_uri_list(data_uris,
                                                           include_bucket=include_bucket,
                                                           full_url=full_url,
                                                           verbose=verbose)

    if return_uri_map:
        # The mapping keeps None values, so unresolved products stay visible
        return dict(zip(data_uris, cloud_uris))

    return [uri for uri in cloud_uris if uri is not None]
def get_cloud_uri(self, data_product, *, include_bucket=True, full_url=False):
    """
    Look up the cloud URI of a single data product.

    If the product is from a mission that does not support cloud access an
    exception is raised. If the mission is supported but the product
    cannot be found in the cloud, the returned path is None.

    Parameters
    ----------
    data_product : `~astropy.table.Row`, str
        Product to be converted into cloud data uri.
    include_bucket : bool
        Default True. When false returns the path of the file relative to the
        top level cloud storage location.
        Must be set to False when using the full_url argument.
    full_url : bool
        Default False. Return an HTTP fetchable url instead of a cloud uri.
        Must set include_bucket to False to use this option.

    Returns
    -------
    response : str or None
        Cloud URI generated from the data product. If the product cannot be
        found in the cloud, None is returned.

    Raises
    ------
    RemoteServiceError
        If cloud access has not been enabled.
    """

    connection = self._cloud_connection
    if connection is None:
        raise RemoteServiceError(
            'Please enable anonymous cloud access by calling `enable_cloud_dataset` method. '
            'Refer to `~astroquery.mast.ObservationsClass.enable_cloud_dataset` documentation for more info.')

    # The cloud connection does the actual lookup
    cloud_uri = connection.get_cloud_uri(data_product, include_bucket, full_url)
    return cloud_uri
def get_unique_product_list(self, observations, *, batch_size=500):
    """
    Return the data products of the given observations, one row per unique dataURI.

    The observations are identified by their "Product Group Id" (column name obsid).
    Note that obsid is NOT the same as obs_id, and inputting obs_id values will result in
    an error. See column documentation `here <https://masttest.stsci.edu/api/v0/_productsfields.html>`__.

    Parameters
    ----------
    observations : str or `~astropy.table.Row` or list/Table of same
        Row/Table of MAST query results (e.g. output from `query_object`)
        or single/list of MAST Product Group Id(s) (obsid).
        See description `here <https://masttest.stsci.edu/api/v0/_c_a_o_mfields.html>`__.
    batch_size : int, optional
        Default 500. Number of obsids to include in each batch request to the server.
        If you experience timeouts or connection errors, consider lowering this value.

    Returns
    -------
    unique_products : `~astropy.table.Table`
        Table containing products with unique dataURIs.
    """
    all_products = self.get_product_list(observations, batch_size=batch_size)
    deduplicated = utils.remove_duplicate_products(all_products, 'dataURI')

    # Tell the user how to get the full list whenever rows were dropped
    rows_were_dropped = len(deduplicated) < len(all_products)
    if rows_were_dropped:
        log.info("To return all products, use `Observations.get_product_list`")

    return deduplicated
@async_to_sync
class MastClass(MastQueryWithLogin):
    """
    MAST query class.

    Class that allows direct programmatic access to the MAST Portal,
    more flexible but less user friendly than `ObservationsClass`.
    """

    def _parse_result(self, responses, *, verbose=False):  # Used by the async_to_sync decorator functionality
        """
        Parse the results of a list of `~requests.Response` objects and returns an `~astropy.table.Table` of results.

        Parameters
        ----------
        responses : list of `~requests.Response`
            List of `~requests.Response` objects.
        verbose : bool
            (presently does nothing - there is no output with verbose set to
            True or False)
            Default False.  Setting to True provides more extensive output.

        Returns
        -------
        response : `~astropy.table.Table`
        """

        return self._portal_api_connection._parse_result(responses, verbose)

    @class_or_instance
    def service_request_async(self, service, params, *, pagesize=None, page=None, **kwargs):
        """
        Given a Mashup service and parameters, builds and executes a Mashup query.
        See documentation `here <https://mast.stsci.edu/api/v0/class_mashup_1_1_mashup_request.html>`__
        for information about how to build a Mashup request.

        Parameters
        ----------
        service : str
            The Mashup service to query.
        params : dict
            JSON object containing service parameters.
        pagesize : int, optional
            Default None.
            Can be used to override the default pagesize (set in configs) for this query only.
            E.g. when using a slow internet connection.
        page : int, optional
            Default None.
            Can be used to override the default behavior of all results being returned to obtain
            a specific page of results.
        **kwargs :
            See MashupRequest properties
            `here <https://mast.stsci.edu/api/v0/class_mashup_1_1_mashup_request.html>`__
            for additional keyword arguments.

        Returns
        -------
        response : list of `~requests.Response`
        """

        return self._portal_api_connection.service_request_async(service, params, pagesize, page, **kwargs)

    def _normalize_filter_value(self, key: str, value) -> list:
        """
        Normalize a filter value into a list suitable for MAST filters.

        Parameters
        ----------
        key : str
            Parameter name (used for error messages).
        value : any
            Raw filter value.  A dict is treated as a range filter; NumPy arrays,
            NumPy scalars, sets and tuples are converted to plain Python values.

        Returns
        -------
        list
            Normalized filter values.

        Raises
        ------
        InvalidQueryError
            If ``value`` is a dict without both "min" and "max" keys.
        """
        # Range filters must be dicts with 'min' and 'max'
        if isinstance(value, dict):
            if not {"min", "max"}.issubset(value.keys()):
                raise InvalidQueryError(
                    f'Range filter for "{key}" must be a dictionary with "min" and "max" keys.'
                )
            return [value]

        if isinstance(value, np.ndarray):
            # tolist() yields plain Python values (a bare scalar for 0-d arrays)
            value = value.tolist()
        elif isinstance(value, np.generic):
            # NumPy scalars get the same treatment as arrays so that the request
            # only ever contains plain Python values
            value = value.item()

        # Convert sets or tuples to lists
        if isinstance(value, (set, tuple)):
            value = list(value)

        # Wrap scalars into a list
        return value if isinstance(value, list) else [value]

    def _build_filters(self, service_params):
        """
        Construct filters for filtered services.

        Parameters
        ----------
        service_params : dict
            Parameters not classified as request/position keys.

        Returns
        -------
        list of dict
            Filters suitable for a MAST filtered query, one
            ``{"paramName": ..., "values": [...]}`` entry per parameter.
        """
        return [
            {"paramName": key, "values": self._normalize_filter_value(key, value)}
            for key, value in service_params.items()
        ]

    def mast_query(self, service, columns=None, **kwargs):
        """
        Given a Mashup service and parameters as keyword arguments, builds and executes a Mashup query.

        Parameters
        ----------
        service : str
            The Mashup service to query.
        columns : str or list, optional
            Specifies the columns to be returned as a comma-separated list, e.g. "ID, ra, dec".
            A list or tuple of column names is joined with commas.
        **kwargs :
            Service-specific parameters and MashupRequest properties. See the
            `service documentation <https://mast.stsci.edu/api/v0/_services.html>`__ and the
            `MashupRequest Class Reference <https://mast.stsci.edu/api/v0/class_mashup_1_1_mashup_request.html>`__
            for valid keyword arguments.

            For filtered services (i.e. those with "filtered" in the service name), parameters that are not related to
            position or MashupRequest properties are treated as filters. If the column has discrete values, the
            parameter value should be a single value or list of values, and values will be matched exactly. If the
            column is continuous, you can filter by a single value, a list of values, or a range of values. If
            filtering by a range of values, the parameter value should be a dict in the form
            ``{'min': minVal, 'max': maxVal}``.

        Returns
        -------
        response : `~astropy.table.Table`

        Raises
        ------
        InvalidQueryError
            If a filtered service is queried without any filter.
        """
        # Specific keywords related to positional and MashupRequest parameters.
        position_keys = {'ra', 'dec', 'radius', 'position'}
        request_keys = {'format', 'data', 'filename', 'timeout', 'clearcache',
                        'removecache', 'removenullcolumns', 'page', 'pagesize'}
        # Build the union once instead of once per keyword argument
        reserved_keys = position_keys | request_keys

        # Split params into categories (keys are matched case-insensitively)
        position_params = {k: v for k, v in kwargs.items() if k.lower() in position_keys}
        request_params = {k: v for k, v in kwargs.items() if k.lower() in request_keys}
        service_params = {k: v for k, v in kwargs.items() if k.lower() not in reserved_keys}

        # Handle filtered vs. non-filtered services
        if 'filtered' in service.lower():
            filters = self._build_filters(service_params)

            if not filters:
                raise InvalidQueryError('Please provide at least one filter.')

            # The service takes a comma-separated string of column names
            if isinstance(columns, (list, tuple)):
                columns = ','.join(columns)

            params = {
                'columns': columns or '*',
                'filters': filters,
                **position_params,
            }
        else:
            if columns is not None:
                warnings.warn(
                    "'columns' parameter is ignored for non-filtered services.",
                    InputWarning
                )

            params = {**service_params, **position_params}

        # NOTE(review): `service_request` is not defined in this class; presumably
        # `async_to_sync` derives it from `service_request_async` - confirm.
        return self.service_request(service, params, **request_params)
# Ready-to-use module-level instances of the query classes defined above;
# both names are listed in `__all__`.
Observations = ObservationsClass()
Mast = MastClass()