"""Module implementing the galsample function
used for galsampling
"""
import numpy as np
from scipy.spatial import cKDTree
from numba import njit
from collections import namedtuple
from .crossmatch import crossmatch, compute_richness
GalsamplerCorrespondence = namedtuple(
"GalsamplerCorrespondence",
[
"target_gals_selection_indx",
"target_gals_target_halo_ids",
"target_gals_source_halo_ids",
],
)
__all__ = ("galsample", "calculate_halo_correspondence")
@njit
def galaxy_selection_kernel(first_source_gal_indices, richness, n_target_halo, result):
"""Numba kernel filling in array of galaxy selection indices
Parameters
----------
first_source_gal_indices : ndarray of shape (n_target_halo, )
Stores the index of the first galaxy in the galaxy catalog
assigned to each target halo
richness : ndarray of shape (n_target_halo, )
Stores the number of galaxies that will be mapped to each target halo
n_target_halo : int
result : ndarray of shape richness.sum()
"""
cur = 0
for i in range(n_target_halo):
ifirst = first_source_gal_indices[i]
n = richness[i]
if n > 0:
ilast = ifirst + richness[i]
for j in range(ifirst, ilast):
result[cur] = j
cur += 1
def _get_data_block(*halo_properties):
return np.vstack(halo_properties).T
def calculate_halo_correspondence(source_halo_props, target_halo_props, n_threads=-1):
"""Calculating indexing array defined by a statistical correspondence between
source and target halos.
Parameters
----------
source_halo_props : sequence of n_props ndarrays
Each ndarray should have shape (n_source_halos, )
target_halo_props : sequence of n_props ndarrays
Each ndarray should have shape (n_target_halos, )
Returns
-------
dd_match : ndarray of shape (n_target_halos, )
Euclidean distance to the source halo matched to each target halo
indx_match : ndarray of shape (n_target_halos, )
Index of the source halo matched to each target halo
"""
assert len(source_halo_props) == len(target_halo_props)
X_source = _get_data_block(*source_halo_props)
X_target = _get_data_block(*target_halo_props)
source_tree = cKDTree(X_source)
dd_match, indx_match = source_tree.query(X_target, workers=n_threads)
return dd_match, indx_match
[docs]
def galsample(
source_galaxies_host_halo_id,
source_halo_ids,
target_halo_ids,
source_halo_props,
target_halo_props,
):
"""Calculate the indexing array that transfers source galaxies to target halos
Parameters
----------
source_galaxies_host_halo_id : ndarray of shape (n_source_gals, )
Integer array storing the value of the halo ID
of the source halo of each source galaxy
Each entry in source_galaxies_host_halo_id must appear in source_halo_ids
This quantity can be computed via the following function in halotools:
halotools.utils.value_added_halo_table_functions.compute_uber_hostid
source_halo_ids : ndarray of shape (n_source_halos, )
target_halo_ids : ndarray of shape (n_target_halos, )
source_halo_props : sequence of n_props ndarrays
Sequence of n_props of ndarrays, each with shape (n_source_halos, )
These properties determine the correspondence between source and target halos
target_halo_props : sequence of n_props ndarrays
Sequence of n_props of ndarrays, each with shape (n_target_halos, )
These properties determine the correspondence between source and target halos
Returns
-------
target_gals_selection_indx : ndarray of shape (n_target_gals, )
Integer array storing values in the range [0, n_source_gals-1]
target_galaxy_target_halo_ids : ndarray of shape (n_target_gals, )
Integer array storing values appearing in target_halo_ids
target_galaxy_source_halo_ids : ndarray of shape (n_target_gals, )
Integer array storing values appearing in source_halo_ids
"""
# Sort the source galaxies so that members of a common halo are grouped together
idx_sorted_source_galaxies = np.argsort(source_galaxies_host_halo_id)
sorted_source_galaxies_host_halo_id = source_galaxies_host_halo_id[
idx_sorted_source_galaxies
]
# Calculate the index correspondence array that will undo the sorting at the end
num_source_gals = len(source_galaxies_host_halo_id)
idx_unsorted_galaxy_indices = np.arange(num_source_gals).astype("i8")[
idx_sorted_source_galaxies
]
# For each source halo, calculate the number of resident galaxies
source_halos_richness = compute_richness(
source_halo_ids, sorted_source_galaxies_host_halo_id
)
# For each source halo, calculate the index of its first resident galaxy
source_halo_sorted_source_galaxies_indices = _galaxy_table_indices(
source_halo_ids, sorted_source_galaxies_host_halo_id
)
# For each target halo, calculate the index of the associated source halo
__, source_halo_selection_indices = calculate_halo_correspondence(
source_halo_props, target_halo_props
)
# For each target halo, calculate the number of galaxies
target_halo_richness = source_halos_richness[source_halo_selection_indices]
num_target_gals = np.sum(target_halo_richness)
# For each target halo, calculate the halo ID of the associated source halo
target_halo_source_halo_ids = source_halo_ids[source_halo_selection_indices]
# For each target halo, calculate the index of its first resident galaxy
target_halo_first_sorted_source_gal_indices = (
source_halo_sorted_source_galaxies_indices[source_halo_selection_indices]
)
# For every target halo, we know the index of the first and last galaxy to select
# Calculate an array of shape (num_target_gals, )
# with the index of each selected galaxy
n_target_halos = target_halo_ids.size
sorted_source_galaxy_selection_indices = np.zeros(num_target_gals).astype(int)
galaxy_selection_kernel(
target_halo_first_sorted_source_gal_indices.astype("i8"),
target_halo_richness.astype("i4"),
n_target_halos,
sorted_source_galaxy_selection_indices,
)
# For each target galaxy, calculate the halo ID of its source and target halo
target_gals_target_halo_ids = np.repeat(target_halo_ids, target_halo_richness)
target_gals_source_halo_ids = np.repeat(
target_halo_source_halo_ids, target_halo_richness
)
# For each index in the sorted galaxy catalog,
# calculate the index of the catalog in its original order
target_gals_selection_indx = idx_unsorted_galaxy_indices[
sorted_source_galaxy_selection_indices
]
return GalsamplerCorrespondence(
target_gals_selection_indx,
target_gals_target_halo_ids,
target_gals_source_halo_ids,
)
def _galaxy_table_indices(source_halo_id, galaxy_host_halo_id):
"""For every halo in the source halo catalog, calculate the index
in the source galaxy catalog of the first appearance of a galaxy that
occupies the halo, reserving -1 for source halos with no resident galaxies.
Parameters
----------
source_halo_id : ndarray
Numpy integer array of shape (num_halos, )
galaxy_host_halo_id : ndarray
Numpy integer array of shape (num_gals, )
Returns
-------
indices : ndarray
Numpy integer array of shape (num_halos, ).
All values will be in the interval [-1, num_gals)
"""
uval_gals, indx_uval_gals = np.unique(galaxy_host_halo_id, return_index=True)
idxA, idxB = crossmatch(source_halo_id, uval_gals)
num_source_halos = len(source_halo_id)
indices = np.zeros(num_source_halos) - 1
indices[idxA] = indx_uval_gals[idxB]
return indices.astype(int)
def calculate_indx_correspondence(source_props, target_props, n_threads=-1):
"""For each target data object, find a closely matching source data object
Parameters
----------
source_props : list of n_props ndarrays
Each ndarray should have shape (n_source, )
target_props : list of n_props ndarrays
Each ndarray should have shape (n_target, )
Returns
-------
dd_match : ndarray of shape (n_target, )
Euclidean distance between each target and its matching source object
indx_match : ndarray of shape (n_target, )
Index into the source object that is matched to each target
Notes
-----
This function essentially provides wrapper behavior around scipy.spatial.cKDTree
"""
assert len(source_props) == len(target_props)
X_source = _get_data_block(*source_props)
X_target = _get_data_block(*target_props)
source_tree = cKDTree(X_source)
dd_match, indx_match = source_tree.query(X_target, workers=n_threads)
return dd_match, indx_match