Source code for galsampler.galmatch

"""Module implementing the galsample function
used for galsampling
"""
import numpy as np
from scipy.spatial import cKDTree
from numba import njit
from collections import namedtuple
from .crossmatch import crossmatch, compute_richness

GalsamplerCorrespondence = namedtuple(
    "GalsamplerCorrespondence",
    [
        "target_gals_selection_indx",
        "target_gals_target_halo_ids",
        "target_gals_source_halo_ids",
    ],
)

__all__ = ("galsample", "calculate_halo_correspondence")


@njit
def galaxy_selection_kernel(first_source_gal_indices, richness, n_target_halo, result):
    """Numba kernel filling in array of galaxy selection indices

    Parameters
    ----------
    first_source_gal_indices : ndarray of shape (n_target_halo, )
        Stores the index of the first galaxy in the galaxy catalog
        assigned to each target halo

    richness : ndarray of shape (n_target_halo, )
        Stores the number of galaxies that will be mapped to each target halo

    n_target_halo : int

    result : ndarray of shape richness.sum()

    """
    cur = 0
    for i in range(n_target_halo):
        ifirst = first_source_gal_indices[i]
        n = richness[i]
        if n > 0:
            ilast = ifirst + richness[i]
            for j in range(ifirst, ilast):
                result[cur] = j
                cur += 1


def _get_data_block(*halo_properties):
    return np.vstack(halo_properties).T


def calculate_halo_correspondence(source_halo_props, target_halo_props, n_threads=-1):
    """Calculating indexing array defined by a statistical correspondence between
    source and target halos.

    Parameters
    ----------
    source_halo_props : sequence of n_props ndarrays
        Each ndarray should have shape (n_source_halos, )

    target_halo_props : sequence of n_props ndarrays
        Each ndarray should have shape (n_target_halos, )

    Returns
    -------
    dd_match : ndarray of shape (n_target_halos, )
        Euclidean distance to the source halo matched to each target halo

    indx_match : ndarray of shape (n_target_halos, )
        Index of the source halo matched to each target halo

    """
    assert len(source_halo_props) == len(target_halo_props)
    X_source = _get_data_block(*source_halo_props)
    X_target = _get_data_block(*target_halo_props)
    source_tree = cKDTree(X_source)
    dd_match, indx_match = source_tree.query(X_target, workers=n_threads)
    return dd_match, indx_match


[docs] def galsample( source_galaxies_host_halo_id, source_halo_ids, target_halo_ids, source_halo_props, target_halo_props, ): """Calculate the indexing array that transfers source galaxies to target halos Parameters ---------- source_galaxies_host_halo_id : ndarray of shape (n_source_gals, ) Integer array storing the value of the halo ID of the source halo of each source galaxy Each entry in source_galaxies_host_halo_id must appear in source_halo_ids This quantity can be computed via the following function in halotools: halotools.utils.value_added_halo_table_functions.compute_uber_hostid source_halo_ids : ndarray of shape (n_source_halos, ) target_halo_ids : ndarray of shape (n_target_halos, ) source_halo_props : sequence of n_props ndarrays Sequence of n_props of ndarrays, each with shape (n_source_halos, ) These properties determine the correspondence between source and target halos target_halo_props : sequence of n_props ndarrays Sequence of n_props of ndarrays, each with shape (n_target_halos, ) These properties determine the correspondence between source and target halos Returns ------- target_gals_selection_indx : ndarray of shape (n_target_gals, ) Integer array storing values in the range [0, n_source_gals-1] target_galaxy_target_halo_ids : ndarray of shape (n_target_gals, ) Integer array storing values appearing in target_halo_ids target_galaxy_source_halo_ids : ndarray of shape (n_target_gals, ) Integer array storing values appearing in source_halo_ids """ # Sort the source galaxies so that members of a common halo are grouped together idx_sorted_source_galaxies = np.argsort(source_galaxies_host_halo_id) sorted_source_galaxies_host_halo_id = source_galaxies_host_halo_id[ idx_sorted_source_galaxies ] # Calculate the index correspondence array that will undo the sorting at the end num_source_gals = len(source_galaxies_host_halo_id) idx_unsorted_galaxy_indices = np.arange(num_source_gals).astype("i8")[ idx_sorted_source_galaxies ] # For each source halo, calculate the number of resident galaxies source_halos_richness = compute_richness( source_halo_ids, sorted_source_galaxies_host_halo_id ) # For each source halo, calculate the index of its first resident galaxy source_halo_sorted_source_galaxies_indices = _galaxy_table_indices( source_halo_ids, sorted_source_galaxies_host_halo_id ) # For each target halo, calculate the index of the associated source halo __, source_halo_selection_indices = calculate_halo_correspondence( source_halo_props, target_halo_props ) # For each target halo, calculate the number of galaxies target_halo_richness = source_halos_richness[source_halo_selection_indices] num_target_gals = np.sum(target_halo_richness) # For each target halo, calculate the halo ID of the associated source halo target_halo_source_halo_ids = source_halo_ids[source_halo_selection_indices] # For each target halo, calculate the index of its first resident galaxy target_halo_first_sorted_source_gal_indices = ( source_halo_sorted_source_galaxies_indices[source_halo_selection_indices] ) # For every target halo, we know the index of the first and last galaxy to select # Calculate an array of shape (num_target_gals, ) # with the index of each selected galaxy n_target_halos = target_halo_ids.size sorted_source_galaxy_selection_indices = np.zeros(num_target_gals).astype(int) galaxy_selection_kernel( target_halo_first_sorted_source_gal_indices.astype("i8"), target_halo_richness.astype("i4"), n_target_halos, sorted_source_galaxy_selection_indices, ) # For each target galaxy, calculate the halo ID of its source and target halo target_gals_target_halo_ids = np.repeat(target_halo_ids, target_halo_richness) target_gals_source_halo_ids = np.repeat( target_halo_source_halo_ids, target_halo_richness ) # For each index in the sorted galaxy catalog, # calculate the index of the catalog in its original order target_gals_selection_indx = idx_unsorted_galaxy_indices[ sorted_source_galaxy_selection_indices ] return GalsamplerCorrespondence( target_gals_selection_indx, target_gals_target_halo_ids, target_gals_source_halo_ids, )
def _galaxy_table_indices(source_halo_id, galaxy_host_halo_id): """For every halo in the source halo catalog, calculate the index in the source galaxy catalog of the first appearance of a galaxy that occupies the halo, reserving -1 for source halos with no resident galaxies. Parameters ---------- source_halo_id : ndarray Numpy integer array of shape (num_halos, ) galaxy_host_halo_id : ndarray Numpy integer array of shape (num_gals, ) Returns ------- indices : ndarray Numpy integer array of shape (num_halos, ). All values will be in the interval [-1, num_gals) """ uval_gals, indx_uval_gals = np.unique(galaxy_host_halo_id, return_index=True) idxA, idxB = crossmatch(source_halo_id, uval_gals) num_source_halos = len(source_halo_id) indices = np.zeros(num_source_halos) - 1 indices[idxA] = indx_uval_gals[idxB] return indices.astype(int) def calculate_indx_correspondence(source_props, target_props, n_threads=-1): """For each target data object, find a closely matching source data object Parameters ---------- source_props : list of n_props ndarrays Each ndarray should have shape (n_source, ) target_props : list of n_props ndarrays Each ndarray should have shape (n_target, ) Returns ------- dd_match : ndarray of shape (n_target, ) Euclidean distance between each target and its matching source object indx_match : ndarray of shape (n_target, ) Index into the source object that is matched to each target Notes ----- This function essentially provides wrapper behavior around scipy.spatial.cKDTree """ assert len(source_props) == len(target_props) X_source = _get_data_block(*source_props) X_target = _get_data_block(*target_props) source_tree = cKDTree(X_source) dd_match, indx_match = source_tree.query(X_target, workers=n_threads) return dd_match, indx_match