Source code for galsampler.crossmatch

"""
"""
import numpy as np


__all__ = ("crossmatch", "compute_richness")



[docs]
def crossmatch(x, y, skip_bounds_checking=False):
    """
    Finds where the elements of ``x`` appear in the array ``y``, including repeats.

    The elements in x may be repeated, but the elements in y must be unique.
    The arrays x and y may be only partially overlapping.

    Applications of this function involve cross-matching two catalogs which share an ID.

    Parameters
    ----------
    x : integer array
        Array of integers with possibly repeated entries.

    y : integer array
        Array of unique integers.

    skip_bounds_checking : bool, optional
        The first step in the `crossmatch` function is to test that the input
        arrays satisfy the assumptions of the algorithm
        (namely that ``x`` and ``y`` store integers,
        and that all values in ``y`` are unique).
        If ``skip_bounds_checking`` is set to True,
        this testing is bypassed and the function evaluates faster.
        Default is False.

    Returns
    -------
    idx_x : integer array
        Integer array used to apply a mask to x
        such that x[idx_x] = y[idx_y]

    y_idx : integer array
        Integer array used to apply a mask to y
        such that x[idx_x] = y[idx_y]

    """
    # Ensure inputs are Numpy arrays
    x = np.atleast_1d(x)
    y = np.atleast_1d(y)

    # Require that the inputs meet the assumptions of the algorithm
    if skip_bounds_checking is True:
        pass
    else:
        try:
            assert len(set(y)) == len(y)
            assert np.all(np.array(y, dtype=np.int64) == y)
            assert np.shape(y) == (len(y),)
        except (AssertionError, ValueError, TypeError):
            msg = "Input array y must be a 1d sequence of unique integers"
            raise ValueError(msg)
        try:
            assert np.all(np.array(x, dtype=np.int64) == x)
            assert np.shape(x) == (len(x),)
        except (AssertionError, ValueError, TypeError):
            msg = "Input array x must be a 1d sequence of integers"
            raise ValueError(msg)

    # Internally, we will work with sorted arrays, and then undo the sorting at the end
    idx_x_sorted = np.argsort(x)
    idx_y_sorted = np.argsort(y)
    x_sorted = np.copy(x[idx_x_sorted])
    y_sorted = np.copy(y[idx_y_sorted])

    # x may have repeated entries
    # Address by finding the unique values as well as their multiplicity
    unique_xvals, counts = np.unique(x_sorted, return_counts=True)

    # Determine which of the unique x values has a match in y
    unique_xval_has_match = np.in1d(unique_xvals, y_sorted, assume_unique=True)

    # Create a boolean array with True for each value in x with a match, otherwise False
    idx_x = np.repeat(unique_xval_has_match, counts)

    # For each unique value of x with a match in y, identify the index of the match
    matching_indices_in_y = np.searchsorted(
        y_sorted, unique_xvals[unique_xval_has_match]
    )

    # Repeat each matching index according to the multiplicity in x
    idx_y = np.repeat(matching_indices_in_y, counts[unique_xval_has_match])

    # Undo the original sorting and return the result
    return idx_x_sorted[idx_x], idx_y_sorted[idx_y]




[docs]
def compute_richness(unique_halo_ids, halo_id_of_galaxies):
    r"""For every ID in unique_halo_ids,
    calculate the number of times the ID appears in halo_id_of_galaxies.

    Parameters
    ----------
    unique_halo_ids : ndarray
        Numpy array of shape (num_halos, ) storing unique integers

    halo_id_of_galaxies : ndarray
        Numpy integer array of shape (num_galaxies, ) storing the host ID of each galaxy

    Returns
    -------
    richness : ndarray
        Numpy integer array of shape (num_halos, ) storing richness of each host halo

    Examples
    --------
    >>> num_hosts = 100
    >>> num_sats = int(1e5)
    >>> unique_halo_ids = np.arange(5, num_hosts + 5)
    >>> halo_id_of_galaxies = np.random.randint(0, 5000, num_sats)
    >>> richness = compute_richness(unique_halo_ids, halo_id_of_galaxies)
    """
    unique_halo_ids = np.atleast_1d(unique_halo_ids).astype(int)
    halo_id_of_galaxies = np.atleast_1d(halo_id_of_galaxies).astype(int)
    richness_result = np.zeros_like(unique_halo_ids).astype(int)

    vals, counts = np.unique(halo_id_of_galaxies, return_counts=True)
    idxA, idxB = crossmatch(vals, unique_halo_ids)
    richness_result[idxB] = counts[idxA]
    return richness_result
Source code for galsampler.crossmatch

galsampler

Navigation

Related Topics