# Author: Leland McInnes <leland.mcinnes@gmail.com>
#
# License: BSD 3 clause
import numpy as np
import numba

from pynndescent.optimal_transport import (
    allocate_graph_structures,
    initialize_graph_structures,
    initialize_supply,
    initialize_cost,
    network_simplex_core,
    total_cost,
    ProblemStatus,
    sinkhorn_transport_plan,
)

_mock_identity = np.eye(2, dtype=np.float32)
_mock_ones = np.ones(2, dtype=np.float32)
_dummy_cost = np.zeros((2, 2), dtype=np.float64)

FLOAT32_EPS = np.finfo(np.float32).eps
FLOAT32_MAX = np.finfo(np.float32).max

popcnt = np.array(
    [bin(i).count('1') for i in range(256)],
    dtype=np.float32
)


@numba.njit(fastmath=True)
def euclidean(x, y):
    r"""Standard euclidean distance.

    .. math::
        D(x, y) = \\sqrt{\sum_i (x_i - y_i)^2}
    """
    result = 0.0
    for i in range(x.shape[0]):
        result += (x[i] - y[i]) ** 2
    return np.sqrt(result)


@numba.njit(
    [
        "f4(f4[::1],f4[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "diff": numba.types.float32,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def squared_euclidean(x, y):
    r"""Squared euclidean distance.

    .. math::
        D(x, y) = \sum_i (x_i - y_i)^2
    """
    result = 0.0
    dim = x.shape[0]
    for i in range(dim):
        diff = x[i] - y[i]
        result += diff * diff

    return result


@numba.njit(fastmath=True)
def standardised_euclidean(x, y, sigma=_mock_ones):
    r"""Euclidean distance standardised against a vector of standard
    deviations per coordinate.

    .. math::
        D(x, y) = \sqrt{\sum_i \frac{(x_i - y_i)**2}{v_i}}
    """
    result = 0.0
    for i in range(x.shape[0]):
        result += ((x[i] - y[i]) ** 2) / sigma[i]

    return np.sqrt(result)


@numba.njit(fastmath=True)
def manhattan(x, y):
    r"""Manhattan, taxicab, or l1 distance.

    .. math::
        D(x, y) = \sum_i |x_i - y_i|
    """
    result = 0.0
    for i in range(x.shape[0]):
        result += np.abs(x[i] - y[i])

    return result


@numba.njit(fastmath=True)
def chebyshev(x, y):
    r"""Chebyshev or l-infinity distance.

    .. math::
        D(x, y) = \max_i |x_i - y_i|
    """
    result = 0.0
    for i in range(x.shape[0]):
        result = max(result, np.abs(x[i] - y[i]))

    return result


@numba.njit(fastmath=True)
def minkowski(x, y, p=2):
    r"""Minkowski distance.

    .. math::
        D(x, y) = \left(\sum_i |x_i - y_i|^p\right)^{\frac{1}{p}}

    This is a general distance. For p=1 it is equivalent to
    manhattan distance, for p=2 it is Euclidean distance, and
    for p=infinity it is Chebyshev distance. In general it is better
    to use the more specialised functions for those distances.
    """
    result = 0.0
    for i in range(x.shape[0]):
        result += (np.abs(x[i] - y[i])) ** p

    return result ** (1.0 / p)


@numba.njit(fastmath=True)
def weighted_minkowski(x, y, w=_mock_ones, p=2):
    r"""A weighted version of Minkowski distance.

    .. math::
        D(x, y) = \left(\sum_i w_i |x_i - y_i|^p\right)^{\frac{1}{p}}

    If weights w_i are inverse standard deviations of graph_data in each dimension
    then this represented a standardised Minkowski distance (and is
    equivalent to standardised Euclidean distance for p=1).
    """
    result = 0.0
    for i in range(x.shape[0]):
        result += w[i] * np.abs(x[i] - y[i]) ** p

    return result ** (1.0 / p)


@numba.njit(fastmath=True)
def mahalanobis(x, y, vinv=_mock_identity):
    result = 0.0

    diff = np.empty(x.shape[0], dtype=np.float32)

    for i in range(x.shape[0]):
        diff[i] = x[i] - y[i]

    for i in range(x.shape[0]):
        tmp = 0.0
        for j in range(x.shape[0]):
            tmp += vinv[i, j] * diff[j]
        result += tmp * diff[i]

    return np.sqrt(result)


@numba.njit(fastmath=True)
def hamming(x, y):
    result = 0.0
    for i in range(x.shape[0]):
        if x[i] != y[i]:
            result += 1.0

    return float(result) / x.shape[0]


@numba.njit(fastmath=True)
def canberra(x, y):
    result = 0.0
    for i in range(x.shape[0]):
        denominator = np.abs(x[i]) + np.abs(y[i])
        if denominator > 0:
            result += np.abs(x[i] - y[i]) / denominator

    return result


@numba.njit(fastmath=True)
def bray_curtis(x, y):
    numerator = 0.0
    denominator = 0.0
    for i in range(x.shape[0]):
        numerator += np.abs(x[i] - y[i])
        denominator += np.abs(x[i] + y[i])

    if denominator > 0.0:
        return float(numerator) / denominator
    else:
        return 0.0


@numba.njit(fastmath=True)
def jaccard(x, y):
    num_non_zero = 0.0
    num_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_non_zero += x_true or y_true
        num_equal += x_true and y_true

    if num_non_zero == 0.0:
        return 0.0
    else:
        return float(num_non_zero - num_equal) / num_non_zero


@numba.njit(
    [
        "f4(f4[::1],f4[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "num_non_zero": numba.types.float32,
        "num_equal": numba.types.float32,
        "x_true": numba.types.uint8,
        "y_true": numba.types.uint8,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def alternative_jaccard(x, y):
    num_non_zero = 0.0
    num_equal = 0.0
    dim = x.shape[0]
    for i in range(dim):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_non_zero += x_true or y_true
        num_equal += x_true and y_true

    if num_non_zero == 0.0:
        return 0.0
    else:
        return -np.log2(num_equal / num_non_zero)


@numba.vectorize(fastmath=True)
def correct_alternative_jaccard(v):
    return 1.0 - pow(2.0, -v)


@numba.njit(fastmath=True)
def matching(x, y):
    num_not_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_not_equal += x_true != y_true

    return float(num_not_equal) / x.shape[0]


@numba.njit(fastmath=True)
def dice(x, y):
    num_true_true = 0.0
    num_not_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_true_true += x_true and y_true
        num_not_equal += x_true != y_true

    if num_not_equal == 0.0:
        return 0.0
    else:
        return num_not_equal / (2.0 * num_true_true + num_not_equal)


@numba.njit(fastmath=True)
def kulsinski(x, y):
    num_true_true = 0.0
    num_not_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_true_true += x_true and y_true
        num_not_equal += x_true != y_true

    if num_not_equal == 0:
        return 0.0
    else:
        return float(num_not_equal - num_true_true + x.shape[0]) / (
            num_not_equal + x.shape[0]
        )


@numba.njit(fastmath=True)
def rogers_tanimoto(x, y):
    num_not_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_not_equal += x_true != y_true

    return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal)


@numba.njit(fastmath=True)
def russellrao(x, y):
    num_true_true = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_true_true += x_true and y_true

    if num_true_true == np.sum(x != 0) and num_true_true == np.sum(y != 0):
        return 0.0
    else:
        return float(x.shape[0] - num_true_true) / (x.shape[0])


@numba.njit(fastmath=True)
def sokal_michener(x, y):
    num_not_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_not_equal += x_true != y_true

    return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal)


@numba.njit(fastmath=True)
def sokal_sneath(x, y):
    num_true_true = 0.0
    num_not_equal = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_true_true += x_true and y_true
        num_not_equal += x_true != y_true

    if num_not_equal == 0.0:
        return 0.0
    else:
        return num_not_equal / (0.5 * num_true_true + num_not_equal)


@numba.njit(fastmath=True)
def haversine(x, y):
    if x.shape[0] != 2:
        raise ValueError("haversine is only defined for 2 dimensional graph_data")
    sin_lat = np.sin(0.5 * (x[0] - y[0]))
    sin_long = np.sin(0.5 * (x[1] - y[1]))
    result = np.sqrt(sin_lat**2 + np.cos(x[0]) * np.cos(y[0]) * sin_long**2)
    return 2.0 * np.arcsin(result)


@numba.njit(fastmath=True)
def yule(x, y):
    num_true_true = 0.0
    num_true_false = 0.0
    num_false_true = 0.0
    for i in range(x.shape[0]):
        x_true = x[i] != 0
        y_true = y[i] != 0
        num_true_true += x_true and y_true
        num_true_false += x_true and (not y_true)
        num_false_true += (not x_true) and y_true

    num_false_false = x.shape[0] - num_true_true - num_true_false - num_false_true

    if num_true_false == 0.0 or num_false_true == 0.0:
        return 0.0
    else:
        return (2.0 * num_true_false * num_false_true) / (
            num_true_true * num_false_false + num_true_false * num_false_true
        )


@numba.njit(fastmath=True)
def cosine(x, y):
    result = 0.0
    norm_x = 0.0
    norm_y = 0.0
    for i in range(x.shape[0]):
        result += x[i] * y[i]
        norm_x += x[i] ** 2
        norm_y += y[i] ** 2

    if norm_x == 0.0 and norm_y == 0.0:
        return 0.0
    elif norm_x == 0.0 or norm_y == 0.0:
        return 1.0
    else:
        return 1.0 - (result / np.sqrt(norm_x * norm_y))


@numba.njit(
    [
        "f4(f4[::1],f4[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "norm_x": numba.types.float32,
        "norm_y": numba.types.float32,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def alternative_cosine(x, y):
    result = 0.0
    norm_x = 0.0
    norm_y = 0.0
    dim = x.shape[0]
    for i in range(dim):
        result += x[i] * y[i]
        norm_x += x[i] * x[i]
        norm_y += y[i] * y[i]

    if norm_x == 0.0 and norm_y == 0.0:
        return 0.0
    elif norm_x == 0.0 or norm_y == 0.0:
        return FLOAT32_MAX
    elif result <= 0.0:
        return FLOAT32_MAX
    else:
        result = np.sqrt(norm_x * norm_y) / result
        return np.log2(result)


@numba.njit(
    "f4(f4[::1],f4[::1])",
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def dot(x, y):
    result = 0.0
    dim = x.shape[0]
    for i in range(dim):
        result += x[i] * y[i]

    if result <= 0.0:
        return 1.0
    else:
        return 1.0 - result


@numba.njit(
    [
        "f4(f4[::1],f4[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def alternative_dot(x, y):
    result = 0.0
    dim = x.shape[0]
    for i in range(dim):
        result += x[i] * y[i]

    if result <= 0.0:
        return FLOAT32_MAX
    else:
        return -np.log2(result)


@numba.vectorize(fastmath=True)
def correct_alternative_cosine(d):
    return 1.0 - pow(2.0, -d)


@numba.njit(fastmath=True)
def tsss(x, y):
    d_euc_squared = 0.0
    d_cos = 0.0
    norm_x = 0.0
    norm_y = 0.0
    dim = x.shape[0]

    for i in range(dim):
        diff = x[i] - y[i]
        d_euc_squared += diff * diff
        d_cos += x[i] * y[i]
        norm_x += x[i] * x[i]
        norm_y += y[i] * y[i]

    norm_x = np.sqrt(norm_x)
    norm_y = np.sqrt(norm_y)
    magnitude_difference = np.abs(norm_x - norm_y)
    d_cos /= norm_x * norm_y
    theta = np.arccos(d_cos) + np.radians(10)  # Add 10 degrees as an "epsilon" to
    # avoid problems
    sector = ((np.sqrt(d_euc_squared) + magnitude_difference) ** 2) * theta
    triangle = norm_x * norm_y * np.sin(theta) / 2.0
    return triangle * sector


@numba.njit(fastmath=True)
def true_angular(x, y):
    result = 0.0
    norm_x = 0.0
    norm_y = 0.0
    dim = x.shape[0]
    for i in range(dim):
        result += x[i] * y[i]
        norm_x += x[i] * x[i]
        norm_y += y[i] * y[i]

    if norm_x == 0.0 and norm_y == 0.0:
        return 0.0
    elif norm_x == 0.0 or norm_y == 0.0:
        return FLOAT32_MAX
    elif result <= 0.0:
        return FLOAT32_MAX
    else:
        result = result / np.sqrt(norm_x * norm_y)
        return 1.0 - (np.arccos(result) / np.pi)


@numba.vectorize(fastmath=True)
def true_angular_from_alt_cosine(d):
    return 1.0 - (np.arccos(pow(2.0, -d)) / np.pi)


@numba.njit(fastmath=True)
def correlation(x, y):
    mu_x = 0.0
    mu_y = 0.0
    norm_x = 0.0
    norm_y = 0.0
    dot_product = 0.0

    for i in range(x.shape[0]):
        mu_x += x[i]
        mu_y += y[i]

    mu_x /= x.shape[0]
    mu_y /= x.shape[0]

    for i in range(x.shape[0]):
        shifted_x = x[i] - mu_x
        shifted_y = y[i] - mu_y
        norm_x += shifted_x**2
        norm_y += shifted_y**2
        dot_product += shifted_x * shifted_y

    if norm_x == 0.0 and norm_y == 0.0:
        return 0.0
    elif dot_product == 0.0:
        return 1.0
    else:
        return 1.0 - (dot_product / np.sqrt(norm_x * norm_y))


@numba.njit(
    [
        "f4(f4[::1],f4[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "l1_norm_x": numba.types.float32,
        "l1_norm_y": numba.types.float32,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def hellinger(x, y):
    result = 0.0
    l1_norm_x = 0.0
    l1_norm_y = 0.0
    dim = x.shape[0]

    for i in range(dim):
        result += np.sqrt(x[i] * y[i])
        l1_norm_x += x[i]
        l1_norm_y += y[i]

    if l1_norm_x == 0 and l1_norm_y == 0:
        return 0.0
    elif l1_norm_x == 0 or l1_norm_y == 0:
        return 1.0
    else:
        return np.sqrt(1 - result / np.sqrt(l1_norm_x * l1_norm_y))


@numba.njit(
    [
        "f4(f4[::1],f4[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
            numba.types.Array(numba.types.float32, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "l1_norm_x": numba.types.float32,
        "l1_norm_y": numba.types.float32,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def alternative_hellinger(x, y):
    result = 0.0
    l1_norm_x = 0.0
    l1_norm_y = 0.0
    dim = x.shape[0]

    for i in range(dim):
        result += np.sqrt(x[i] * y[i])
        l1_norm_x += x[i]
        l1_norm_y += y[i]

    if l1_norm_x == 0 and l1_norm_y == 0:
        return 0.0
    elif l1_norm_x == 0 or l1_norm_y == 0:
        return FLOAT32_MAX
    elif result <= 0:
        return FLOAT32_MAX
    else:
        result = np.sqrt(l1_norm_x * l1_norm_y) / result
        return np.log2(result)


@numba.vectorize(fastmath=True)
def correct_alternative_hellinger(d):
    return np.sqrt(1.0 - pow(2.0, -d))


@numba.njit()
def rankdata(a, method="average"):
    arr = np.ravel(np.asarray(a))
    if method == "ordinal":
        sorter = arr.argsort(kind="mergesort")
    else:
        sorter = arr.argsort(kind="quicksort")

    inv = np.empty(sorter.size, dtype=np.intp)
    inv[sorter] = np.arange(sorter.size)

    if method == "ordinal":
        return (inv + 1).astype(np.float64)

    arr = arr[sorter]
    obs = np.ones(arr.size, np.bool_)
    obs[1:] = arr[1:] != arr[:-1]
    dense = obs.cumsum()[inv]

    if method == "dense":
        return dense.astype(np.float64)

    # cumulative counts of each unique value
    nonzero = np.nonzero(obs)[0]
    count = np.concatenate((nonzero, np.array([len(obs)], nonzero.dtype)))

    if method == "max":
        return count[dense].astype(np.float64)

    if method == "min":
        return (count[dense - 1] + 1).astype(np.float64)

    # average method
    return 0.5 * (count[dense] + count[dense - 1] + 1)


@numba.njit(fastmath=True)
def spearmanr(x, y):
    x_rank = rankdata(x)
    y_rank = rankdata(y)

    return correlation(x_rank, y_rank)


@numba.njit(nogil=True)
def kantorovich(x, y, cost=_dummy_cost, max_iter=100000):

    row_mask = x != 0
    col_mask = y != 0

    a = x[row_mask].astype(np.float64)
    b = y[col_mask].astype(np.float64)

    a_sum = a.sum()
    b_sum = b.sum()

    # if not isclose(a_sum, b_sum):
    #     raise ValueError(
    #         "Kantorovich distance inputs must be valid probability distributions."
    #     )

    a /= a_sum
    b /= b_sum

    sub_cost = cost[row_mask, :][:, col_mask]

    node_arc_data, spanning_tree, graph = allocate_graph_structures(
        a.shape[0], b.shape[0], False
    )
    initialize_supply(a, -b, graph, node_arc_data.supply)
    initialize_cost(sub_cost, graph, node_arc_data.cost)
    # initialize_cost(cost, graph, node_arc_data.cost)
    init_status = initialize_graph_structures(graph, node_arc_data, spanning_tree)
    if init_status == False:
        raise ValueError(
            "Kantorovich distance inputs must be valid probability distributions."
        )
    solve_status = network_simplex_core(node_arc_data, spanning_tree, graph, max_iter)
    # if solve_status == ProblemStatus.MAX_ITER_REACHED:
    #     print("WARNING: RESULT MIGHT BE INACCURATE\nMax number of iteration reached!")
    if solve_status == ProblemStatus.INFEASIBLE:
        raise ValueError(
            "Optimal transport problem was INFEASIBLE. Please check inputs."
        )
    elif solve_status == ProblemStatus.UNBOUNDED:
        raise ValueError(
            "Optimal transport problem was UNBOUNDED. Please check inputs."
        )
    result = total_cost(node_arc_data.flow, node_arc_data.cost)

    return result


@numba.njit(fastmath=True)
def sinkhorn(x, y, cost=_dummy_cost, regularization=1.0):
    row_mask = x != 0
    col_mask = y != 0

    a = x[row_mask].astype(np.float64)
    b = y[col_mask].astype(np.float64)

    a_sum = a.sum()
    b_sum = b.sum()

    a /= a_sum
    b /= b_sum

    sub_cost = cost[row_mask, :][:, col_mask]

    transport_plan = sinkhorn_transport_plan(
        x, y, cost=sub_cost, regularization=regularization
    )
    dim_i = transport_plan.shape[0]
    dim_j = transport_plan.shape[1]
    result = 0.0
    for i in range(dim_i):
        for j in range(dim_j):
            result += transport_plan[i, j] * cost[i, j]

    return result


@numba.njit()
def jensen_shannon_divergence(x, y):
    result = 0.0
    l1_norm_x = 0.0
    l1_norm_y = 0.0
    dim = x.shape[0]

    for i in range(dim):
        l1_norm_x += x[i]
        l1_norm_y += y[i]

    l1_norm_x += FLOAT32_EPS * dim
    l1_norm_y += FLOAT32_EPS * dim

    pdf_x = (x + FLOAT32_EPS) / l1_norm_x
    pdf_y = (y + FLOAT32_EPS) / l1_norm_y
    m = 0.5 * (pdf_x + pdf_y)

    for i in range(dim):
        result += 0.5 * (
            pdf_x[i] * np.log(pdf_x[i] / m[i]) + pdf_y[i] * np.log(pdf_y[i] / m[i])
        )

    return result


@numba.njit()
def wasserstein_1d(x, y, p=1):
    x_sum = 0.0
    y_sum = 0.0
    for i in range(x.shape[0]):
        x_sum += x[i]
        y_sum += y[i]

    x_cdf = x / x_sum
    y_cdf = y / y_sum

    for i in range(1, x_cdf.shape[0]):
        x_cdf[i] += x_cdf[i - 1]
        y_cdf[i] += y_cdf[i - 1]

    return minkowski(x_cdf, y_cdf, p)


@numba.njit()
def circular_kantorovich(x, y, p=1):
    x_sum = 0.0
    y_sum = 0.0
    for i in range(x.shape[0]):
        x_sum += x[i]
        y_sum += y[i]

    x_cdf = x / x_sum
    y_cdf = y / y_sum

    for i in range(1, x_cdf.shape[0]):
        x_cdf[i] += x_cdf[i - 1]
        y_cdf[i] += y_cdf[i - 1]

    mu = np.median((x_cdf - y_cdf) ** p)

    # Now we just want minkowski distance on the CDFs shifted by mu
    result = 0.0
    if p > 2:
        for i in range(x_cdf.shape[0]):
            result += np.abs(x_cdf[i] - y_cdf[i] - mu) ** p

        return result ** (1.0 / p)

    elif p == 2:
        for i in range(x_cdf.shape[0]):
            val = x_cdf[i] - y_cdf[i] - mu
            result += val * val

        return np.sqrt(result)

    elif p == 1:
        for i in range(x_cdf.shape[0]):
            result += np.abs(x_cdf[i] - y_cdf[i] - mu)

        return result

    else:
        raise ValueError("Invalid p supplied to Kantorvich distance")


@numba.njit()
def symmetric_kl_divergence(x, y):
    result = 0.0
    l1_norm_x = 0.0
    l1_norm_y = 0.0
    dim = x.shape[0]

    for i in range(dim):
        l1_norm_x += x[i]
        l1_norm_y += y[i]

    l1_norm_x += FLOAT32_EPS * dim
    l1_norm_y += FLOAT32_EPS * dim

    pdf_x = (x + FLOAT32_EPS) / l1_norm_x
    pdf_y = (y + FLOAT32_EPS) / l1_norm_y

    for i in range(dim):
        result += pdf_x[i] * np.log(pdf_x[i] / pdf_y[i]) + pdf_y[i] * np.log(
            pdf_y[i] / pdf_x[i]
        )

    return result


@numba.njit(
    [
        "f4(u1[::1],u1[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.uint8, 1, "C", readonly=True),
            numba.types.Array(numba.types.uint8, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "intersection": numba.types.uint8,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def bit_hamming(x, y):
    result = 0.0
    dim = x.shape[0]

    for i in range(dim):
        intersection = x[i] ^ y[i]
        result += popcnt[intersection]

    return result


@numba.njit(
    [
        "f4(u1[::1],u1[::1])",
        numba.types.float32(
            numba.types.Array(numba.types.uint8, 1, "C", readonly=True),
            numba.types.Array(numba.types.uint8, 1, "C", readonly=True),
        ),
    ],
    fastmath=True,
    locals={
        "result": numba.types.float32,
        "denom": numba.types.float32,
        "and_": numba.types.uint8,
        "or_": numba.types.uint8,
        "dim": numba.types.intp,
        "i": numba.types.uint16,
    },
)
def bit_jaccard(x, y):
    result = 0.0
    denom = 0.0
    dim = x.shape[0]

    for i in range(dim):
        and_ = x[i] & y[i]
        or_ = x[i] | y[i]
        result += popcnt[and_]
        denom += popcnt[or_]

    return -np.log(result / denom)


named_distances = {
    # general minkowski distances
    "euclidean": euclidean,
    "l2": euclidean,
    "sqeuclidean": squared_euclidean,
    "manhattan": manhattan,
    "taxicab": manhattan,
    "l1": manhattan,
    "chebyshev": chebyshev,
    "linfinity": chebyshev,
    "linfty": chebyshev,
    "linf": chebyshev,
    "minkowski": minkowski,
    # Standardised/weighted distances
    "seuclidean": standardised_euclidean,
    "standardised_euclidean": standardised_euclidean,
    "wminkowski": weighted_minkowski,
    "weighted_minkowski": weighted_minkowski,
    "mahalanobis": mahalanobis,
    # Other distances
    "canberra": canberra,
    "cosine": cosine,
    "dot": dot,
    "correlation": correlation,
    "haversine": haversine,
    "braycurtis": bray_curtis,
    "spearmanr": spearmanr,
    "tsss": tsss,
    "true_angular": true_angular,
    # Distribution distances
    "hellinger": hellinger,
    "kantorovich": kantorovich,
    "wasserstein": kantorovich,
    "wasserstein_1d": wasserstein_1d,
    "wasserstein-1d": wasserstein_1d,
    "kantorovich-1d": wasserstein_1d,
    "kantorovich_1d": wasserstein_1d,
    "circular_kantorovich": circular_kantorovich,
    "circular_wasserstein": circular_kantorovich,
    "sinkhorn": sinkhorn,
    "jensen-shannon": jensen_shannon_divergence,
    "jensen_shannon": jensen_shannon_divergence,
    "symmetric-kl": symmetric_kl_divergence,
    "symmetric_kl": symmetric_kl_divergence,
    "symmetric_kullback_liebler": symmetric_kl_divergence,
    # Binary distances
    "hamming": hamming,
    "jaccard": jaccard,
    "dice": dice,
    "matching": matching,
    "kulsinski": kulsinski,
    "rogerstanimoto": rogers_tanimoto,
    "russellrao": russellrao,
    "sokalsneath": sokal_sneath,
    "sokalmichener": sokal_michener,
    "yule": yule,
    "bit_hamming": bit_hamming,
    "bit_jaccard": bit_jaccard,
}

# Some distances have a faster to compute alternative that
# retains the same ordering of distances. We can compute with
# this instead, and then correct the final distances when complete.
# This provides a list of distances that have such an alternative
# along with the alternative distance function and the correction
# function to be applied.
fast_distance_alternatives = {
    "euclidean": {"dist": squared_euclidean, "correction": np.sqrt},
    "l2": {"dist": squared_euclidean, "correction": np.sqrt},
    "cosine": {"dist": alternative_cosine, "correction": correct_alternative_cosine},
    "dot": {"dist": alternative_dot, "correction": correct_alternative_cosine},
    "true_angular": {
        "dist": alternative_cosine,
        "correction": true_angular_from_alt_cosine,
    },
    "hellinger": {
        "dist": alternative_hellinger,
        "correction": correct_alternative_hellinger,
    },
    "jaccard": {"dist": alternative_jaccard, "correction": correct_alternative_jaccard},
}