# Author: Leland McInnes # # License: BSD 3 clause import numba import numpy as np import scipy.stats from sklearn.metrics import pairwise_distances _mock_identity = np.eye(2, dtype=np.float64) _mock_cost = 1.0 - _mock_identity _mock_ones = np.ones(2, dtype=np.float64) @numba.njit() def sign(a): if a < 0: return -1 else: return 1 @numba.njit(fastmath=True) def euclidean(x, y): r"""Standard euclidean distance. ..math:: D(x, y) = \sqrt{\sum_i (x_i - y_i)^2} """ result = 0.0 for i in range(x.shape[0]): result += (x[i] - y[i]) ** 2 return np.sqrt(result) @numba.njit(fastmath=True) def euclidean_grad(x, y): r"""Standard euclidean distance and its gradient. ..math:: D(x, y) = \sqrt{\sum_i (x_i - y_i)^2} \frac{dD(x, y)}{dx} = (x_i - y_i)/D(x,y) """ result = 0.0 for i in range(x.shape[0]): result += (x[i] - y[i]) ** 2 d = np.sqrt(result) grad = (x - y) / (1e-6 + d) return d, grad @numba.njit() def standardised_euclidean(x, y, sigma=_mock_ones): r"""Euclidean distance standardised against a vector of standard deviations per coordinate. ..math:: D(x, y) = \sqrt{\sum_i \frac{(x_i - y_i)**2}{v_i}} """ result = 0.0 for i in range(x.shape[0]): result += ((x[i] - y[i]) ** 2) / sigma[i] return np.sqrt(result) @numba.njit(fastmath=True) def standardised_euclidean_grad(x, y, sigma=_mock_ones): r"""Euclidean distance standardised against a vector of standard deviations per coordinate with gradient. ..math:: D(x, y) = \sqrt{\sum_i \frac{(x_i - y_i)**2}{v_i}} """ result = 0.0 for i in range(x.shape[0]): result += (x[i] - y[i]) ** 2 / sigma[i] d = np.sqrt(result) grad = (x - y) / (1e-6 + d * sigma) return d, grad @numba.njit() def manhattan(x, y): r"""Manhattan, taxicab, or l1 distance. ..math:: D(x, y) = \sum_i |x_i - y_i| """ result = 0.0 for i in range(x.shape[0]): result += np.abs(x[i] - y[i]) return result @numba.njit() def manhattan_grad(x, y): r"""Manhattan, taxicab, or l1 distance with gradient. ..math:: D(x, y) = \sum_i |x_i - y_i| """ result = 0.0 grad = np.zeros(x.shape) for i in range(x.shape[0]): result += np.abs(x[i] - y[i]) grad[i] = np.sign(x[i] - y[i]) return result, grad @numba.njit() def chebyshev(x, y): r"""Chebyshev or l-infinity distance. ..math:: D(x, y) = \max_i |x_i - y_i| """ result = 0.0 for i in range(x.shape[0]): result = max(result, np.abs(x[i] - y[i])) return result @numba.njit() def chebyshev_grad(x, y): r"""Chebyshev or l-infinity distance with gradient. ..math:: D(x, y) = \max_i |x_i - y_i| """ result = 0.0 max_i = 0 for i in range(x.shape[0]): v = np.abs(x[i] - y[i]) if v > result: result = v max_i = i grad = np.zeros(x.shape) grad[max_i] = np.sign(x[max_i] - y[max_i]) return result, grad @numba.njit() def minkowski(x, y, p=2): r"""Minkowski distance. ..math:: D(x, y) = \left(\sum_i |x_i - y_i|^p\right)^{\frac{1}{p}} This is a general distance. For p=1 it is equivalent to manhattan distance, for p=2 it is Euclidean distance, and for p=infinity it is Chebyshev distance. In general it is better to use the more specialised functions for those distances. """ result = 0.0 for i in range(x.shape[0]): result += (np.abs(x[i] - y[i])) ** p return result ** (1.0 / p) @numba.njit() def minkowski_grad(x, y, p=2): r"""Minkowski distance with gradient. ..math:: D(x, y) = \left(\sum_i |x_i - y_i|^p\right)^{\frac{1}{p}} This is a general distance. For p=1 it is equivalent to manhattan distance, for p=2 it is Euclidean distance, and for p=infinity it is Chebyshev distance. In general it is better to use the more specialised functions for those distances. """ result = 0.0 for i in range(x.shape[0]): result += (np.abs(x[i] - y[i])) ** p grad = np.empty(x.shape[0], dtype=np.float32) for i in range(x.shape[0]): grad[i] = ( pow(np.abs(x[i] - y[i]), (p - 1.0)) * sign(x[i] - y[i]) * pow(result, (1.0 / (p - 1))) ) return result ** (1.0 / p), grad @numba.njit() def poincare(u, v): r"""Poincare distance. ..math:: \delta (u, v) = 2 \frac{ \lVert u - v \rVert ^2 }{ ( 1 - \lVert u \rVert ^2 ) ( 1 - \lVert v \rVert ^2 ) } D(x, y) = \operatorname{arcosh} (1+\delta (u,v)) """ sq_u_norm = np.sum(u * u) sq_v_norm = np.sum(v * v) sq_dist = np.sum(np.power(u - v, 2)) return np.arccosh(1 + 2 * (sq_dist / ((1 - sq_u_norm) * (1 - sq_v_norm)))) @numba.njit() def hyperboloid_grad(x, y): s = np.sqrt(1 + np.sum(x ** 2)) t = np.sqrt(1 + np.sum(y ** 2)) B = s * t for i in range(x.shape[0]): B -= x[i] * y[i] if B <= 1: B = 1.0 + 1e-8 grad_coeff = 1.0 / (np.sqrt(B - 1) * np.sqrt(B + 1)) # return np.arccosh(B), np.zeros(x.shape[0]) grad = np.zeros(x.shape[0]) for i in range(x.shape[0]): grad[i] = grad_coeff * (((x[i] * t) / s) - y[i]) return np.arccosh(B), grad @numba.njit() def weighted_minkowski(x, y, w=_mock_ones, p=2): r"""A weighted version of Minkowski distance. ..math:: D(x, y) = \left(\sum_i w_i |x_i - y_i|^p\right)^{\frac{1}{p}} If weights w_i are inverse standard deviations of data in each dimension then this represented a standardised Minkowski distance (and is equivalent to standardised Euclidean distance for p=1). """ result = 0.0 for i in range(x.shape[0]): result += w[i] * np.abs(x[i] - y[i]) ** p return result ** (1.0 / p) @numba.njit() def weighted_minkowski_grad(x, y, w=_mock_ones, p=2): r"""A weighted version of Minkowski distance with gradient. ..math:: D(x, y) = \left(\sum_i w_i |x_i - y_i|^p\right)^{\frac{1}{p}} If weights w_i are inverse standard deviations of data in each dimension then this represented a standardised Minkowski distance (and is equivalent to standardised Euclidean distance for p=1). """ result = 0.0 for i in range(x.shape[0]): result += w[i] * (np.abs(x[i] - y[i])) ** p grad = np.empty(x.shape[0], dtype=np.float32) for i in range(x.shape[0]): grad[i] = ( w[i] * pow(np.abs(x[i] - y[i]), (p - 1.0)) * sign(x[i] - y[i]) * pow(result, (1.0 / (p - 1))) ) return result ** (1.0 / p), grad @numba.njit() def mahalanobis(x, y, vinv=_mock_identity): result = 0.0 diff = np.empty(x.shape[0], dtype=np.float32) for i in range(x.shape[0]): diff[i] = x[i] - y[i] for i in range(x.shape[0]): tmp = 0.0 for j in range(x.shape[0]): tmp += vinv[i, j] * diff[j] result += tmp * diff[i] return np.sqrt(result) @numba.njit() def mahalanobis_grad(x, y, vinv=_mock_identity): result = 0.0 diff = np.empty(x.shape[0], dtype=np.float32) for i in range(x.shape[0]): diff[i] = x[i] - y[i] grad_tmp = np.zeros(x.shape) for i in range(x.shape[0]): tmp = 0.0 for j in range(x.shape[0]): tmp += vinv[i, j] * diff[j] grad_tmp[i] += vinv[i, j] * diff[j] result += tmp * diff[i] dist = np.sqrt(result) grad = grad_tmp / (1e-6 + dist) return dist, grad @numba.njit() def hamming(x, y): result = 0.0 for i in range(x.shape[0]): if x[i] != y[i]: result += 1.0 return float(result) / x.shape[0] @numba.njit() def canberra(x, y): result = 0.0 for i in range(x.shape[0]): denominator = np.abs(x[i]) + np.abs(y[i]) if denominator > 0: result += np.abs(x[i] - y[i]) / denominator return result @numba.njit() def canberra_grad(x, y): result = 0.0 grad = np.zeros(x.shape) for i in range(x.shape[0]): denominator = np.abs(x[i]) + np.abs(y[i]) if denominator > 0: result += np.abs(x[i] - y[i]) / denominator grad[i] = ( np.sign(x[i] - y[i]) / denominator - np.abs(x[i] - y[i]) * np.sign(x[i]) / denominator ** 2 ) return result, grad @numba.njit() def bray_curtis(x, y): numerator = 0.0 denominator = 0.0 for i in range(x.shape[0]): numerator += np.abs(x[i] - y[i]) denominator += np.abs(x[i] + y[i]) if denominator > 0.0: return float(numerator) / denominator else: return 0.0 @numba.njit() def bray_curtis_grad(x, y): numerator = 0.0 denominator = 0.0 for i in range(x.shape[0]): numerator += np.abs(x[i] - y[i]) denominator += np.abs(x[i] + y[i]) if denominator > 0.0: dist = float(numerator) / denominator grad = (np.sign(x - y) - dist) / denominator else: dist = 0.0 grad = np.zeros(x.shape) return dist, grad @numba.njit() def jaccard(x, y): num_non_zero = 0.0 num_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_non_zero += x_true or y_true num_equal += x_true and y_true if num_non_zero == 0.0: return 0.0 else: return float(num_non_zero - num_equal) / num_non_zero @numba.njit() def matching(x, y): num_not_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_not_equal += x_true != y_true return float(num_not_equal) / x.shape[0] @numba.njit() def dice(x, y): num_true_true = 0.0 num_not_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_true_true += x_true and y_true num_not_equal += x_true != y_true if num_not_equal == 0.0: return 0.0 else: return num_not_equal / (2.0 * num_true_true + num_not_equal) @numba.njit() def kulsinski(x, y): num_true_true = 0.0 num_not_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_true_true += x_true and y_true num_not_equal += x_true != y_true if num_not_equal == 0: return 0.0 else: return float(num_not_equal - num_true_true + x.shape[0]) / ( num_not_equal + x.shape[0] ) @numba.njit() def rogers_tanimoto(x, y): num_not_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_not_equal += x_true != y_true return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal) @numba.njit() def russellrao(x, y): num_true_true = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_true_true += x_true and y_true if num_true_true == np.sum(x != 0) and num_true_true == np.sum(y != 0): return 0.0 else: return float(x.shape[0] - num_true_true) / (x.shape[0]) @numba.njit() def sokal_michener(x, y): num_not_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_not_equal += x_true != y_true return (2.0 * num_not_equal) / (x.shape[0] + num_not_equal) @numba.njit() def sokal_sneath(x, y): num_true_true = 0.0 num_not_equal = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_true_true += x_true and y_true num_not_equal += x_true != y_true if num_not_equal == 0.0: return 0.0 else: return num_not_equal / (0.5 * num_true_true + num_not_equal) @numba.njit() def haversine(x, y): if x.shape[0] != 2: raise ValueError("haversine is only defined for 2 dimensional data") sin_lat = np.sin(0.5 * (x[0] - y[0])) sin_long = np.sin(0.5 * (x[1] - y[1])) result = np.sqrt(sin_lat ** 2 + np.cos(x[0]) * np.cos(y[0]) * sin_long ** 2) return 2.0 * np.arcsin(result) @numba.njit() def haversine_grad(x, y): # spectral initialization puts many points near the poles # currently, adding pi/2 to the latitude avoids problems # TODO: reimplement with quaternions to avoid singularity if x.shape[0] != 2: raise ValueError("haversine is only defined for 2 dimensional data") sin_lat = np.sin(0.5 * (x[0] - y[0])) cos_lat = np.cos(0.5 * (x[0] - y[0])) sin_long = np.sin(0.5 * (x[1] - y[1])) cos_long = np.cos(0.5 * (x[1] - y[1])) a_0 = np.cos(x[0] + np.pi / 2) * np.cos(y[0] + np.pi / 2) * sin_long ** 2 a_1 = a_0 + sin_lat ** 2 d = 2.0 * np.arcsin(np.sqrt(min(max(abs(a_1), 0), 1))) denom = np.sqrt(abs(a_1 - 1)) * np.sqrt(abs(a_1)) grad = ( np.array( [ ( sin_lat * cos_lat - np.sin(x[0] + np.pi / 2) * np.cos(y[0] + np.pi / 2) * sin_long ** 2 ), ( np.cos(x[0] + np.pi / 2) * np.cos(y[0] + np.pi / 2) * sin_long * cos_long ), ] ) / (denom + 1e-6) ) return d, grad @numba.njit() def yule(x, y): num_true_true = 0.0 num_true_false = 0.0 num_false_true = 0.0 for i in range(x.shape[0]): x_true = x[i] != 0 y_true = y[i] != 0 num_true_true += x_true and y_true num_true_false += x_true and (not y_true) num_false_true += (not x_true) and y_true num_false_false = x.shape[0] - num_true_true - num_true_false - num_false_true if num_true_false == 0.0 or num_false_true == 0.0: return 0.0 else: return (2.0 * num_true_false * num_false_true) / ( num_true_true * num_false_false + num_true_false * num_false_true ) @numba.njit() def cosine(x, y): result = 0.0 norm_x = 0.0 norm_y = 0.0 for i in range(x.shape[0]): result += x[i] * y[i] norm_x += x[i] ** 2 norm_y += y[i] ** 2 if norm_x == 0.0 and norm_y == 0.0: return 0.0 elif norm_x == 0.0 or norm_y == 0.0: return 1.0 else: return 1.0 - (result / np.sqrt(norm_x * norm_y)) @numba.njit(fastmath=True) def cosine_grad(x, y): result = 0.0 norm_x = 0.0 norm_y = 0.0 for i in range(x.shape[0]): result += x[i] * y[i] norm_x += x[i] ** 2 norm_y += y[i] ** 2 if norm_x == 0.0 and norm_y == 0.0: dist = 0.0 grad = np.zeros(x.shape) elif norm_x == 0.0 or norm_y == 0.0: dist = 1.0 grad = np.zeros(x.shape) else: grad = -(x * result - y * norm_x) / np.sqrt(norm_x ** 3 * norm_y) dist = 1.0 - (result / np.sqrt(norm_x * norm_y)) return dist, grad @numba.njit() def correlation(x, y): mu_x = 0.0 mu_y = 0.0 norm_x = 0.0 norm_y = 0.0 dot_product = 0.0 for i in range(x.shape[0]): mu_x += x[i] mu_y += y[i] mu_x /= x.shape[0] mu_y /= x.shape[0] for i in range(x.shape[0]): shifted_x = x[i] - mu_x shifted_y = y[i] - mu_y norm_x += shifted_x ** 2 norm_y += shifted_y ** 2 dot_product += shifted_x * shifted_y if norm_x == 0.0 and norm_y == 0.0: return 0.0 elif dot_product == 0.0: return 1.0 else: return 1.0 - (dot_product / np.sqrt(norm_x * norm_y)) @numba.njit() def hellinger(x, y): result = 0.0 l1_norm_x = 0.0 l1_norm_y = 0.0 for i in range(x.shape[0]): result += np.sqrt(x[i] * y[i]) l1_norm_x += x[i] l1_norm_y += y[i] if l1_norm_x == 0 and l1_norm_y == 0: return 0.0 elif l1_norm_x == 0 or l1_norm_y == 0: return 1.0 else: return np.sqrt(1 - result / np.sqrt(l1_norm_x * l1_norm_y)) @numba.njit() def hellinger_grad(x, y): result = 0.0 l1_norm_x = 0.0 l1_norm_y = 0.0 grad_term = np.empty(x.shape[0]) for i in range(x.shape[0]): grad_term[i] = np.sqrt(x[i] * y[i]) result += grad_term[i] l1_norm_x += x[i] l1_norm_y += y[i] if l1_norm_x == 0 and l1_norm_y == 0: dist = 0.0 grad = np.zeros(x.shape) elif l1_norm_x == 0 or l1_norm_y == 0: dist = 1.0 grad = np.zeros(x.shape) else: dist_denom = np.sqrt(l1_norm_x * l1_norm_y) dist = np.sqrt(1 - result / dist_denom) grad_denom = 2 * dist grad_numer_const = (l1_norm_y * result) / (2 * dist_denom ** 3) grad = (grad_numer_const - (y / grad_term * dist_denom)) / grad_denom return dist, grad @numba.njit() def approx_log_Gamma(x): if x == 1: return 0 # x2= 1/(x*x); return x * np.log(x) - x + 0.5 * np.log(2.0 * np.pi / x) + 1.0 / (x * 12.0) # + x2*(-1.0/360.0) + x2* (1.0/1260.0 + x2*(-1.0/(1680.0) +\ # x2*(1.0/1188.0 + x2*(-691.0/360360.0 + x2*(1.0/156.0 +\ # x2*(-3617.0/122400.0 + x2*(43687.0/244188.0 + x2*(-174611.0/125400.0) +\ # x2*(77683.0/5796.0 + x2*(-236364091.0/1506960.0 + x2*(657931.0/300.0)))))))))))) @numba.njit() def log_beta(x, y): a = min(x, y) b = max(x, y) if b < 5: value = -np.log(b) for i in range(1, int(a)): value += np.log(i) - np.log(b + i) return value else: return approx_log_Gamma(x) + approx_log_Gamma(y) - approx_log_Gamma(x + y) @numba.njit() def log_single_beta(x): return np.log(2.0) * (-2.0 * x + 0.5) + 0.5 * np.log(2.0 * np.pi / x) + 0.125 / x # + x2*(-1.0/192.0 + x2* (1.0/640.0 + x2*(-17.0/(14336.0) +\ # x2*(31.0/18432.0 + x2*(-691.0/180224.0 +\ # x2*(5461.0/425984.0 + x2*(-929569.0/15728640.0 +\ # x2*(3189151.0/8912896.0 + x2*(-221930581.0/79691776.0) +\ # x2*(4722116521.0/176160768.0 + x2*(-968383680827.0/3087007744.0 +\ # x2*(14717667114151.0/3355443200.0 )))))))))))) @numba.njit() def ll_dirichlet(data1, data2): """The symmetric relative log likelihood of rolling data2 vs data1 in n trials on a die that rolled data1 in sum(data1) trials. ..math:: D(data1, data2) = DirichletMultinomail(data2 | data1) """ n1 = np.sum(data1) n2 = np.sum(data2) log_b = 0.0 self_denom1 = 0.0 self_denom2 = 0.0 for i in range(data1.shape[0]): if data1[i] * data2[i] > 0.9: log_b += log_beta(data1[i], data2[i]) self_denom1 += log_single_beta(data1[i]) self_denom2 += log_single_beta(data2[i]) else: if data1[i] > 0.9: self_denom1 += log_single_beta(data1[i]) if data2[i] > 0.9: self_denom2 += log_single_beta(data2[i]) return np.sqrt( 1.0 / n2 * (log_b - log_beta(n1, n2) - (self_denom2 - log_single_beta(n2))) + 1.0 / n1 * (log_b - log_beta(n2, n1) - (self_denom1 - log_single_beta(n1))) ) @numba.njit(fastmath=True) def symmetric_kl(x, y, z=1e-11): # pragma: no cover r""" symmetrized KL divergence between two probability distributions ..math:: D(x, y) = \frac{D_{KL}\left(x \Vert y\right) + D_{KL}\left(y \Vert x\right)}{2} """ n = x.shape[0] x_sum = 0.0 y_sum = 0.0 kl1 = 0.0 kl2 = 0.0 for i in range(n): x[i] += z x_sum += x[i] y[i] += z y_sum += y[i] for i in range(n): x[i] /= x_sum y[i] /= y_sum for i in range(n): kl1 += x[i] * np.log(x[i] / y[i]) kl2 += y[i] * np.log(y[i] / x[i]) return (kl1 + kl2) / 2 @numba.njit(fastmath=True) def symmetric_kl_grad(x, y, z=1e-11): # pragma: no cover """ symmetrized KL divergence and its gradient """ n = x.shape[0] x_sum = 0.0 y_sum = 0.0 kl1 = 0.0 kl2 = 0.0 for i in range(n): x[i] += z x_sum += x[i] y[i] += z y_sum += y[i] for i in range(n): x[i] /= x_sum y[i] /= y_sum for i in range(n): kl1 += x[i] * np.log(x[i] / y[i]) kl2 += y[i] * np.log(y[i] / x[i]) dist = (kl1 + kl2) / 2 grad = (np.log(y / x) - (x / y) + 1) / 2 return dist, grad @numba.njit() def correlation_grad(x, y): mu_x = 0.0 mu_y = 0.0 norm_x = 0.0 norm_y = 0.0 dot_product = 0.0 for i in range(x.shape[0]): mu_x += x[i] mu_y += y[i] mu_x /= x.shape[0] mu_y /= x.shape[0] for i in range(x.shape[0]): shifted_x = x[i] - mu_x shifted_y = y[i] - mu_y norm_x += shifted_x ** 2 norm_y += shifted_y ** 2 dot_product += shifted_x * shifted_y if norm_x == 0.0 and norm_y == 0.0: dist = 0.0 grad = np.zeros(x.shape) elif dot_product == 0.0: dist = 1.0 grad = np.zeros(x.shape) else: dist = 1.0 - (dot_product / np.sqrt(norm_x * norm_y)) grad = ((x - mu_x) / norm_x - (y - mu_y) / dot_product) * dist return dist, grad @numba.njit(fastmath=True) def sinkhorn_distance( x, y, M=_mock_identity, cost=_mock_cost, maxiter=64 ): # pragma: no cover p = (x / x.sum()).astype(np.float32) q = (y / y.sum()).astype(np.float32) u = np.ones(p.shape, dtype=np.float32) v = np.ones(q.shape, dtype=np.float32) for n in range(maxiter): t = M @ v u[t > 0] = p[t > 0] / t[t > 0] t = M.T @ u v[t > 0] = q[t > 0] / t[t > 0] pi = np.diag(v) @ M @ np.diag(u) result = 0.0 for i in range(pi.shape[0]): for j in range(pi.shape[1]): if pi[i, j] > 0: result += pi[i, j] * cost[i, j] return result @numba.njit(fastmath=True) def spherical_gaussian_energy_grad(x, y): # pragma: no cover mu_1 = x[0] - y[0] mu_2 = x[1] - y[1] sigma = np.abs(x[2]) + np.abs(y[2]) sign_sigma = np.sign(x[2]) dist = (mu_1 ** 2 + mu_2 ** 2) / (2 * sigma) + np.log(sigma) + np.log(2 * np.pi) grad = np.empty(3, np.float32) grad[0] = mu_1 / sigma grad[1] = mu_2 / sigma grad[2] = sign_sigma * (1.0 / sigma - (mu_1 ** 2 + mu_2 ** 2) / (2 * sigma ** 2)) return dist, grad @numba.njit(fastmath=True) def diagonal_gaussian_energy_grad(x, y): # pragma: no cover mu_1 = x[0] - y[0] mu_2 = x[1] - y[1] sigma_11 = np.abs(x[2]) + np.abs(y[2]) sigma_12 = 0.0 sigma_22 = np.abs(x[3]) + np.abs(y[3]) det = sigma_11 * sigma_22 sign_s1 = np.sign(x[2]) sign_s2 = np.sign(x[3]) if det == 0.0: # TODO: figure out the right thing to do here return mu_1 ** 2 + mu_2 ** 2, np.array([0.0, 0.0, 1.0, 1.0], dtype=np.float32) cross_term = 2 * sigma_12 m_dist = ( np.abs(sigma_22) * (mu_1 ** 2) - cross_term * mu_1 * mu_2 + np.abs(sigma_11) * (mu_2 ** 2) ) dist = (m_dist / det + np.log(np.abs(det))) / 2.0 + np.log(2 * np.pi) grad = np.empty(6, dtype=np.float32) grad[0] = (2 * sigma_22 * mu_1 - cross_term * mu_2) / (2 * det) grad[1] = (2 * sigma_11 * mu_2 - cross_term * mu_1) / (2 * det) grad[2] = sign_s1 * (sigma_22 * (det - m_dist) + det * mu_2 ** 2) / (2 * det ** 2) grad[3] = sign_s2 * (sigma_11 * (det - m_dist) + det * mu_1 ** 2) / (2 * det ** 2) return dist, grad @numba.njit(fastmath=True) def gaussian_energy_grad(x, y): # pragma: no cover mu_1 = x[0] - y[0] mu_2 = x[1] - y[1] # Ensure width are positive x[2] = np.abs(x[2]) y[2] = np.abs(y[2]) # Ensure heights are positive x[3] = np.abs(x[3]) y[3] = np.abs(y[3]) # Ensure angle is in range -pi,pi x[4] = np.arcsin(np.sin(x[4])) y[4] = np.arcsin(np.sin(y[4])) # Covariance entries for y a = y[2] * np.cos(y[4]) ** 2 + y[3] * np.sin(y[4]) ** 2 b = (y[2] - y[3]) * np.sin(y[4]) * np.cos(y[4]) c = y[3] * np.cos(y[4]) ** 2 + y[2] * np.sin(y[4]) ** 2 # Sum of covariance matrices sigma_11 = x[2] * np.cos(x[4]) ** 2 + x[3] * np.sin(x[4]) ** 2 + a sigma_12 = (x[2] - x[3]) * np.sin(x[4]) * np.cos(x[4]) + b sigma_22 = x[2] * np.sin(x[4]) ** 2 + x[3] * np.cos(x[4]) ** 2 + c # Determinant of the sum of covariances det_sigma = np.abs(sigma_11 * sigma_22 - sigma_12 ** 2) x_inv_sigma_y_numerator = ( sigma_22 * mu_1 ** 2 - 2 * sigma_12 * mu_1 * mu_2 + sigma_11 * mu_2 ** 2 ) if det_sigma < 1e-32: return ( mu_1 ** 2 + mu_2 ** 2, np.array([0.0, 0.0, 1.0, 1.0, 0.0], dtype=np.float32), ) dist = x_inv_sigma_y_numerator / det_sigma + np.log(det_sigma) + np.log(2 * np.pi) grad = np.zeros(5, np.float32) grad[0] = (2 * sigma_22 * mu_1 - 2 * sigma_12 * mu_2) / det_sigma grad[1] = (2 * sigma_11 * mu_2 - 2 * sigma_12 * mu_1) / det_sigma grad[2] = mu_2 * (mu_2 * np.cos(x[4]) ** 2 - mu_1 * np.cos(x[4]) * np.sin(x[4])) grad[2] += mu_1 * (mu_1 * np.sin(x[4]) ** 2 - mu_2 * np.cos(x[4]) * np.sin(x[4])) grad[2] *= det_sigma grad[2] -= x_inv_sigma_y_numerator * np.cos(x[4]) ** 2 * sigma_22 grad[2] -= x_inv_sigma_y_numerator * np.sin(x[4]) ** 2 * sigma_11 grad[2] += x_inv_sigma_y_numerator * 2 * sigma_12 * np.sin(x[4]) * np.cos(x[4]) grad[2] /= det_sigma ** 2 + 1e-8 grad[3] = mu_1 * (mu_1 * np.cos(x[4]) ** 2 - mu_2 * np.cos(x[4]) * np.sin(x[4])) grad[3] += mu_2 * (mu_2 * np.sin(x[4]) ** 2 - mu_1 * np.cos(x[4]) * np.sin(x[4])) grad[3] *= det_sigma grad[3] -= x_inv_sigma_y_numerator * np.sin(x[4]) ** 2 * sigma_22 grad[3] -= x_inv_sigma_y_numerator * np.cos(x[4]) ** 2 * sigma_11 grad[3] -= x_inv_sigma_y_numerator * 2 * sigma_12 * np.sin(x[4]) * np.cos(x[4]) grad[3] /= det_sigma ** 2 + 1e-8 grad[4] = (x[3] - x[2]) * ( 2 * mu_1 * mu_2 * np.cos(2 * x[4]) - (mu_1 ** 2 - mu_2 ** 2) * np.sin(2 * x[4]) ) grad[4] *= det_sigma grad[4] -= x_inv_sigma_y_numerator * (x[3] - x[2]) * np.sin(2 * x[4]) * sigma_22 grad[4] -= x_inv_sigma_y_numerator * (x[2] - x[3]) * np.sin(2 * x[4]) * sigma_11 grad[4] -= x_inv_sigma_y_numerator * 2 * sigma_12 * (x[2] - x[3]) * np.cos(2 * x[4]) grad[4] /= det_sigma ** 2 + 1e-8 return dist, grad @numba.njit(fastmath=True) def spherical_gaussian_grad(x, y): # pragma: no cover mu_1 = x[0] - y[0] mu_2 = x[1] - y[1] sigma = x[2] + y[2] sigma_sign = np.sign(sigma) if sigma == 0: return 10.0, np.array([0.0, 0.0, -1.0], dtype=np.float32) dist = ( (mu_1 ** 2 + mu_2 ** 2) / np.abs(sigma) + 2 * np.log(np.abs(sigma)) + np.log(2 * np.pi) ) grad = np.empty(3, dtype=np.float32) grad[0] = (2 * mu_1) / np.abs(sigma) grad[1] = (2 * mu_2) / np.abs(sigma) grad[2] = sigma_sign * ( -(mu_1 ** 2 + mu_2 ** 2) / (sigma ** 2) + (2 / np.abs(sigma)) ) return dist, grad # Special discrete distances -- where x and y are objects, not vectors def get_discrete_params(data, metric): if metric == "ordinal": return {"support_size": float(data.max() - data.min()) / 2.0} elif metric == "count": min_count = scipy.stats.tmin(data) max_count = scipy.stats.tmax(data) lambda_ = scipy.stats.tmean(data) normalisation = count_distance(min_count, max_count, poisson_lambda=lambda_) return { "poisson_lambda": lambda_, "normalisation": normalisation / 2.0, # heuristic } elif metric == "string": lengths = np.array([len(x) for x in data]) max_length = scipy.stats.tmax(lengths) max_dist = max_length / 1.5 # heuristic normalisation = max_dist / 2.0 # heuristic return {"normalisation": normalisation, "max_dist": max_dist / 2.0} # heuristic else: return {} @numba.njit() def categorical_distance(x, y): if x == y: return 0.0 else: return 1.0 @numba.njit() def hierarchical_categorical_distance(x, y, cat_hierarchy=[{}]): n_levels = float(len(cat_hierarchy)) for level, cats in enumerate(cat_hierarchy): if cats[x] == cats[y]: return float(level) / n_levels else: return 1.0 @numba.njit() def ordinal_distance(x, y, support_size=1.0): return abs(x - y) / support_size @numba.njit() def count_distance(x, y, poisson_lambda=1.0, normalisation=1.0): lo = int(min(x, y)) hi = int(max(x, y)) log_lambda = np.log(poisson_lambda) if lo < 2: log_k_factorial = 0.0 elif lo < 10: log_k_factorial = 0.0 for k in range(2, lo): log_k_factorial += np.log(k) else: log_k_factorial = approx_log_Gamma(lo + 1) result = 0.0 for k in range(lo, hi): result += k * log_lambda - poisson_lambda - log_k_factorial log_k_factorial += np.log(k) return result / normalisation @numba.njit() def levenshtein(x, y, normalisation=1.0, max_distance=20): x_len, y_len = len(x), len(y) # Opt out of some comparisons if abs(x_len - y_len) > max_distance: return abs(x_len - y_len) / normalisation v0 = np.arange(y_len + 1).astype(np.float64) v1 = np.zeros(y_len + 1) for i in range(x_len): v1[i] = i + 1 for j in range(y_len): deletion_cost = v0[j + 1] + 1 insertion_cost = v1[j] + 1 substitution_cost = int(x[i] == y[j]) v1[j + 1] = min(deletion_cost, insertion_cost, substitution_cost) v0 = v1 # Abort early if we've already exceeded max_dist if np.min(v0) > max_distance: return max_distance / normalisation return v0[y_len] / normalisation named_distances = { # general minkowski distances "euclidean": euclidean, "l2": euclidean, "manhattan": manhattan, "taxicab": manhattan, "l1": manhattan, "chebyshev": chebyshev, "linfinity": chebyshev, "linfty": chebyshev, "linf": chebyshev, "minkowski": minkowski, "poincare": poincare, # Standardised/weighted distances "seuclidean": standardised_euclidean, "standardised_euclidean": standardised_euclidean, "wminkowski": weighted_minkowski, "weighted_minkowski": weighted_minkowski, "mahalanobis": mahalanobis, # Other distances "canberra": canberra, "cosine": cosine, "correlation": correlation, "hellinger": hellinger, "haversine": haversine, "braycurtis": bray_curtis, "ll_dirichlet": ll_dirichlet, "symmetric_kl": symmetric_kl, # Binary distances "hamming": hamming, "jaccard": jaccard, "dice": dice, "matching": matching, "kulsinski": kulsinski, "rogerstanimoto": rogers_tanimoto, "russellrao": russellrao, "sokalsneath": sokal_sneath, "sokalmichener": sokal_michener, "yule": yule, # Special discrete distances "categorical": categorical_distance, "ordinal": ordinal_distance, "hierarchical_categorical": hierarchical_categorical_distance, "count": count_distance, "string": levenshtein, } named_distances_with_gradients = { # general minkowski distances "euclidean": euclidean_grad, "l2": euclidean_grad, "manhattan": manhattan_grad, "taxicab": manhattan_grad, "l1": manhattan_grad, "chebyshev": chebyshev_grad, "linfinity": chebyshev_grad, "linfty": chebyshev_grad, "linf": chebyshev_grad, "minkowski": minkowski_grad, # Standardised/weighted distances "seuclidean": standardised_euclidean_grad, "standardised_euclidean": standardised_euclidean_grad, "wminkowski": weighted_minkowski_grad, "weighted_minkowski": weighted_minkowski_grad, "mahalanobis": mahalanobis_grad, # Other distances "canberra": canberra_grad, "cosine": cosine_grad, "correlation": correlation_grad, "hellinger": hellinger_grad, "haversine": haversine_grad, "braycurtis": bray_curtis_grad, "symmetric_kl": symmetric_kl_grad, # Special embeddings "spherical_gaussian_energy": spherical_gaussian_energy_grad, "diagonal_gaussian_energy": diagonal_gaussian_energy_grad, "gaussian_energy": gaussian_energy_grad, "hyperboloid": hyperboloid_grad, } DISCRETE_METRICS = ( "categorical", "hierarchical_categorical", "ordinal", "count", "string", ) SPECIAL_METRICS = ( "hellinger", "ll_dirichlet", "symmetric_kl", "poincare", hellinger, ll_dirichlet, symmetric_kl, poincare, ) @numba.njit(parallel=True) def parallel_special_metric(X, Y=None, metric=hellinger): if Y is None: result = np.zeros((X.shape[0], X.shape[0])) for i in range(X.shape[0]): for j in range(i + 1, X.shape[0]): result[i, j] = metric(X[i], X[j]) result[j, i] = result[i, j] else: result = np.zeros((X.shape[0], Y.shape[0])) for i in range(X.shape[0]): for j in range(Y.shape[0]): result[i, j] = metric(X[i], Y[j]) return result # We can gain efficiency by chunking the matrix into blocks; # this keeps data vectors in cache better @numba.njit(parallel=True, nogil=True) def chunked_parallel_special_metric(X, Y=None, metric=hellinger, chunk_size=16): if Y is None: XX, symmetrical = X, True row_size = col_size = X.shape[0] else: XX, symmetrical = Y, False row_size, col_size = X.shape[0], Y.shape[0] result = np.zeros((row_size, col_size), dtype=np.float32) n_row_chunks = (row_size // chunk_size) + 1 for chunk_idx in numba.prange(n_row_chunks): n = chunk_idx * chunk_size chunk_end_n = min(n + chunk_size, row_size) m_start = n if symmetrical else 0 for m in range(m_start, col_size, chunk_size): chunk_end_m = min(m + chunk_size, col_size) for i in range(n, chunk_end_n): for j in range(m, chunk_end_m): result[i, j] = metric(X[i], XX[j]) return result def pairwise_special_metric(X, Y=None, metric="hellinger", kwds=None, force_all_finite=True): if callable(metric): if kwds is not None: kwd_vals = tuple(kwds.values()) else: kwd_vals = () @numba.njit(fastmath=True) def _partial_metric(_X, _Y=None): return metric(_X, _Y, *kwd_vals) return pairwise_distances(X, Y, metric=_partial_metric, force_all_finite=force_all_finite) else: special_metric_func = named_distances[metric] return parallel_special_metric(X, Y, metric=special_metric_func)