import math

import numpy
import pytest

from thinc.api import SGD, SparseLinear, SparseLinear_v2, to_categorical


@pytest.fixture
def instances():
    lengths = numpy.asarray([5, 4], dtype="int32")
    keys = numpy.arange(9, dtype="uint64")
    values = numpy.ones(9, dtype="float32")
    X = (keys, values, lengths)
    y = numpy.asarray([0, 2], dtype="int32")
    return X, to_categorical(y, n_classes=3)


@pytest.fixture
def sgd():
    return SGD(0.001)


def test_basic(instances, sgd):
    X, y = instances
    nr_class = 3
    model = SparseLinear(nr_class).initialize()
    yh, backprop = model.begin_update(X)
    loss1 = ((yh - y) ** 2).sum()
    backprop(yh - y)
    model.finish_update(sgd)
    yh, backprop = model.begin_update(X)
    loss2 = ((yh - y) ** 2).sum()
    assert loss2 < loss1


def test_init():
    model = SparseLinear(3).initialize()
    keys = numpy.ones((5,), dtype="uint64")
    values = numpy.ones((5,), dtype="f")
    lengths = numpy.zeros((2,), dtype="int32")
    lengths[0] = 3
    lengths[1] = 2
    scores, backprop = model.begin_update((keys, values, lengths))
    assert scores.shape == (2, 3)
    d_feats = backprop(scores)
    assert len(d_feats) == 3


def test_distribution():
    n_class = 10
    length = 2**18
    model = SparseLinear_v2(nO=n_class, length=length).initialize()
    ii64 = numpy.iinfo(numpy.uint64)
    lengths = numpy.zeros((2,), dtype="int32")
    for exponent in range(1, 12):
        # Clear gradients from the previous iteration.
        model.set_grad("W", 0.0)

        n = 2**exponent
        keys = numpy.random.randint(ii64.min, ii64.max, size=(n,), dtype=numpy.uint64)
        values = numpy.ones((n,), dtype="f")
        lengths[0] = n // 2
        lengths[1] = n // 2

        # Probability that a given weight is touched (2 because we use 2 hashes).
        p_nonzero = 1 - math.exp(-2 * n / length)
        Y, backprop = model.begin_update((keys, values, lengths))
        backprop(numpy.ones_like(Y))

        # Check that each class has the expected rate of non-zero gradients.
        dW = model.get_grad("W").reshape(n_class, -1)
        nonzero_empirical = numpy.count_nonzero(dW, axis=1) / dW.shape[1]
        numpy.testing.assert_allclose(
            nonzero_empirical, p_nonzero, rtol=1e-4, atol=1e-4
        )
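

# Illustrative sketch, not part of the original test suite: where the expected
# non-zero rate in test_distribution comes from. Each of the n keys is hashed
# into the weight table twice (per the comment above, 2 hash functions), so a
# given bucket among `length` buckets stays untouched with probability
# (1 - 1/length) ** (2 * n) ~= exp(-2 * n / length), giving a non-zero rate of
# 1 - exp(-2 * n / length). The helper name below is hypothetical, introduced
# here only to spell out that derivation.
def _expected_nonzero_rate(n_keys: int, length: int, n_hashes: int = 2) -> float:
    # Complement of the probability that all n_hashes * n_keys uniform hashes
    # miss a given bucket, using the Poisson approximation.
    return 1 - math.exp(-n_hashes * n_keys / length)


# For example, with n_keys=2 and length=2**18 the rate is roughly 1.53e-5,
# i.e. about 4 / 262144, since exp(-x) ~= 1 - x for small x.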