ai-content-maker/.venv/Lib/site-packages/thinc/tests/layers/test_sparse_linear.py

import math
import numpy
import pytest
from thinc.api import SGD, SparseLinear, SparseLinear_v2, to_categorical
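

# The fixtures and tests below exercise the sparse input format consumed by the
# SparseLinear layers: a (keys, values, lengths) triple of uint64 feature ids,
# float32 feature values, and int32 per-example feature counts.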
@pytest.fixture
def instances():
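    """Two-example batch: keys 0-4 belong to the first example, keys 5-8 to the second."""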
    lengths = numpy.asarray([5, 4], dtype="int32")
    keys = numpy.arange(9, dtype="uint64")
    values = numpy.ones(9, dtype="float32")
    X = (keys, values, lengths)
    y = numpy.asarray([0, 2], dtype="int32")
    return X, to_categorical(y, n_classes=3)


@pytest.fixture
def sgd():
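    """Plain SGD optimizer used to apply the gradients in test_basic."""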
    return SGD(0.001)


def test_basic(instances, sgd):
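    """One SGD update on the toy batch should reduce the squared error."""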
    X, y = instances
    nr_class = 3
    model = SparseLinear(nr_class).initialize()
    yh, backprop = model.begin_update(X)
    loss1 = ((yh - y) ** 2).sum()
    backprop(yh - y)
    model.finish_update(sgd)
    yh, backprop = model.begin_update(X)
    loss2 = ((yh - y) ** 2).sum()
    assert loss2 < loss1


def test_init():
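    """A fresh model yields (n_docs, n_classes) scores and a three-part input gradient."""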
    model = SparseLinear(3).initialize()
    keys = numpy.ones((5,), dtype="uint64")
    values = numpy.ones((5,), dtype="f")
    lengths = numpy.zeros((2,), dtype="int32")
    lengths[0] = 3
    lengths[1] = 2
    scores, backprop = model.begin_update((keys, values, lengths))
    assert scores.shape == (2, 3)
    d_feats = backprop(scores)
    assert len(d_feats) == 3


def test_distribution():
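    """The per-class rate of non-zero gradients should match the expected
    hashing collision rate of 1 - exp(-2 * n / length)."""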
    n_class = 10
    length = 2**18
    model = SparseLinear_v2(nO=n_class, length=length).initialize()
    ii64 = numpy.iinfo(numpy.uint64)
    lengths = numpy.zeros((2,), dtype="int32")
    for exponent in range(1, 12):
        # Clear gradients from the previous iteration.
        model.set_grad("W", 0.0)

        n = 2**exponent
        keys = numpy.random.randint(ii64.min, ii64.max, size=(n,), dtype=numpy.uint64)
        values = numpy.ones((n,), dtype="f")
        lengths[0] = n // 2
        lengths[1] = n // 2

        # Probability that a given weight gets a non-zero gradient: each key is
        # hashed twice, so a bucket stays empty with probability exp(-2 * n / length).
        p_nonzero = 1 - math.exp(-2 * n / length)

        Y, backprop = model.begin_update((keys, values, lengths))
        backprop(numpy.ones_like(Y))

        # Check that for each class we have the expected rate of non-zeros.
        dW = model.get_grad("W").reshape(n_class, -1)
        nonzero_empirical = numpy.count_nonzero(dW, axis=1) / dW.shape[1]
        numpy.testing.assert_allclose(
            nonzero_empirical, p_nonzero, rtol=1e-4, atol=1e-4
        )