ai-content-maker/.venv/Lib/site-packages/sklearn/linear_model/tests/test_omp.py

263 lines
8.7 KiB
Python
Raw Permalink Normal View History

2024-05-03 04:18:51 +03:00
# Author: Vlad Niculae
# License: BSD 3 clause
import numpy as np
import pytest
from sklearn.datasets import make_sparse_coded_signal
from sklearn.linear_model import (
LinearRegression,
OrthogonalMatchingPursuit,
OrthogonalMatchingPursuitCV,
orthogonal_mp,
orthogonal_mp_gram,
)
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
)
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
y, X, gamma = make_sparse_coded_signal(
n_samples=n_targets,
n_components=n_features,
n_features=n_samples,
n_nonzero_coefs=n_nonzero_coefs,
random_state=0,
)
y, X, gamma = y.T, X.T, gamma.T
# Make X not of norm 1 for testing
X *= 10
y *= 10
G, Xy = np.dot(X.T, X), np.dot(X.T, y)
# this makes X (n_samples, n_features)
# and y (n_samples, 3)
def test_correct_shapes():
assert orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5).shape == (n_features,)
assert orthogonal_mp(X, y, n_nonzero_coefs=5).shape == (n_features, 3)
def test_correct_shapes_gram():
assert orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5).shape == (n_features,)
assert orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5).shape == (n_features, 3)
def test_n_nonzero_coefs():
assert np.count_nonzero(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)) <= 5
assert (
np.count_nonzero(orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5, precompute=True))
<= 5
)
def test_tol():
tol = 0.5
gamma = orthogonal_mp(X, y[:, 0], tol=tol)
gamma_gram = orthogonal_mp(X, y[:, 0], tol=tol, precompute=True)
assert np.sum((y[:, 0] - np.dot(X, gamma)) ** 2) <= tol
assert np.sum((y[:, 0] - np.dot(X, gamma_gram)) ** 2) <= tol
def test_with_without_gram():
assert_array_almost_equal(
orthogonal_mp(X, y, n_nonzero_coefs=5),
orthogonal_mp(X, y, n_nonzero_coefs=5, precompute=True),
)
def test_with_without_gram_tol():
assert_array_almost_equal(
orthogonal_mp(X, y, tol=1.0), orthogonal_mp(X, y, tol=1.0, precompute=True)
)
def test_unreachable_accuracy():
assert_array_almost_equal(
orthogonal_mp(X, y, tol=0), orthogonal_mp(X, y, n_nonzero_coefs=n_features)
)
warning_message = (
"Orthogonal matching pursuit ended prematurely "
"due to linear dependence in the dictionary. "
"The requested precision might not have been met."
)
with pytest.warns(RuntimeWarning, match=warning_message):
assert_array_almost_equal(
orthogonal_mp(X, y, tol=0, precompute=True),
orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_features),
)
@pytest.mark.parametrize("positional_params", [(X, y), (G, Xy)])
@pytest.mark.parametrize(
"keyword_params",
[{"n_nonzero_coefs": n_features + 1}],
)
def test_bad_input(positional_params, keyword_params):
with pytest.raises(ValueError):
orthogonal_mp(*positional_params, **keyword_params)
def test_perfect_signal_recovery():
(idx,) = gamma[:, 0].nonzero()
gamma_rec = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
gamma_gram = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
assert_array_equal(idx, np.flatnonzero(gamma_rec))
assert_array_equal(idx, np.flatnonzero(gamma_gram))
assert_array_almost_equal(gamma[:, 0], gamma_rec, decimal=2)
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
def test_orthogonal_mp_gram_readonly():
# Non-regression test for:
# https://github.com/scikit-learn/scikit-learn/issues/5956
(idx,) = gamma[:, 0].nonzero()
G_readonly = G.copy()
G_readonly.setflags(write=False)
Xy_readonly = Xy.copy()
Xy_readonly.setflags(write=False)
gamma_gram = orthogonal_mp_gram(
G_readonly, Xy_readonly[:, 0], n_nonzero_coefs=5, copy_Gram=False, copy_Xy=False
)
assert_array_equal(idx, np.flatnonzero(gamma_gram))
assert_array_almost_equal(gamma[:, 0], gamma_gram, decimal=2)
def test_estimator():
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
omp.fit(X, y[:, 0])
assert omp.coef_.shape == (n_features,)
assert omp.intercept_.shape == ()
assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
omp.fit(X, y)
assert omp.coef_.shape == (n_targets, n_features)
assert omp.intercept_.shape == (n_targets,)
assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
coef_normalized = omp.coef_[0].copy()
omp.set_params(fit_intercept=True)
omp.fit(X, y[:, 0])
assert_array_almost_equal(coef_normalized, omp.coef_)
omp.set_params(fit_intercept=False)
omp.fit(X, y[:, 0])
assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
assert omp.coef_.shape == (n_features,)
assert omp.intercept_ == 0
omp.fit(X, y)
assert omp.coef_.shape == (n_targets, n_features)
assert omp.intercept_ == 0
assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
def test_identical_regressors():
newX = X.copy()
newX[:, 1] = newX[:, 0]
gamma = np.zeros(n_features)
gamma[0] = gamma[1] = 1.0
newy = np.dot(newX, gamma)
warning_message = (
"Orthogonal matching pursuit ended prematurely "
"due to linear dependence in the dictionary. "
"The requested precision might not have been met."
)
with pytest.warns(RuntimeWarning, match=warning_message):
orthogonal_mp(newX, newy, n_nonzero_coefs=2)
def test_swapped_regressors():
gamma = np.zeros(n_features)
# X[:, 21] should be selected first, then X[:, 0] selected second,
# which will take X[:, 21]'s place in case the algorithm does
# column swapping for optimization (which is the case at the moment)
gamma[21] = 1.0
gamma[0] = 0.5
new_y = np.dot(X, gamma)
new_Xy = np.dot(X.T, new_y)
gamma_hat = orthogonal_mp(X, new_y, n_nonzero_coefs=2)
gamma_hat_gram = orthogonal_mp_gram(G, new_Xy, n_nonzero_coefs=2)
assert_array_equal(np.flatnonzero(gamma_hat), [0, 21])
assert_array_equal(np.flatnonzero(gamma_hat_gram), [0, 21])
def test_no_atoms():
y_empty = np.zeros_like(y)
Xy_empty = np.dot(X.T, y_empty)
gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, n_nonzero_coefs=1)
gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, n_nonzero_coefs=1)
assert np.all(gamma_empty == 0)
assert np.all(gamma_empty_gram == 0)
def test_omp_path():
path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True)
last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False)
assert path.shape == (n_features, n_targets, 5)
assert_array_almost_equal(path[:, :, -1], last)
path = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=True)
last = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5, return_path=False)
assert path.shape == (n_features, n_targets, 5)
assert_array_almost_equal(path[:, :, -1], last)
def test_omp_return_path_prop_with_gram():
path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True, precompute=True)
last = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False, precompute=True)
assert path.shape == (n_features, n_targets, 5)
assert_array_almost_equal(path[:, :, -1], last)
def test_omp_cv():
y_ = y[:, 0]
gamma_ = gamma[:, 0]
ompcv = OrthogonalMatchingPursuitCV(fit_intercept=False, max_iter=10)
ompcv.fit(X, y_)
assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs
assert_array_almost_equal(ompcv.coef_, gamma_)
omp = OrthogonalMatchingPursuit(
fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_
)
omp.fit(X, y_)
assert_array_almost_equal(ompcv.coef_, omp.coef_)
def test_omp_reaches_least_squares():
# Use small simple data; it's a sanity check but OMP can stop early
rng = check_random_state(0)
n_samples, n_features = (10, 8)
n_targets = 3
X = rng.randn(n_samples, n_features)
Y = rng.randn(n_samples, n_targets)
omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
lstsq = LinearRegression()
omp.fit(X, Y)
lstsq.fit(X, Y)
assert_array_almost_equal(omp.coef_, lstsq.coef_)
@pytest.mark.parametrize("data_type", (np.float32, np.float64))
def test_omp_gram_dtype_match(data_type):
# verify matching input data type and output data type
coef = orthogonal_mp_gram(
G.astype(data_type), Xy.astype(data_type), n_nonzero_coefs=5
)
assert coef.dtype == data_type
def test_omp_gram_numerical_consistency():
# verify numericaly consistency among np.float32 and np.float64
coef_32 = orthogonal_mp_gram(
G.astype(np.float32), Xy.astype(np.float32), n_nonzero_coefs=5
)
coef_64 = orthogonal_mp_gram(
G.astype(np.float32), Xy.astype(np.float64), n_nonzero_coefs=5
)
assert_allclose(coef_32, coef_64)