import re import sys import warnings from io import StringIO import numpy as np import pytest from scipy import linalg from sklearn.base import clone from sklearn.decomposition import NMF, MiniBatchNMF, non_negative_factorization from sklearn.decomposition import _nmf as nmf # For testing internals from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ( assert_allclose, assert_almost_equal, assert_array_almost_equal, assert_array_equal, ignore_warnings, ) from sklearn.utils.extmath import squared_norm from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) def test_convergence_warning(Estimator, solver): convergence_warning = ( "Maximum number of iterations 1 reached. Increase it to improve convergence." ) A = np.ones((2, 2)) with pytest.warns(ConvergenceWarning, match=convergence_warning): Estimator(max_iter=1, n_components="auto", **solver).fit(A) def test_initialize_nn_output(): # Test that initialization does not return negative values rng = np.random.mtrand.RandomState(42) data = np.abs(rng.randn(10, 10)) for init in ("random", "nndsvd", "nndsvda", "nndsvdar"): W, H = nmf._initialize_nmf(data, 10, init=init, random_state=0) assert not ((W < 0).any() or (H < 0).any()) # TODO(1.6): remove the warning filter for `n_components` @pytest.mark.filterwarnings( r"ignore:The multiplicative update \('mu'\) solver cannot update zeros present in" r" the initialization", "ignore:The default value of `n_components` will change", ) def test_parameter_checking(): # Here we only check for invalid parameter values that are not already # automatically tested in the common tests. A = np.ones((2, 2)) msg = "Invalid beta_loss parameter: solver 'cd' does not handle beta_loss = 1.0" with pytest.raises(ValueError, match=msg): NMF(solver="cd", beta_loss=1.0).fit(A) msg = "Negative values in data passed to" with pytest.raises(ValueError, match=msg): NMF().fit(-A) clf = NMF(2, tol=0.1).fit(A) with pytest.raises(ValueError, match=msg): clf.transform(-A) with pytest.raises(ValueError, match=msg): nmf._initialize_nmf(-A, 2, "nndsvd") for init in ["nndsvd", "nndsvda", "nndsvdar"]: msg = re.escape( "init = '{}' can only be used when " "n_components <= min(n_samples, n_features)".format(init) ) with pytest.raises(ValueError, match=msg): NMF(3, init=init).fit(A) with pytest.raises(ValueError, match=msg): MiniBatchNMF(3, init=init).fit(A) with pytest.raises(ValueError, match=msg): nmf._initialize_nmf(A, 3, init) def test_initialize_close(): # Test NNDSVD error # Test that _initialize_nmf error is less than the standard deviation of # the entries in the matrix. rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(10, 10)) W, H = nmf._initialize_nmf(A, 10, init="nndsvd") error = linalg.norm(np.dot(W, H) - A) sdev = linalg.norm(A - A.mean()) assert error <= sdev def test_initialize_variants(): # Test NNDSVD variants correctness # Test that the variants 'nndsvda' and 'nndsvdar' differ from basic # 'nndsvd' only where the basic version has zeros. rng = np.random.mtrand.RandomState(42) data = np.abs(rng.randn(10, 10)) W0, H0 = nmf._initialize_nmf(data, 10, init="nndsvd") Wa, Ha = nmf._initialize_nmf(data, 10, init="nndsvda") War, Har = nmf._initialize_nmf(data, 10, init="nndsvdar", random_state=0) for ref, evl in ((W0, Wa), (W0, War), (H0, Ha), (H0, Har)): assert_almost_equal(evl[ref != 0], ref[ref != 0]) # ignore UserWarning raised when both solver='mu' and init='nndsvd' @ignore_warnings(category=UserWarning) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) @pytest.mark.parametrize("init", (None, "nndsvd", "nndsvda", "nndsvdar", "random")) @pytest.mark.parametrize("alpha_W", (0.0, 1.0)) @pytest.mark.parametrize("alpha_H", (0.0, 1.0, "same")) def test_nmf_fit_nn_output(Estimator, solver, init, alpha_W, alpha_H): # Test that the decomposition does not contain negative values A = np.c_[5.0 - np.arange(1, 6), 5.0 + np.arange(1, 6)] model = Estimator( n_components=2, init=init, alpha_W=alpha_W, alpha_H=alpha_H, random_state=0, **solver, ) transf = model.fit_transform(A) assert not ((model.components_ < 0).any() or (transf < 0).any()) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) def test_nmf_fit_close(Estimator, solver): rng = np.random.mtrand.RandomState(42) # Test that the fit is not too far away pnmf = Estimator( 5, init="nndsvdar", random_state=0, max_iter=600, **solver, ) X = np.abs(rng.randn(6, 5)) assert pnmf.fit(X).reconstruction_err_ < 0.1 def test_nmf_true_reconstruction(): # Test that the fit is not too far away from an exact solution # (by construction) n_samples = 15 n_features = 10 n_components = 5 beta_loss = 1 batch_size = 3 max_iter = 1000 rng = np.random.mtrand.RandomState(42) W_true = np.zeros([n_samples, n_components]) W_array = np.abs(rng.randn(n_samples)) for j in range(n_components): W_true[j % n_samples, j] = W_array[j % n_samples] H_true = np.zeros([n_components, n_features]) H_array = np.abs(rng.randn(n_components)) for j in range(n_features): H_true[j % n_components, j] = H_array[j % n_components] X = np.dot(W_true, H_true) model = NMF( n_components=n_components, solver="mu", beta_loss=beta_loss, max_iter=max_iter, random_state=0, ) transf = model.fit_transform(X) X_calc = np.dot(transf, model.components_) assert model.reconstruction_err_ < 0.1 assert_allclose(X, X_calc) mbmodel = MiniBatchNMF( n_components=n_components, beta_loss=beta_loss, batch_size=batch_size, random_state=0, max_iter=max_iter, ) transf = mbmodel.fit_transform(X) X_calc = np.dot(transf, mbmodel.components_) assert mbmodel.reconstruction_err_ < 0.1 assert_allclose(X, X_calc, atol=1) @pytest.mark.parametrize("solver", ["cd", "mu"]) def test_nmf_transform(solver): # Test that fit_transform is equivalent to fit.transform for NMF # Test that NMF.transform returns close values rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(6, 5)) m = NMF( solver=solver, n_components=3, init="random", random_state=0, tol=1e-6, ) ft = m.fit_transform(A) t = m.transform(A) assert_allclose(ft, t, atol=1e-1) def test_minibatch_nmf_transform(): # Test that fit_transform is equivalent to fit.transform for MiniBatchNMF # Only guaranteed with fresh restarts rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(6, 5)) m = MiniBatchNMF( n_components=3, random_state=0, tol=1e-3, fresh_restarts=True, ) ft = m.fit_transform(A) t = m.transform(A) assert_allclose(ft, t) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) def test_nmf_transform_custom_init(Estimator, solver): # Smoke test that checks if NMF.transform works with custom initialization random_state = np.random.RandomState(0) A = np.abs(random_state.randn(6, 5)) n_components = 4 avg = np.sqrt(A.mean() / n_components) H_init = np.abs(avg * random_state.randn(n_components, 5)) W_init = np.abs(avg * random_state.randn(6, n_components)) m = Estimator( n_components=n_components, init="custom", random_state=0, tol=1e-3, **solver ) m.fit_transform(A, W=W_init, H=H_init) m.transform(A) @pytest.mark.parametrize("solver", ("cd", "mu")) def test_nmf_inverse_transform(solver): # Test that NMF.inverse_transform returns close values random_state = np.random.RandomState(0) A = np.abs(random_state.randn(6, 4)) m = NMF( solver=solver, n_components=4, init="random", random_state=0, max_iter=1000, ) ft = m.fit_transform(A) A_new = m.inverse_transform(ft) assert_array_almost_equal(A, A_new, decimal=2) # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") def test_mbnmf_inverse_transform(): # Test that MiniBatchNMF.transform followed by MiniBatchNMF.inverse_transform # is close to the identity rng = np.random.RandomState(0) A = np.abs(rng.randn(6, 4)) nmf = MiniBatchNMF( random_state=rng, max_iter=500, init="nndsvdar", fresh_restarts=True, ) ft = nmf.fit_transform(A) A_new = nmf.inverse_transform(ft) assert_allclose(A, A_new, rtol=1e-3, atol=1e-2) @pytest.mark.parametrize("Estimator", [NMF, MiniBatchNMF]) def test_n_components_greater_n_features(Estimator): # Smoke test for the case of more components than features. rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(30, 10)) Estimator(n_components=15, random_state=0, tol=1e-2).fit(A) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) @pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS) @pytest.mark.parametrize("alpha_W", (0.0, 1.0)) @pytest.mark.parametrize("alpha_H", (0.0, 1.0, "same")) def test_nmf_sparse_input(Estimator, solver, sparse_container, alpha_W, alpha_H): # Test that sparse matrices are accepted as input rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(10, 10)) A[:, 2 * np.arange(5)] = 0 A_sparse = sparse_container(A) est1 = Estimator( n_components=5, init="random", alpha_W=alpha_W, alpha_H=alpha_H, random_state=0, tol=0, max_iter=100, **solver, ) est2 = clone(est1) W1 = est1.fit_transform(A) W2 = est2.fit_transform(A_sparse) H1 = est1.components_ H2 = est2.components_ assert_allclose(W1, W2) assert_allclose(H1, H2) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) @pytest.mark.parametrize("csc_container", CSC_CONTAINERS) def test_nmf_sparse_transform(Estimator, solver, csc_container): # Test that transform works on sparse data. Issue #2124 rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(3, 2)) A[1, 1] = 0 A = csc_container(A) model = Estimator(random_state=0, n_components=2, max_iter=400, **solver) A_fit_tr = model.fit_transform(A) A_tr = model.transform(A) assert_allclose(A_fit_tr, A_tr, atol=1e-1) # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") @pytest.mark.parametrize("init", ["random", "nndsvd"]) @pytest.mark.parametrize("solver", ("cd", "mu")) @pytest.mark.parametrize("alpha_W", (0.0, 1.0)) @pytest.mark.parametrize("alpha_H", (0.0, 1.0, "same")) def test_non_negative_factorization_consistency(init, solver, alpha_W, alpha_H): # Test that the function is called in the same way, either directly # or through the NMF class max_iter = 500 rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(10, 10)) A[:, 2 * np.arange(5)] = 0 W_nmf, H, _ = non_negative_factorization( A, init=init, solver=solver, max_iter=max_iter, alpha_W=alpha_W, alpha_H=alpha_H, random_state=1, tol=1e-2, ) W_nmf_2, H, _ = non_negative_factorization( A, H=H, update_H=False, init=init, solver=solver, max_iter=max_iter, alpha_W=alpha_W, alpha_H=alpha_H, random_state=1, tol=1e-2, ) model_class = NMF( init=init, solver=solver, max_iter=max_iter, alpha_W=alpha_W, alpha_H=alpha_H, random_state=1, tol=1e-2, ) W_cls = model_class.fit_transform(A) W_cls_2 = model_class.transform(A) assert_allclose(W_nmf, W_cls) assert_allclose(W_nmf_2, W_cls_2) def test_non_negative_factorization_checking(): # Note that the validity of parameter types and range of possible values # for scalar numerical or str parameters is already checked in the common # tests. Here we only check for problems that cannot be captured by simple # declarative constraints on the valid parameter values. A = np.ones((2, 2)) # Test parameters checking in public function nnmf = non_negative_factorization msg = re.escape("Negative values in data passed to NMF (input H)") with pytest.raises(ValueError, match=msg): nnmf(A, A, -A, 2, init="custom") msg = re.escape("Negative values in data passed to NMF (input W)") with pytest.raises(ValueError, match=msg): nnmf(A, -A, A, 2, init="custom") msg = re.escape("Array passed to NMF (input H) is full of zeros") with pytest.raises(ValueError, match=msg): nnmf(A, A, 0 * A, 2, init="custom") def _beta_divergence_dense(X, W, H, beta): """Compute the beta-divergence of X and W.H for dense array only. Used as a reference for testing nmf._beta_divergence. """ WH = np.dot(W, H) if beta == 2: return squared_norm(X - WH) / 2 WH_Xnonzero = WH[X != 0] X_nonzero = X[X != 0] np.maximum(WH_Xnonzero, 1e-9, out=WH_Xnonzero) if beta == 1: res = np.sum(X_nonzero * np.log(X_nonzero / WH_Xnonzero)) res += WH.sum() - X.sum() elif beta == 0: div = X_nonzero / WH_Xnonzero res = np.sum(div) - X.size - np.sum(np.log(div)) else: res = (X_nonzero**beta).sum() res += (beta - 1) * (WH**beta).sum() res -= beta * (X_nonzero * (WH_Xnonzero ** (beta - 1))).sum() res /= beta * (beta - 1) return res @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_beta_divergence(csr_container): # Compare _beta_divergence with the reference _beta_divergence_dense n_samples = 20 n_features = 10 n_components = 5 beta_losses = [0.0, 0.5, 1.0, 1.5, 2.0, 3.0] # initialization rng = np.random.mtrand.RandomState(42) X = rng.randn(n_samples, n_features) np.clip(X, 0, None, out=X) X_csr = csr_container(X) W, H = nmf._initialize_nmf(X, n_components, init="random", random_state=42) for beta in beta_losses: ref = _beta_divergence_dense(X, W, H, beta) loss = nmf._beta_divergence(X, W, H, beta) loss_csr = nmf._beta_divergence(X_csr, W, H, beta) assert_almost_equal(ref, loss, decimal=7) assert_almost_equal(ref, loss_csr, decimal=7) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_special_sparse_dot(csr_container): # Test the function that computes np.dot(W, H), only where X is non zero. n_samples = 10 n_features = 5 n_components = 3 rng = np.random.mtrand.RandomState(42) X = rng.randn(n_samples, n_features) np.clip(X, 0, None, out=X) X_csr = csr_container(X) W = np.abs(rng.randn(n_samples, n_components)) H = np.abs(rng.randn(n_components, n_features)) WH_safe = nmf._special_sparse_dot(W, H, X_csr) WH = nmf._special_sparse_dot(W, H, X) # test that both results have same values, in X_csr nonzero elements ii, jj = X_csr.nonzero() WH_safe_data = np.asarray(WH_safe[ii, jj]).ravel() assert_array_almost_equal(WH_safe_data, WH[ii, jj], decimal=10) # test that WH_safe and X_csr have the same sparse structure assert_array_equal(WH_safe.indices, X_csr.indices) assert_array_equal(WH_safe.indptr, X_csr.indptr) assert_array_equal(WH_safe.shape, X_csr.shape) @ignore_warnings(category=ConvergenceWarning) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_nmf_multiplicative_update_sparse(csr_container): # Compare sparse and dense input in multiplicative update NMF # Also test continuity of the results with respect to beta_loss parameter n_samples = 20 n_features = 10 n_components = 5 alpha = 0.1 l1_ratio = 0.5 n_iter = 20 # initialization rng = np.random.mtrand.RandomState(1337) X = rng.randn(n_samples, n_features) X = np.abs(X) X_csr = csr_container(X) W0, H0 = nmf._initialize_nmf(X, n_components, init="random", random_state=42) for beta_loss in (-1.2, 0, 0.2, 1.0, 2.0, 2.5): # Reference with dense array X W, H = W0.copy(), H0.copy() W1, H1, _ = non_negative_factorization( X, W, H, n_components, init="custom", update_H=True, solver="mu", beta_loss=beta_loss, max_iter=n_iter, alpha_W=alpha, l1_ratio=l1_ratio, random_state=42, ) # Compare with sparse X W, H = W0.copy(), H0.copy() W2, H2, _ = non_negative_factorization( X_csr, W, H, n_components, init="custom", update_H=True, solver="mu", beta_loss=beta_loss, max_iter=n_iter, alpha_W=alpha, l1_ratio=l1_ratio, random_state=42, ) assert_allclose(W1, W2, atol=1e-7) assert_allclose(H1, H2, atol=1e-7) # Compare with almost same beta_loss, since some values have a specific # behavior, but the results should be continuous w.r.t beta_loss beta_loss -= 1.0e-5 W, H = W0.copy(), H0.copy() W3, H3, _ = non_negative_factorization( X_csr, W, H, n_components, init="custom", update_H=True, solver="mu", beta_loss=beta_loss, max_iter=n_iter, alpha_W=alpha, l1_ratio=l1_ratio, random_state=42, ) assert_allclose(W1, W3, atol=1e-4) assert_allclose(H1, H3, atol=1e-4) @pytest.mark.parametrize("csr_container", CSR_CONTAINERS) def test_nmf_negative_beta_loss(csr_container): # Test that an error is raised if beta_loss < 0 and X contains zeros. # Test that the output has not NaN values when the input contains zeros. n_samples = 6 n_features = 5 n_components = 3 rng = np.random.mtrand.RandomState(42) X = rng.randn(n_samples, n_features) np.clip(X, 0, None, out=X) X_csr = csr_container(X) def _assert_nmf_no_nan(X, beta_loss): W, H, _ = non_negative_factorization( X, init="random", n_components=n_components, solver="mu", beta_loss=beta_loss, random_state=0, max_iter=1000, ) assert not np.any(np.isnan(W)) assert not np.any(np.isnan(H)) msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge." for beta_loss in (-0.6, 0.0): with pytest.raises(ValueError, match=msg): _assert_nmf_no_nan(X, beta_loss) _assert_nmf_no_nan(X + 1e-9, beta_loss) for beta_loss in (0.2, 1.0, 1.2, 2.0, 2.5): _assert_nmf_no_nan(X, beta_loss) _assert_nmf_no_nan(X_csr, beta_loss) # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") @pytest.mark.parametrize("beta_loss", [-0.5, 0.0]) def test_minibatch_nmf_negative_beta_loss(beta_loss): """Check that an error is raised if beta_loss < 0 and X contains zeros.""" rng = np.random.RandomState(0) X = rng.normal(size=(6, 5)) X[X < 0] = 0 nmf = MiniBatchNMF(beta_loss=beta_loss, random_state=0) msg = "When beta_loss <= 0 and X contains zeros, the solver may diverge." with pytest.raises(ValueError, match=msg): nmf.fit(X) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) def test_nmf_regularization(Estimator, solver): # Test the effect of L1 and L2 regularizations n_samples = 6 n_features = 5 n_components = 3 rng = np.random.mtrand.RandomState(42) X = np.abs(rng.randn(n_samples, n_features)) # L1 regularization should increase the number of zeros l1_ratio = 1.0 regul = Estimator( n_components=n_components, alpha_W=0.5, l1_ratio=l1_ratio, random_state=42, **solver, ) model = Estimator( n_components=n_components, alpha_W=0.0, l1_ratio=l1_ratio, random_state=42, **solver, ) W_regul = regul.fit_transform(X) W_model = model.fit_transform(X) H_regul = regul.components_ H_model = model.components_ eps = np.finfo(np.float64).eps W_regul_n_zeros = W_regul[W_regul <= eps].size W_model_n_zeros = W_model[W_model <= eps].size H_regul_n_zeros = H_regul[H_regul <= eps].size H_model_n_zeros = H_model[H_model <= eps].size assert W_regul_n_zeros > W_model_n_zeros assert H_regul_n_zeros > H_model_n_zeros # L2 regularization should decrease the sum of the squared norm # of the matrices W and H l1_ratio = 0.0 regul = Estimator( n_components=n_components, alpha_W=0.5, l1_ratio=l1_ratio, random_state=42, **solver, ) model = Estimator( n_components=n_components, alpha_W=0.0, l1_ratio=l1_ratio, random_state=42, **solver, ) W_regul = regul.fit_transform(X) W_model = model.fit_transform(X) H_regul = regul.components_ H_model = model.components_ assert (linalg.norm(W_model)) ** 2.0 + (linalg.norm(H_model)) ** 2.0 > ( linalg.norm(W_regul) ) ** 2.0 + (linalg.norm(H_regul)) ** 2.0 @ignore_warnings(category=ConvergenceWarning) @pytest.mark.parametrize("solver", ("cd", "mu")) def test_nmf_decreasing(solver): # test that the objective function is decreasing at each iteration n_samples = 20 n_features = 15 n_components = 10 alpha = 0.1 l1_ratio = 0.5 tol = 0.0 # initialization rng = np.random.mtrand.RandomState(42) X = rng.randn(n_samples, n_features) np.abs(X, X) W0, H0 = nmf._initialize_nmf(X, n_components, init="random", random_state=42) for beta_loss in (-1.2, 0, 0.2, 1.0, 2.0, 2.5): if solver != "mu" and beta_loss != 2: # not implemented continue W, H = W0.copy(), H0.copy() previous_loss = None for _ in range(30): # one more iteration starting from the previous results W, H, _ = non_negative_factorization( X, W, H, beta_loss=beta_loss, init="custom", n_components=n_components, max_iter=1, alpha_W=alpha, solver=solver, tol=tol, l1_ratio=l1_ratio, verbose=0, random_state=0, update_H=True, ) loss = ( nmf._beta_divergence(X, W, H, beta_loss) + alpha * l1_ratio * n_features * W.sum() + alpha * l1_ratio * n_samples * H.sum() + alpha * (1 - l1_ratio) * n_features * (W**2).sum() + alpha * (1 - l1_ratio) * n_samples * (H**2).sum() ) if previous_loss is not None: assert previous_loss > loss previous_loss = loss def test_nmf_underflow(): # Regression test for an underflow issue in _beta_divergence rng = np.random.RandomState(0) n_samples, n_features, n_components = 10, 2, 2 X = np.abs(rng.randn(n_samples, n_features)) * 10 W = np.abs(rng.randn(n_samples, n_components)) * 10 H = np.abs(rng.randn(n_components, n_features)) X[0, 0] = 0 ref = nmf._beta_divergence(X, W, H, beta=1.0) X[0, 0] = 1e-323 res = nmf._beta_divergence(X, W, H, beta=1.0) assert_almost_equal(res, ref) # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") @pytest.mark.parametrize( "dtype_in, dtype_out", [ (np.float32, np.float32), (np.float64, np.float64), (np.int32, np.float64), (np.int64, np.float64), ], ) @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) def test_nmf_dtype_match(Estimator, solver, dtype_in, dtype_out): # Check that NMF preserves dtype (float32 and float64) X = np.random.RandomState(0).randn(20, 15).astype(dtype_in, copy=False) np.abs(X, out=X) nmf = Estimator( alpha_W=1.0, alpha_H=1.0, tol=1e-2, random_state=0, **solver, ) assert nmf.fit(X).transform(X).dtype == dtype_out assert nmf.fit_transform(X).dtype == dtype_out assert nmf.components_.dtype == dtype_out # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") @pytest.mark.parametrize( ["Estimator", "solver"], [[NMF, {"solver": "cd"}], [NMF, {"solver": "mu"}], [MiniBatchNMF, {}]], ) def test_nmf_float32_float64_consistency(Estimator, solver): # Check that the result of NMF is the same between float32 and float64 X = np.random.RandomState(0).randn(50, 7) np.abs(X, out=X) nmf32 = Estimator(random_state=0, tol=1e-3, **solver) W32 = nmf32.fit_transform(X.astype(np.float32)) nmf64 = Estimator(random_state=0, tol=1e-3, **solver) W64 = nmf64.fit_transform(X) assert_allclose(W32, W64, atol=1e-5) # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") @pytest.mark.parametrize("Estimator", [NMF, MiniBatchNMF]) def test_nmf_custom_init_dtype_error(Estimator): # Check that an error is raise if custom H and/or W don't have the same # dtype as X. rng = np.random.RandomState(0) X = rng.random_sample((20, 15)) H = rng.random_sample((15, 15)).astype(np.float32) W = rng.random_sample((20, 15)) with pytest.raises(TypeError, match="should have the same dtype as X"): Estimator(init="custom").fit(X, H=H, W=W) with pytest.raises(TypeError, match="should have the same dtype as X"): non_negative_factorization(X, H=H, update_H=False) @pytest.mark.parametrize("beta_loss", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5]) def test_nmf_minibatchnmf_equivalence(beta_loss): # Test that MiniBatchNMF is equivalent to NMF when batch_size = n_samples and # forget_factor 0.0 (stopping criterion put aside) rng = np.random.mtrand.RandomState(42) X = np.abs(rng.randn(48, 5)) nmf = NMF( n_components=5, beta_loss=beta_loss, solver="mu", random_state=0, tol=0, ) mbnmf = MiniBatchNMF( n_components=5, beta_loss=beta_loss, random_state=0, tol=0, max_no_improvement=None, batch_size=X.shape[0], forget_factor=0.0, ) W = nmf.fit_transform(X) mbW = mbnmf.fit_transform(X) assert_allclose(W, mbW) def test_minibatch_nmf_partial_fit(): # Check fit / partial_fit equivalence. Applicable only with fresh restarts. rng = np.random.mtrand.RandomState(42) X = np.abs(rng.randn(100, 5)) n_components = 5 batch_size = 10 max_iter = 2 mbnmf1 = MiniBatchNMF( n_components=n_components, init="custom", random_state=0, max_iter=max_iter, batch_size=batch_size, tol=0, max_no_improvement=None, fresh_restarts=False, ) mbnmf2 = MiniBatchNMF(n_components=n_components, init="custom", random_state=0) # Force the same init of H (W is recomputed anyway) to be able to compare results. W, H = nmf._initialize_nmf( X, n_components=n_components, init="random", random_state=0 ) mbnmf1.fit(X, W=W, H=H) for i in range(max_iter): for j in range(batch_size): mbnmf2.partial_fit(X[j : j + batch_size], W=W[:batch_size], H=H) assert mbnmf1.n_steps_ == mbnmf2.n_steps_ assert_allclose(mbnmf1.components_, mbnmf2.components_) def test_feature_names_out(): """Check feature names out for NMF.""" random_state = np.random.RandomState(0) X = np.abs(random_state.randn(10, 4)) nmf = NMF(n_components=3).fit(X) names = nmf.get_feature_names_out() assert_array_equal([f"nmf{i}" for i in range(3)], names) # TODO(1.6): remove the warning filter @pytest.mark.filterwarnings("ignore:The default value of `n_components` will change") def test_minibatch_nmf_verbose(): # Check verbose mode of MiniBatchNMF for better coverage. A = np.random.RandomState(0).random_sample((100, 10)) nmf = MiniBatchNMF(tol=1e-2, random_state=0, verbose=1) old_stdout = sys.stdout sys.stdout = StringIO() try: nmf.fit(A) finally: sys.stdout = old_stdout # TODO(1.5): remove this test def test_NMF_inverse_transform_W_deprecation(): rng = np.random.mtrand.RandomState(42) A = np.abs(rng.randn(6, 5)) est = NMF( n_components=3, init="random", random_state=0, tol=1e-6, ) Xt = est.fit_transform(A) with pytest.raises(TypeError, match="Missing required positional argument"): est.inverse_transform() with pytest.raises(ValueError, match="Please provide only"): est.inverse_transform(Xt=Xt, W=Xt) with warnings.catch_warnings(record=True): warnings.simplefilter("error") est.inverse_transform(Xt) with pytest.warns(FutureWarning, match="Input argument `W` was renamed to `Xt`"): est.inverse_transform(W=Xt) @pytest.mark.parametrize("Estimator", [NMF, MiniBatchNMF]) def test_nmf_n_components_auto(Estimator): # Check that n_components is correctly inferred # from the provided custom initialization. rng = np.random.RandomState(0) X = rng.random_sample((6, 5)) W = rng.random_sample((6, 2)) H = rng.random_sample((2, 5)) est = Estimator( n_components="auto", init="custom", random_state=0, tol=1e-6, ) est.fit_transform(X, W=W, H=H) assert est._n_components == H.shape[0] def test_nmf_non_negative_factorization_n_components_auto(): # Check that n_components is correctly inferred from the provided # custom initialization. rng = np.random.RandomState(0) X = rng.random_sample((6, 5)) W_init = rng.random_sample((6, 2)) H_init = rng.random_sample((2, 5)) W, H, _ = non_negative_factorization( X, W=W_init, H=H_init, init="custom", n_components="auto" ) assert H.shape == H_init.shape assert W.shape == W_init.shape # TODO(1.6): remove def test_nmf_n_components_default_value_warning(): rng = np.random.RandomState(0) X = rng.random_sample((6, 5)) H = rng.random_sample((2, 5)) with pytest.warns( FutureWarning, match="The default value of `n_components` will change from" ): non_negative_factorization(X, H=H) def test_nmf_n_components_auto_no_h_update(): # Tests that non_negative_factorization does not fail when setting # n_components="auto" also tests that the inferred n_component # value is the right one. rng = np.random.RandomState(0) X = rng.random_sample((6, 5)) H_true = rng.random_sample((2, 5)) W, H, _ = non_negative_factorization( X, H=H_true, n_components="auto", update_H=False ) # should not fail assert_allclose(H, H_true) assert W.shape == (X.shape[0], H_true.shape[0]) def test_nmf_w_h_not_used_warning(): # Check that warnings are raised if user provided W and H are not used # and initialization overrides value of W or H rng = np.random.RandomState(0) X = rng.random_sample((6, 5)) W_init = rng.random_sample((6, 2)) H_init = rng.random_sample((2, 5)) with pytest.warns( RuntimeWarning, match="When init!='custom', provided W or H are ignored", ): non_negative_factorization(X, H=H_init, update_H=True, n_components="auto") with pytest.warns( RuntimeWarning, match="When init!='custom', provided W or H are ignored", ): non_negative_factorization( X, W=W_init, H=H_init, update_H=True, n_components="auto" ) with pytest.warns( RuntimeWarning, match="When update_H=False, the provided initial W is not used." ): # When update_H is False, W is ignored regardless of init # TODO: use the provided W when init="custom". non_negative_factorization( X, W=W_init, H=H_init, update_H=False, n_components="auto" ) def test_nmf_custom_init_shape_error(): # Check that an informative error is raised when custom initialization does not # have the right shape rng = np.random.RandomState(0) X = rng.random_sample((6, 5)) H = rng.random_sample((2, 5)) nmf = NMF(n_components=2, init="custom", random_state=0) with pytest.raises(ValueError, match="Array with wrong first dimension passed"): nmf.fit(X, H=H, W=rng.random_sample((5, 2))) with pytest.raises(ValueError, match="Array with wrong second dimension passed"): nmf.fit(X, H=H, W=rng.random_sample((6, 3)))