ai-content-maker/.venv/Lib/site-packages/sklearn/linear_model/_omp.py

1098 lines
36 KiB
Python
Raw Normal View History

2024-05-03 04:18:51 +03:00
"""Orthogonal matching pursuit algorithms
"""
# Author: Vlad Niculae
#
# License: BSD 3 clause
import warnings
from math import sqrt
from numbers import Integral, Real
import numpy as np
from scipy import linalg
from scipy.linalg.lapack import get_lapack_funcs
from ..base import MultiOutputMixin, RegressorMixin, _fit_context
from ..model_selection import check_cv
from ..utils import Bunch, as_float_array, check_array
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.metadata_routing import (
MetadataRouter,
MethodMapping,
_raise_for_params,
_routing_enabled,
process_routing,
)
from ..utils.parallel import Parallel, delayed
from ._base import LinearModel, _pre_fit
premature = (
"Orthogonal matching pursuit ended prematurely due to linear"
" dependence in the dictionary. The requested precision might"
" not have been met."
)
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True, return_path=False):
"""Orthogonal Matching Pursuit step using the Cholesky decomposition.
Parameters
----------
X : ndarray of shape (n_samples, n_features)
Input dictionary. Columns are assumed to have unit norm.
y : ndarray of shape (n_samples,)
Input targets.
n_nonzero_coefs : int
Targeted number of non-zero elements.
tol : float, default=None
Targeted squared error, if not None overrides n_nonzero_coefs.
copy_X : bool, default=True
Whether the design matrix X must be copied by the algorithm. A false
value is only helpful if X is already Fortran-ordered, otherwise a
copy is made anyway.
return_path : bool, default=False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
Returns
-------
gamma : ndarray of shape (n_nonzero_coefs,)
Non-zero elements of the solution.
idx : ndarray of shape (n_nonzero_coefs,)
Indices of the positions of the elements in gamma within the solution
vector.
coef : ndarray of shape (n_features, n_nonzero_coefs)
The first k values of column k correspond to the coefficient value
for the active features at that step. The lower left triangle contains
garbage. Only returned if ``return_path=True``.
n_active : int
Number of active features at convergence.
"""
if copy_X:
X = X.copy("F")
else: # even if we are allowed to overwrite, still copy it if bad order
X = np.asfortranarray(X)
min_float = np.finfo(X.dtype).eps
nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (X,))
(potrs,) = get_lapack_funcs(("potrs",), (X,))
alpha = np.dot(X.T, y)
residual = y
gamma = np.empty(0)
n_active = 0
indices = np.arange(X.shape[1]) # keeping track of swapping
max_features = X.shape[1] if tol is not None else n_nonzero_coefs
L = np.empty((max_features, max_features), dtype=X.dtype)
if return_path:
coefs = np.empty_like(L)
while True:
lam = np.argmax(np.abs(np.dot(X.T, residual)))
if lam < n_active or alpha[lam] ** 2 < min_float:
# atom already selected or inner product too small
warnings.warn(premature, RuntimeWarning, stacklevel=2)
break
if n_active > 0:
# Updates the Cholesky decomposition of X' X
L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
linalg.solve_triangular(
L[:n_active, :n_active],
L[n_active, :n_active],
trans=0,
lower=1,
overwrite_b=True,
check_finite=False,
)
v = nrm2(L[n_active, :n_active]) ** 2
Lkk = linalg.norm(X[:, lam]) ** 2 - v
if Lkk <= min_float: # selected atoms are dependent
warnings.warn(premature, RuntimeWarning, stacklevel=2)
break
L[n_active, n_active] = sqrt(Lkk)
else:
L[0, 0] = linalg.norm(X[:, lam])
X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
indices[n_active], indices[lam] = indices[lam], indices[n_active]
n_active += 1
# solves LL'x = X'y as a composition of two triangular systems
gamma, _ = potrs(
L[:n_active, :n_active], alpha[:n_active], lower=True, overwrite_b=False
)
if return_path:
coefs[:n_active, n_active - 1] = gamma
residual = y - np.dot(X[:, :n_active], gamma)
if tol is not None and nrm2(residual) ** 2 <= tol:
break
elif n_active == max_features:
break
if return_path:
return gamma, indices[:n_active], coefs[:, :n_active], n_active
else:
return gamma, indices[:n_active], n_active
def _gram_omp(
Gram,
Xy,
n_nonzero_coefs,
tol_0=None,
tol=None,
copy_Gram=True,
copy_Xy=True,
return_path=False,
):
"""Orthogonal Matching Pursuit step on a precomputed Gram matrix.
This function uses the Cholesky decomposition method.
Parameters
----------
Gram : ndarray of shape (n_features, n_features)
Gram matrix of the input data matrix.
Xy : ndarray of shape (n_features,)
Input targets.
n_nonzero_coefs : int
Targeted number of non-zero elements.
tol_0 : float, default=None
Squared norm of y, required if tol is not None.
tol : float, default=None
Targeted squared error, if not None overrides n_nonzero_coefs.
copy_Gram : bool, default=True
Whether the gram matrix must be copied by the algorithm. A false
value is only helpful if it is already Fortran-ordered, otherwise a
copy is made anyway.
copy_Xy : bool, default=True
Whether the covariance vector Xy must be copied by the algorithm.
If False, it may be overwritten.
return_path : bool, default=False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
Returns
-------
gamma : ndarray of shape (n_nonzero_coefs,)
Non-zero elements of the solution.
idx : ndarray of shape (n_nonzero_coefs,)
Indices of the positions of the elements in gamma within the solution
vector.
coefs : ndarray of shape (n_features, n_nonzero_coefs)
The first k values of column k correspond to the coefficient value
for the active features at that step. The lower left triangle contains
garbage. Only returned if ``return_path=True``.
n_active : int
Number of active features at convergence.
"""
Gram = Gram.copy("F") if copy_Gram else np.asfortranarray(Gram)
if copy_Xy or not Xy.flags.writeable:
Xy = Xy.copy()
min_float = np.finfo(Gram.dtype).eps
nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (Gram,))
(potrs,) = get_lapack_funcs(("potrs",), (Gram,))
indices = np.arange(len(Gram)) # keeping track of swapping
alpha = Xy
tol_curr = tol_0
delta = 0
gamma = np.empty(0)
n_active = 0
max_features = len(Gram) if tol is not None else n_nonzero_coefs
L = np.empty((max_features, max_features), dtype=Gram.dtype)
L[0, 0] = 1.0
if return_path:
coefs = np.empty_like(L)
while True:
lam = np.argmax(np.abs(alpha))
if lam < n_active or alpha[lam] ** 2 < min_float:
# selected same atom twice, or inner product too small
warnings.warn(premature, RuntimeWarning, stacklevel=3)
break
if n_active > 0:
L[n_active, :n_active] = Gram[lam, :n_active]
linalg.solve_triangular(
L[:n_active, :n_active],
L[n_active, :n_active],
trans=0,
lower=1,
overwrite_b=True,
check_finite=False,
)
v = nrm2(L[n_active, :n_active]) ** 2
Lkk = Gram[lam, lam] - v
if Lkk <= min_float: # selected atoms are dependent
warnings.warn(premature, RuntimeWarning, stacklevel=3)
break
L[n_active, n_active] = sqrt(Lkk)
else:
L[0, 0] = sqrt(Gram[lam, lam])
Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
indices[n_active], indices[lam] = indices[lam], indices[n_active]
Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
n_active += 1
# solves LL'x = X'y as a composition of two triangular systems
gamma, _ = potrs(
L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False
)
if return_path:
coefs[:n_active, n_active - 1] = gamma
beta = np.dot(Gram[:, :n_active], gamma)
alpha = Xy - beta
if tol is not None:
tol_curr += delta
delta = np.inner(gamma, beta[:n_active])
tol_curr -= delta
if abs(tol_curr) <= tol:
break
elif n_active == max_features:
break
if return_path:
return gamma, indices[:n_active], coefs[:, :n_active], n_active
else:
return gamma, indices[:n_active], n_active
@validate_params(
{
"X": ["array-like"],
"y": [np.ndarray],
"n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
"tol": [Interval(Real, 0, None, closed="left"), None],
"precompute": ["boolean", StrOptions({"auto"})],
"copy_X": ["boolean"],
"return_path": ["boolean"],
"return_n_iter": ["boolean"],
},
prefer_skip_nested_validation=True,
)
def orthogonal_mp(
X,
y,
*,
n_nonzero_coefs=None,
tol=None,
precompute=False,
copy_X=True,
return_path=False,
return_n_iter=False,
):
r"""Orthogonal Matching Pursuit (OMP).
Solves n_targets Orthogonal Matching Pursuit problems.
An instance of the problem has the form:
When parametrized by the number of non-zero coefficients using
`n_nonzero_coefs`:
argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs}
When parametrized by error using the parameter `tol`:
argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data. Columns are assumed to have unit norm.
y : ndarray of shape (n_samples,) or (n_samples, n_targets)
Input targets.
n_nonzero_coefs : int, default=None
Desired number of non-zero entries in the solution. If None (by
default) this value is set to 10% of n_features.
tol : float, default=None
Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs.
precompute : 'auto' or bool, default=False
Whether to perform precomputations. Improves performance when n_targets
or n_samples is very large.
copy_X : bool, default=True
Whether the design matrix X must be copied by the algorithm. A false
value is only helpful if X is already Fortran-ordered, otherwise a
copy is made anyway.
return_path : bool, default=False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
return_n_iter : bool, default=False
Whether or not to return the number of iterations.
Returns
-------
coef : ndarray of shape (n_features,) or (n_features, n_targets)
Coefficients of the OMP solution. If `return_path=True`, this contains
the whole coefficient path. In this case its shape is
(n_features, n_features) or (n_features, n_targets, n_features) and
iterating over the last axis generates coefficients in increasing order
of active features.
n_iters : array-like or int
Number of active features across every target. Returned only if
`return_n_iter` is set to True.
See Also
--------
OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model.
orthogonal_mp_gram : Solve OMP problems using Gram matrix and the product X.T * y.
lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
sklearn.decomposition.sparse_encode : Sparse coding.
Notes
-----
Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
Matching Pursuit Technical Report - CS Technion, April 2008.
https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
"""
X = check_array(X, order="F", copy=copy_X)
copy_X = False
if y.ndim == 1:
y = y.reshape(-1, 1)
y = check_array(y)
if y.shape[1] > 1: # subsequent targets will be affected
copy_X = True
if n_nonzero_coefs is None and tol is None:
# default for n_nonzero_coefs is 0.1 * n_features
# but at least one.
n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)
if tol is None and n_nonzero_coefs > X.shape[1]:
raise ValueError(
"The number of atoms cannot be more than the number of features"
)
if precompute == "auto":
precompute = X.shape[0] > X.shape[1]
if precompute:
G = np.dot(X.T, X)
G = np.asfortranarray(G)
Xy = np.dot(X.T, y)
if tol is not None:
norms_squared = np.sum((y**2), axis=0)
else:
norms_squared = None
return orthogonal_mp_gram(
G,
Xy,
n_nonzero_coefs=n_nonzero_coefs,
tol=tol,
norms_squared=norms_squared,
copy_Gram=copy_X,
copy_Xy=False,
return_path=return_path,
)
if return_path:
coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))
else:
coef = np.zeros((X.shape[1], y.shape[1]))
n_iters = []
for k in range(y.shape[1]):
out = _cholesky_omp(
X, y[:, k], n_nonzero_coefs, tol, copy_X=copy_X, return_path=return_path
)
if return_path:
_, idx, coefs, n_iter = out
coef = coef[:, :, : len(idx)]
for n_active, x in enumerate(coefs.T):
coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]
else:
x, idx, n_iter = out
coef[idx, k] = x
n_iters.append(n_iter)
if y.shape[1] == 1:
n_iters = n_iters[0]
if return_n_iter:
return np.squeeze(coef), n_iters
else:
return np.squeeze(coef)
@validate_params(
{
"Gram": ["array-like"],
"Xy": ["array-like"],
"n_nonzero_coefs": [Interval(Integral, 0, None, closed="neither"), None],
"tol": [Interval(Real, 0, None, closed="left"), None],
"norms_squared": ["array-like", None],
"copy_Gram": ["boolean"],
"copy_Xy": ["boolean"],
"return_path": ["boolean"],
"return_n_iter": ["boolean"],
},
prefer_skip_nested_validation=True,
)
def orthogonal_mp_gram(
Gram,
Xy,
*,
n_nonzero_coefs=None,
tol=None,
norms_squared=None,
copy_Gram=True,
copy_Xy=True,
return_path=False,
return_n_iter=False,
):
"""Gram Orthogonal Matching Pursuit (OMP).
Solves n_targets Orthogonal Matching Pursuit problems using only
the Gram matrix X.T * X and the product X.T * y.
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
Gram : array-like of shape (n_features, n_features)
Gram matrix of the input data: `X.T * X`.
Xy : array-like of shape (n_features,) or (n_features, n_targets)
Input targets multiplied by `X`: `X.T * y`.
n_nonzero_coefs : int, default=None
Desired number of non-zero entries in the solution. If `None` (by
default) this value is set to 10% of n_features.
tol : float, default=None
Maximum squared norm of the residual. If not `None`,
overrides `n_nonzero_coefs`.
norms_squared : array-like of shape (n_targets,), default=None
Squared L2 norms of the lines of `y`. Required if `tol` is not None.
copy_Gram : bool, default=True
Whether the gram matrix must be copied by the algorithm. A `False`
value is only helpful if it is already Fortran-ordered, otherwise a
copy is made anyway.
copy_Xy : bool, default=True
Whether the covariance vector `Xy` must be copied by the algorithm.
If `False`, it may be overwritten.
return_path : bool, default=False
Whether to return every value of the nonzero coefficients along the
forward path. Useful for cross-validation.
return_n_iter : bool, default=False
Whether or not to return the number of iterations.
Returns
-------
coef : ndarray of shape (n_features,) or (n_features, n_targets)
Coefficients of the OMP solution. If `return_path=True`, this contains
the whole coefficient path. In this case its shape is
`(n_features, n_features)` or `(n_features, n_targets, n_features)` and
iterating over the last axis yields coefficients in increasing order
of active features.
n_iters : list or int
Number of active features across every target. Returned only if
`return_n_iter` is set to True.
See Also
--------
OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).
orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
lars_path : Compute Least Angle Regression or Lasso path using
LARS algorithm.
sklearn.decomposition.sparse_encode : Generic sparse coding.
Each column of the result is the solution to a Lasso problem.
Notes
-----
Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
Matching Pursuit Technical Report - CS Technion, April 2008.
https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
"""
Gram = check_array(Gram, order="F", copy=copy_Gram)
Xy = np.asarray(Xy)
if Xy.ndim > 1 and Xy.shape[1] > 1:
# or subsequent target will be affected
copy_Gram = True
if Xy.ndim == 1:
Xy = Xy[:, np.newaxis]
if tol is not None:
norms_squared = [norms_squared]
if copy_Xy or not Xy.flags.writeable:
# Make the copy once instead of many times in _gram_omp itself.
Xy = Xy.copy()
if n_nonzero_coefs is None and tol is None:
n_nonzero_coefs = int(0.1 * len(Gram))
if tol is not None and norms_squared is None:
raise ValueError(
"Gram OMP needs the precomputed norms in order "
"to evaluate the error sum of squares."
)
if tol is not None and tol < 0:
raise ValueError("Epsilon cannot be negative")
if tol is None and n_nonzero_coefs <= 0:
raise ValueError("The number of atoms must be positive")
if tol is None and n_nonzero_coefs > len(Gram):
raise ValueError(
"The number of atoms cannot be more than the number of features"
)
if return_path:
coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)), dtype=Gram.dtype)
else:
coef = np.zeros((len(Gram), Xy.shape[1]), dtype=Gram.dtype)
n_iters = []
for k in range(Xy.shape[1]):
out = _gram_omp(
Gram,
Xy[:, k],
n_nonzero_coefs,
norms_squared[k] if tol is not None else None,
tol,
copy_Gram=copy_Gram,
copy_Xy=False,
return_path=return_path,
)
if return_path:
_, idx, coefs, n_iter = out
coef = coef[:, :, : len(idx)]
for n_active, x in enumerate(coefs.T):
coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]
else:
x, idx, n_iter = out
coef[idx, k] = x
n_iters.append(n_iter)
if Xy.shape[1] == 1:
n_iters = n_iters[0]
if return_n_iter:
return np.squeeze(coef), n_iters
else:
return np.squeeze(coef)
class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
"""Orthogonal Matching Pursuit model (OMP).
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
n_nonzero_coefs : int, default=None
Desired number of non-zero entries in the solution. If None (by
default) this value is set to 10% of n_features.
tol : float, default=None
Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs.
fit_intercept : bool, default=True
Whether to calculate the intercept for this model. If set
to false, no intercept will be used in calculations
(i.e. data is expected to be centered).
precompute : 'auto' or bool, default='auto'
Whether to use a precomputed Gram and Xy matrix to speed up
calculations. Improves performance when :term:`n_targets` or
:term:`n_samples` is very large. Note that if you already have such
matrices, you can pass them directly to the fit method.
Attributes
----------
coef_ : ndarray of shape (n_features,) or (n_targets, n_features)
Parameter vector (w in the formula).
intercept_ : float or ndarray of shape (n_targets,)
Independent term in decision function.
n_iter_ : int or array-like
Number of active features across every target.
n_nonzero_coefs_ : int
The number of non-zero coefficients in the solution. If
`n_nonzero_coefs` is None and `tol` is None this value is either set
to 10% of `n_features` or 1, whichever is greater.
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
See Also
--------
orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit
problems using only the Gram matrix X.T * X and the product X.T * y.
lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
Lars : Least Angle Regression model a.k.a. LAR.
LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
sklearn.decomposition.sparse_encode : Generic sparse coding.
Each column of the result is the solution to a Lasso problem.
OrthogonalMatchingPursuitCV : Cross-validated
Orthogonal Matching Pursuit model (OMP).
Notes
-----
Orthogonal matching pursuit was introduced in G. Mallat, Z. Zhang,
Matching pursuits with time-frequency dictionaries, IEEE Transactions on
Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
(https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)
This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
Matching Pursuit Technical Report - CS Technion, April 2008.
https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf
Examples
--------
>>> from sklearn.linear_model import OrthogonalMatchingPursuit
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(noise=4, random_state=0)
>>> reg = OrthogonalMatchingPursuit().fit(X, y)
>>> reg.score(X, y)
0.9991...
>>> reg.predict(X[:1,])
array([-78.3854...])
"""
_parameter_constraints: dict = {
"n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
"tol": [Interval(Real, 0, None, closed="left"), None],
"fit_intercept": ["boolean"],
"precompute": [StrOptions({"auto"}), "boolean"],
}
def __init__(
self,
*,
n_nonzero_coefs=None,
tol=None,
fit_intercept=True,
precompute="auto",
):
self.n_nonzero_coefs = n_nonzero_coefs
self.tol = tol
self.fit_intercept = fit_intercept
self.precompute = precompute
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y):
"""Fit the model using X, y as training data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data.
y : array-like of shape (n_samples,) or (n_samples, n_targets)
Target values. Will be cast to X's dtype if necessary.
Returns
-------
self : object
Returns an instance of self.
"""
X, y = self._validate_data(X, y, multi_output=True, y_numeric=True)
n_features = X.shape[1]
X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(
X, y, None, self.precompute, self.fit_intercept, copy=True
)
if y.ndim == 1:
y = y[:, np.newaxis]
if self.n_nonzero_coefs is None and self.tol is None:
# default for n_nonzero_coefs is 0.1 * n_features
# but at least one.
self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)
else:
self.n_nonzero_coefs_ = self.n_nonzero_coefs
if Gram is False:
coef_, self.n_iter_ = orthogonal_mp(
X,
y,
n_nonzero_coefs=self.n_nonzero_coefs_,
tol=self.tol,
precompute=False,
copy_X=True,
return_n_iter=True,
)
else:
norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None
coef_, self.n_iter_ = orthogonal_mp_gram(
Gram,
Xy=Xy,
n_nonzero_coefs=self.n_nonzero_coefs_,
tol=self.tol,
norms_squared=norms_sq,
copy_Gram=True,
copy_Xy=True,
return_n_iter=True,
)
self.coef_ = coef_.T
self._set_intercept(X_offset, y_offset, X_scale)
return self
def _omp_path_residues(
X_train,
y_train,
X_test,
y_test,
copy=True,
fit_intercept=True,
max_iter=100,
):
"""Compute the residues on left-out data for a full LARS path.
Parameters
----------
X_train : ndarray of shape (n_samples, n_features)
The data to fit the LARS on.
y_train : ndarray of shape (n_samples)
The target variable to fit LARS on.
X_test : ndarray of shape (n_samples, n_features)
The data to compute the residues on.
y_test : ndarray of shape (n_samples)
The target variable to compute the residues on.
copy : bool, default=True
Whether X_train, X_test, y_train and y_test should be copied. If
False, they may be overwritten.
fit_intercept : bool, default=True
Whether to calculate the intercept for this model. If set
to false, no intercept will be used in calculations
(i.e. data is expected to be centered).
max_iter : int, default=100
Maximum numbers of iterations to perform, therefore maximum features
to include. 100 by default.
Returns
-------
residues : ndarray of shape (n_samples, max_features)
Residues of the prediction on the test data.
"""
if copy:
X_train = X_train.copy()
y_train = y_train.copy()
X_test = X_test.copy()
y_test = y_test.copy()
if fit_intercept:
X_mean = X_train.mean(axis=0)
X_train -= X_mean
X_test -= X_mean
y_mean = y_train.mean(axis=0)
y_train = as_float_array(y_train, copy=False)
y_train -= y_mean
y_test = as_float_array(y_test, copy=False)
y_test -= y_mean
coefs = orthogonal_mp(
X_train,
y_train,
n_nonzero_coefs=max_iter,
tol=None,
precompute=False,
copy_X=False,
return_path=True,
)
if coefs.ndim == 1:
coefs = coefs[:, np.newaxis]
return np.dot(coefs.T, X_test.T) - y_test
class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):
"""Cross-validated Orthogonal Matching Pursuit model (OMP).
See glossary entry for :term:`cross-validation estimator`.
Read more in the :ref:`User Guide <omp>`.
Parameters
----------
copy : bool, default=True
Whether the design matrix X must be copied by the algorithm. A false
value is only helpful if X is already Fortran-ordered, otherwise a
copy is made anyway.
fit_intercept : bool, default=True
Whether to calculate the intercept for this model. If set
to false, no intercept will be used in calculations
(i.e. data is expected to be centered).
max_iter : int, default=None
Maximum numbers of iterations to perform, therefore maximum features
to include. 10% of ``n_features`` but at least 5 if available.
cv : int, cross-validation generator or iterable, default=None
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 5-fold cross-validation,
- integer, to specify the number of folds.
- :term:`CV splitter`,
- An iterable yielding (train, test) splits as arrays of indices.
For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.
Refer :ref:`User Guide <cross_validation>` for the various
cross-validation strategies that can be used here.
.. versionchanged:: 0.22
``cv`` default value if None changed from 3-fold to 5-fold.
n_jobs : int, default=None
Number of CPUs to use during the cross validation.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : bool or int, default=False
Sets the verbosity amount.
Attributes
----------
intercept_ : float or ndarray of shape (n_targets,)
Independent term in decision function.
coef_ : ndarray of shape (n_features,) or (n_targets, n_features)
Parameter vector (w in the problem formulation).
n_nonzero_coefs_ : int
Estimated number of non-zero coefficients giving the best mean squared
error over the cross-validation folds.
n_iter_ : int or array-like
Number of active features across every target for the model refit with
the best hyperparameters got by cross-validating across all folds.
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
See Also
--------
orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit
problems using only the Gram matrix X.T * X and the product X.T * y.
lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
Lars : Least Angle Regression model a.k.a. LAR.
LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).
LarsCV : Cross-validated Least Angle Regression model.
LassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.
sklearn.decomposition.sparse_encode : Generic sparse coding.
Each column of the result is the solution to a Lasso problem.
Notes
-----
In `fit`, once the optimal number of non-zero coefficients is found through
cross-validation, the model is fit again using the entire training set.
Examples
--------
>>> from sklearn.linear_model import OrthogonalMatchingPursuitCV
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(n_features=100, n_informative=10,
... noise=4, random_state=0)
>>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
>>> reg.score(X, y)
0.9991...
>>> reg.n_nonzero_coefs_
10
>>> reg.predict(X[:1,])
array([-78.3854...])
"""
_parameter_constraints: dict = {
"copy": ["boolean"],
"fit_intercept": ["boolean"],
"max_iter": [Interval(Integral, 0, None, closed="left"), None],
"cv": ["cv_object"],
"n_jobs": [Integral, None],
"verbose": ["verbose"],
}
def __init__(
self,
*,
copy=True,
fit_intercept=True,
max_iter=None,
cv=None,
n_jobs=None,
verbose=False,
):
self.copy = copy
self.fit_intercept = fit_intercept
self.max_iter = max_iter
self.cv = cv
self.n_jobs = n_jobs
self.verbose = verbose
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y, **fit_params):
"""Fit the model using X, y as training data.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Training data.
y : array-like of shape (n_samples,)
Target values. Will be cast to X's dtype if necessary.
**fit_params : dict
Parameters to pass to the underlying splitter.
.. versionadded:: 1.4
Only available if `enable_metadata_routing=True`,
which can be set by using
``sklearn.set_config(enable_metadata_routing=True)``.
See :ref:`Metadata Routing User Guide <metadata_routing>` for
more details.
Returns
-------
self : object
Returns an instance of self.
"""
_raise_for_params(fit_params, self, "fit")
X, y = self._validate_data(X, y, y_numeric=True, ensure_min_features=2)
X = as_float_array(X, copy=False, force_all_finite=False)
cv = check_cv(self.cv, classifier=False)
if _routing_enabled():
routed_params = process_routing(self, "fit", **fit_params)
else:
# TODO(SLEP6): remove when metadata routing cannot be disabled.
routed_params = Bunch()
routed_params.splitter = Bunch(split={})
max_iter = (
min(max(int(0.1 * X.shape[1]), 5), X.shape[1])
if not self.max_iter
else self.max_iter
)
cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
delayed(_omp_path_residues)(
X[train],
y[train],
X[test],
y[test],
self.copy,
self.fit_intercept,
max_iter,
)
for train, test in cv.split(X, **routed_params.splitter.split)
)
min_early_stop = min(fold.shape[0] for fold in cv_paths)
mse_folds = np.array(
[(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths]
)
best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1
self.n_nonzero_coefs_ = best_n_nonzero_coefs
omp = OrthogonalMatchingPursuit(
n_nonzero_coefs=best_n_nonzero_coefs,
fit_intercept=self.fit_intercept,
).fit(X, y)
self.coef_ = omp.coef_
self.intercept_ = omp.intercept_
self.n_iter_ = omp.n_iter_
return self
def get_metadata_routing(self):
"""Get metadata routing of this object.
Please check :ref:`User Guide <metadata_routing>` on how the routing
mechanism works.
.. versionadded:: 1.4
Returns
-------
routing : MetadataRouter
A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
routing information.
"""
router = MetadataRouter(owner=self.__class__.__name__).add(
splitter=self.cv,
method_mapping=MethodMapping().add(callee="split", caller="fit"),
)
return router