# ai-content-maker/.venv/Lib/site-packages/sklearn/utils/tests/test_response.py

import numpy as np
import pytest

from sklearn.datasets import (
    load_iris,
    make_classification,
    make_multilabel_classification,
    make_regression,
)
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import (
    LinearRegression,
    LogisticRegression,
)
from sklearn.multioutput import ClassifierChain
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._mocking import _MockEstimatorOnOffPrediction
from sklearn.utils._response import _get_response_values, _get_response_values_binary
from sklearn.utils._testing import assert_allclose, assert_array_equal

# Module-level data shared by several tests below: the iris dataset.
X, y = load_iris(return_X_y=True)
# Standardise the features (`copy=False` asks `scale` to avoid a copy) so that
# LogisticRegression does not emit a ConvergenceWarning.
X = scale(X, copy=False)
# The first 100 samples are used as the two-class problem of the binary tests.
X_binary = X[:100]
y_binary = y[:100]


@pytest.mark.parametrize(
    "response_method", ["decision_function", "predict_proba", "predict_log_proba"]
)
def test_get_response_values_regressor_error(response_method):
    """Check the error message raised for a regressor combined with an
    unsupported response method."""
    mock_estimator = _MockEstimatorOnOffPrediction(response_methods=[response_method])
    # Placeholder input: the call below is expected to raise.
    X = ("mocking_data", "mocking_target")
    expected_msg = f"{type(mock_estimator).__name__} should either be a classifier"
    with pytest.raises(ValueError, match=expected_msg):
        _get_response_values(mock_estimator, X, response_method=response_method)


@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_regressor(return_response_method_used):
    """Check the behaviour of `_get_response_values` with regressor.

    The predictions must equal the output of `regressor.predict`, `pos_label`
    must be `None`, and the name of the response method must be part of the
    result only when `return_response_method_used=True`.
    """
    X, y = make_regression(n_samples=10, random_state=0)
    regressor = LinearRegression().fit(X, y)
    results = _get_response_values(
        regressor,
        X,
        response_method="predict",
        return_response_method_used=return_response_method_used,
    )
    assert_array_equal(results[0], regressor.predict(X))
    assert results[1] is None
    if return_response_method_used:
        assert len(results) == 3
        assert results[2] == "predict"
    else:
        # without the flag, only `(y_pred, pos_label)` must be returned
        assert len(results) == 2


@pytest.mark.parametrize(
    "response_method",
    ["predict", "decision_function", ["decision_function", "predict"]],
)
@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_outlier_detection(
    response_method, return_response_method_used
):
    """Check the behaviour of `_get_response_values` with outlier detector.

    The returned values must equal the output of the selected method of the
    detector, `pos_label` must be `None`, and the name of the method must be
    part of the result only when `return_response_method_used=True`.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    outlier_detector = IsolationForest(random_state=0).fit(X, y)
    results = _get_response_values(
        outlier_detector,
        X,
        response_method=response_method,
        return_response_method_used=return_response_method_used,
    )
    # when a list is passed, its first entry is the one expected to be used
    chosen_response_method = (
        response_method[0] if isinstance(response_method, list) else response_method
    )
    prediction_method = getattr(outlier_detector, chosen_response_method)
    assert_array_equal(results[0], prediction_method(X))
    assert results[1] is None
    if return_response_method_used:
        assert len(results) == 3
        assert results[2] == chosen_response_method
    else:
        # without the flag, only `(y_pred, pos_label)` must be returned
        assert len(results) == 2


@pytest.mark.parametrize(
    "response_method",
    ["predict_proba", "decision_function", "predict", "predict_log_proba"],
)
def test_get_response_values_classifier_unknown_pos_label(response_method):
    """Check that `_get_response_values` rejects, with the proper error message,
    a `pos_label` that is not a label of the fitted classifier."""
    X, y = make_classification(n_samples=10, n_classes=2, random_state=0)
    classifier = LogisticRegression().fit(X, y)

    # "whatever" is not one of the labels present in `y`
    call_kwargs = {"response_method": response_method, "pos_label": "whatever"}
    expected_msg = (
        r"pos_label=whatever is not a valid label: It should be one of \[0 1\]"
    )
    with pytest.raises(ValueError, match=expected_msg):
        _get_response_values(classifier, X, **call_kwargs)


@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])
def test_get_response_values_classifier_inconsistent_y_pred_for_binary_proba(
    response_method,
):
    """Check that `_get_response_values` raises an error when the classifier
    was fitted on a single class and a probability-based method is requested."""
    X, y_two_class = make_classification(n_samples=10, n_classes=2, random_state=0)
    # collapse the target to a single class before fitting
    y_single_class = np.zeros_like(y_two_class)
    classifier = DecisionTreeClassifier()
    classifier.fit(X, y_single_class)

    # the same message is expected for both parametrized response methods
    expected_msg = (
        r"Got predict_proba of shape \(10, 1\), but need classifier with "
        r"two classes"
    )
    with pytest.raises(ValueError, match=expected_msg):
        _get_response_values(classifier, X, response_method=response_method)


@pytest.mark.parametrize("return_response_method_used", [True, False])
def test_get_response_values_binary_classifier_decision_function(
    return_response_method_used,
):
    """Check the behaviour of `_get_response_values` with `decision_function`
    and binary classifier.

    With the default `pos_label`, the output of `decision_function` is returned
    as is and the reported `pos_label` is 1. When forcing
    `pos_label=classifier.classes_[0]`, the scores are negated and the reported
    `pos_label` is 0. In both cases the name of the response method is part of
    the result only when `return_response_method_used=True`.
    """
    X, y = make_classification(
        n_samples=10,
        n_classes=2,
        weights=[0.3, 0.7],
        random_state=0,
    )
    classifier = LogisticRegression().fit(X, y)
    response_method = "decision_function"
    # `(y_pred, pos_label)` plus, optionally, the name of the method used
    expected_len = 3 if return_response_method_used else 2

    # default `pos_label`
    results = _get_response_values(
        classifier,
        X,
        response_method=response_method,
        pos_label=None,
        return_response_method_used=return_response_method_used,
    )
    assert_allclose(results[0], classifier.decision_function(X))
    assert results[1] == 1
    assert len(results) == expected_len
    if return_response_method_used:
        assert results[2] == "decision_function"

    # when forcing `pos_label=classifier.classes_[0]`
    results = _get_response_values(
        classifier,
        X,
        response_method=response_method,
        pos_label=classifier.classes_[0],
        return_response_method_used=return_response_method_used,
    )
    assert_allclose(results[0], classifier.decision_function(X) * -1)
    assert results[1] == 0
    assert len(results) == expected_len
    if return_response_method_used:
        assert results[2] == "decision_function"


@pytest.mark.parametrize("return_response_method_used", [True, False])
@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])
def test_get_response_values_binary_classifier_predict_proba(
    return_response_method_used, response_method
):
    """Check the behaviour of `_get_response_values` with a probability-based
    response method (`predict_proba` or `predict_log_proba`) and a binary
    classifier."""
    X, y = make_classification(
        n_samples=10, n_classes=2, weights=[0.3, 0.7], random_state=0
    )
    classifier = LogisticRegression().fit(X, y)
    estimator_method = getattr(classifier, response_method)

    # default `pos_label`: the column of class 1 is expected
    results = _get_response_values(
        classifier,
        X,
        response_method=response_method,
        pos_label=None,
        return_response_method_used=return_response_method_used,
    )
    assert_allclose(results[0], estimator_method(X)[:, 1])
    assert results[1] == 1
    # the name of the method is appended only on request
    if not return_response_method_used:
        assert len(results) == 2
    else:
        assert len(results) == 3
        assert results[2] == response_method

    # when forcing `pos_label=classifier.classes_[0]`: column 0 is expected
    first_class = classifier.classes_[0]
    y_pred, pos_label, *_ = _get_response_values(
        classifier,
        X,
        response_method=response_method,
        pos_label=first_class,
        return_response_method_used=return_response_method_used,
    )
    assert_allclose(y_pred, estimator_method(X)[:, 0])
    assert pos_label == 0


@pytest.mark.parametrize(
    "estimator, X, y, err_msg, params",
    [
        # a regressor instead of a classifier
        (
            DecisionTreeRegressor(),
            X_binary,
            y_binary,
            "Expected 'estimator' to be a binary classifier",
            {"response_method": "auto"},
        ),
        # a `pos_label` that is not one of the two labels
        (
            DecisionTreeClassifier(),
            X_binary,
            y_binary,
            r"pos_label=unknown is not a valid label: It should be one of \[0 1\]",
            {"response_method": "auto", "pos_label": "unknown"},
        ),
        # a classifier fitted on the three-class data
        (
            DecisionTreeClassifier(),
            X,
            y,
            "be a binary classifier. Got 3 classes instead.",
            {"response_method": "predict_proba"},
        ),
    ],
)
def test_get_response_error(estimator, X, y, err_msg, params):
    """Check the error messages raised by `_get_response_values_binary` for
    invalid estimators or an invalid `pos_label`."""
    fitted_estimator = estimator.fit(X, y)
    with pytest.raises(ValueError, match=err_msg):
        _get_response_values_binary(fitted_estimator, X, **params)


def test_get_response_predict_proba():
    """Check that `_get_response_values_binary` with `predict_proba` returns the
    probability column of the positive class together with that class."""
    classifier = DecisionTreeClassifier().fit(X_binary, y_binary)

    # (extra keyword arguments, expected `pos_label`); the expected label also
    # serves as the index of the expected `predict_proba` column (1, then 0)
    cases = (({}, 1), ({"pos_label": 0}, 0))
    for extra_kwargs, expected_label in cases:
        y_proba, pos_label = _get_response_values_binary(
            classifier, X_binary, response_method="predict_proba", **extra_kwargs
        )
        expected_proba = classifier.predict_proba(X_binary)[:, expected_label]
        assert_allclose(y_proba, expected_proba)
        assert pos_label == expected_label


def test_get_response_decision_function():
    """Check that `_get_response_values_binary` with `decision_function` returns
    the scores as is by default and negated when `pos_label=0`."""
    classifier = LogisticRegression().fit(X_binary, y_binary)
    common_kwargs = {"response_method": "decision_function"}

    # default `pos_label`: scores are returned unchanged
    y_score, pos_label = _get_response_values_binary(
        classifier, X_binary, **common_kwargs
    )
    expected_scores = classifier.decision_function(X_binary)
    assert_allclose(y_score, expected_scores)
    assert pos_label == 1

    # `pos_label=0`: the sign of the scores is flipped
    y_score, pos_label = _get_response_values_binary(
        classifier, X_binary, pos_label=0, **common_kwargs
    )
    flipped_scores = classifier.decision_function(X_binary) * -1
    assert_allclose(y_score, flipped_scores)
    assert pos_label == 0


@pytest.mark.parametrize(
    "estimator, response_method",
    [
        (DecisionTreeClassifier(max_depth=2, random_state=0), "predict_proba"),
        (DecisionTreeClassifier(max_depth=2, random_state=0), "predict_log_proba"),
        (LogisticRegression(), "decision_function"),
    ],
)
def test_get_response_values_multiclass(estimator, response_method):
    """Check that we can call `_get_response_values` with a multiclass estimator.
    It should return the predictions untouched.

    "Untouched" is checked by comparing against the output of the response
    method called directly on the fitted estimator; `pos_label` must be `None`.
    """
    estimator.fit(X, y)
    predictions, pos_label = _get_response_values(
        estimator, X, response_method=response_method
    )

    assert pos_label is None
    assert predictions.shape == (X.shape[0], len(estimator.classes_))
    # the predictions must be the ones of the estimator itself, for every
    # response method (including `decision_function`)
    assert_allclose(predictions, getattr(estimator, response_method)(X))
    if response_method == "predict_proba":
        assert np.logical_and(predictions >= 0, predictions <= 1).all()
    elif response_method == "predict_log_proba":
        assert (predictions <= 0.0).all()


def test_get_response_values_with_response_list():
    """Check that, given a list of response methods, `_get_response_values`
    uses the first method of the list and reports its name."""
    classifier = LogisticRegression().fit(X_binary, y_binary)

    # `predict_proba` comes first: it should be the one used
    proba_first = ["predict_proba", "decision_function"]
    y_pred, pos_label, method_used = _get_response_values(
        classifier,
        X_binary,
        response_method=proba_first,
        return_response_method_used=True,
    )
    expected_proba = classifier.predict_proba(X_binary)[:, 1]
    assert_allclose(y_pred, expected_proba)
    assert pos_label == 1
    assert method_used == "predict_proba"

    # `decision_function` comes first: it should be the one used
    decision_first = ["decision_function", "predict_proba"]
    y_pred, pos_label, method_used = _get_response_values(
        classifier,
        X_binary,
        response_method=decision_first,
        return_response_method_used=True,
    )
    expected_scores = classifier.decision_function(X_binary)
    assert_allclose(y_pred, expected_scores)
    assert pos_label == 1
    assert method_used == "decision_function"


@pytest.mark.parametrize(
    "response_method", ["predict_proba", "decision_function", "predict"]
)
def test_get_response_values_multilabel_indicator(response_method):
    """Check the behaviour of `_get_response_values` with a multilabel target:
    `pos_label` is `None` and the output has the same shape as the target."""
    X, Y = make_multilabel_classification(random_state=0)
    estimator = ClassifierChain(LogisticRegression())
    estimator.fit(X, Y)

    y_pred, pos_label = _get_response_values(
        estimator, X, response_method=response_method
    )
    assert pos_label is None
    assert y_pred.shape == Y.shape

    if response_method == "predict_proba":
        # probabilities lie within [0, 1]
        assert ((y_pred >= 0) & (y_pred <= 1)).all()
    elif response_method == "decision_function":
        # values returned by `decision_function` are not bounded in [0, 1]
        assert (y_pred < 0).any()
        assert (y_pred > 1).any()
    else:  # response_method == "predict"
        # predictions are binary indicators
        assert ((y_pred == 0) | (y_pred == 1)).all()
first commit 2024-05-03 04:18:51 +03:00			`import numpy as np`
			`import pytest`

			`from sklearn.datasets import (`
			`load_iris,`
			`make_classification,`
			`make_multilabel_classification,`
			`make_regression,`
			`)`
			`from sklearn.ensemble import IsolationForest`
			`from sklearn.linear_model import (`
			`LinearRegression,`
			`LogisticRegression,`
			`)`
			`from sklearn.multioutput import ClassifierChain`
			`from sklearn.preprocessing import scale`
			`from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor`
			`from sklearn.utils._mocking import _MockEstimatorOnOffPrediction`
			`from sklearn.utils._response import _get_response_values, _get_response_values_binary`
			`from sklearn.utils._testing import assert_allclose, assert_array_equal`

			`X, y = load_iris(return_X_y=True)`
			`# scale the data to avoid ConvergenceWarning with LogisticRegression`
			`X = scale(X, copy=False)`
			`X_binary, y_binary = X[:100], y[:100]`


			`@pytest.mark.parametrize(`
			`"response_method", ["decision_function", "predict_proba", "predict_log_proba"]`
			`)`
			`def test_get_response_values_regressor_error(response_method):`
			`"""Check the error message with regressor an not supported response`
			`method."""`
			`my_estimator = _MockEstimatorOnOffPrediction(response_methods=[response_method])`
			`X = "mocking_data", "mocking_target"`
			`err_msg = f"{my_estimator.__class__.__name__} should either be a classifier"`
			`with pytest.raises(ValueError, match=err_msg):`
			`_get_response_values(my_estimator, X, response_method=response_method)`


			`@pytest.mark.parametrize("return_response_method_used", [True, False])`
			`def test_get_response_values_regressor(return_response_method_used):`
			"""Check the behaviour of `_get_response_values` with regressor."""
			`X, y = make_regression(n_samples=10, random_state=0)`
			`regressor = LinearRegression().fit(X, y)`
			`results = _get_response_values(`
			`regressor,`
			`X,`
			`response_method="predict",`
			`return_response_method_used=return_response_method_used,`
			`)`
			`assert_array_equal(results[0], regressor.predict(X))`
			`assert results[1] is None`
			`if return_response_method_used:`
			`assert results[2] == "predict"`


			`@pytest.mark.parametrize(`
			`"response_method",`
			`["predict", "decision_function", ["decision_function", "predict"]],`
			`)`
			`@pytest.mark.parametrize("return_response_method_used", [True, False])`
			`def test_get_response_values_outlier_detection(`
			`response_method, return_response_method_used`
			`):`
			"""Check the behaviour of `_get_response_values` with outlier detector."""
			`X, y = make_classification(n_samples=50, random_state=0)`
			`outlier_detector = IsolationForest(random_state=0).fit(X, y)`
			`results = _get_response_values(`
			`outlier_detector,`
			`X,`
			`response_method=response_method,`
			`return_response_method_used=return_response_method_used,`
			`)`
			`chosen_response_method = (`
			`response_method[0] if isinstance(response_method, list) else response_method`
			`)`
			`prediction_method = getattr(outlier_detector, chosen_response_method)`
			`assert_array_equal(results[0], prediction_method(X))`
			`assert results[1] is None`
			`if return_response_method_used:`
			`assert results[2] == chosen_response_method`


			`@pytest.mark.parametrize(`
			`"response_method",`
			`["predict_proba", "decision_function", "predict", "predict_log_proba"],`
			`)`
			`def test_get_response_values_classifier_unknown_pos_label(response_method):`
			"""Check that `_get_response_values` raises the proper error message with
			`classifier."""`
			`X, y = make_classification(n_samples=10, n_classes=2, random_state=0)`
			`classifier = LogisticRegression().fit(X, y)`

			# provide a `pos_label` which is not in `y`
			`err_msg = r"pos_label=whatever is not a valid label: It should be one of \[0 1\]"`
			`with pytest.raises(ValueError, match=err_msg):`
			`_get_response_values(`
			`classifier,`
			`X,`
			`response_method=response_method,`
			`pos_label="whatever",`
			`)`


			`@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])`
			`def test_get_response_values_classifier_inconsistent_y_pred_for_binary_proba(`
			`response_method,`
			`):`
			"""Check that `_get_response_values` will raise an error when `y_pred` has a
			single class with `predict_proba`."""
			`X, y_two_class = make_classification(n_samples=10, n_classes=2, random_state=0)`
			`y_single_class = np.zeros_like(y_two_class)`
			`classifier = DecisionTreeClassifier().fit(X, y_single_class)`

			`err_msg = (`
			`r"Got predict_proba of shape \(10, 1\), but need classifier with "`
			`r"two classes"`
			`)`
			`with pytest.raises(ValueError, match=err_msg):`
			`_get_response_values(classifier, X, response_method=response_method)`


			`@pytest.mark.parametrize("return_response_method_used", [True, False])`
			`def test_get_response_values_binary_classifier_decision_function(`
			`return_response_method_used,`
			`):`
			"""Check the behaviour of `_get_response_values` with `decision_function`
			`and binary classifier."""`
			`X, y = make_classification(`
			`n_samples=10,`
			`n_classes=2,`
			`weights=[0.3, 0.7],`
			`random_state=0,`
			`)`
			`classifier = LogisticRegression().fit(X, y)`
			`response_method = "decision_function"`

			# default `pos_label`
			`results = _get_response_values(`
			`classifier,`
			`X,`
			`response_method=response_method,`
			`pos_label=None,`
			`return_response_method_used=return_response_method_used,`
			`)`
			`assert_allclose(results[0], classifier.decision_function(X))`
			`assert results[1] == 1`
			`if return_response_method_used:`
			`assert results[2] == "decision_function"`

			# when forcing `pos_label=classifier.classes_[0]`
			`results = _get_response_values(`
			`classifier,`
			`X,`
			`response_method=response_method,`
			`pos_label=classifier.classes_[0],`
			`return_response_method_used=return_response_method_used,`
			`)`
			`assert_allclose(results[0], classifier.decision_function(X) * -1)`
			`assert results[1] == 0`
			`if return_response_method_used:`
			`assert results[2] == "decision_function"`


			`@pytest.mark.parametrize("return_response_method_used", [True, False])`
			`@pytest.mark.parametrize("response_method", ["predict_proba", "predict_log_proba"])`
			`def test_get_response_values_binary_classifier_predict_proba(`
			`return_response_method_used, response_method`
			`):`
			"""Check that `_get_response_values` with `predict_proba` and binary
			`classifier."""`
			`X, y = make_classification(`
			`n_samples=10,`
			`n_classes=2,`
			`weights=[0.3, 0.7],`
			`random_state=0,`
			`)`
			`classifier = LogisticRegression().fit(X, y)`

			# default `pos_label`
			`results = _get_response_values(`
			`classifier,`
			`X,`
			`response_method=response_method,`
			`pos_label=None,`
			`return_response_method_used=return_response_method_used,`
			`)`
			`assert_allclose(results[0], getattr(classifier, response_method)(X)[:, 1])`
			`assert results[1] == 1`
			`if return_response_method_used:`
			`assert len(results) == 3`
			`assert results[2] == response_method`
			`else:`
			`assert len(results) == 2`

			# when forcing `pos_label=classifier.classes_[0]`
			`y_pred, pos_label, *_ = _get_response_values(`
			`classifier,`
			`X,`
			`response_method=response_method,`
			`pos_label=classifier.classes_[0],`
			`return_response_method_used=return_response_method_used,`
			`)`
			`assert_allclose(y_pred, getattr(classifier, response_method)(X)[:, 0])`
			`assert pos_label == 0`


			`@pytest.mark.parametrize(`
			`"estimator, X, y, err_msg, params",`
			`[`
			`(`
			`DecisionTreeRegressor(),`
			`X_binary,`
			`y_binary,`
			`"Expected 'estimator' to be a binary classifier",`
			`{"response_method": "auto"},`
			`),`
			`(`
			`DecisionTreeClassifier(),`
			`X_binary,`
			`y_binary,`
			`r"pos_label=unknown is not a valid label: It should be one of \[0 1\]",`
			`{"response_method": "auto", "pos_label": "unknown"},`
			`),`
			`(`
			`DecisionTreeClassifier(),`
			`X,`
			`y,`
			`"be a binary classifier. Got 3 classes instead.",`
			`{"response_method": "predict_proba"},`
			`),`
			`],`
			`)`
			`def test_get_response_error(estimator, X, y, err_msg, params):`
			`"""Check that we raise the proper error messages in _get_response_values_binary."""`

			`estimator.fit(X, y)`
			`with pytest.raises(ValueError, match=err_msg):`
			`_get_response_values_binary(estimator, X, **params)`


			`def test_get_response_predict_proba():`
			"""Check the behaviour of `_get_response_values_binary` using `predict_proba`."""
			`classifier = DecisionTreeClassifier().fit(X_binary, y_binary)`
			`y_proba, pos_label = _get_response_values_binary(`
			`classifier, X_binary, response_method="predict_proba"`
			`)`
			`assert_allclose(y_proba, classifier.predict_proba(X_binary)[:, 1])`
			`assert pos_label == 1`

			`y_proba, pos_label = _get_response_values_binary(`
			`classifier, X_binary, response_method="predict_proba", pos_label=0`
			`)`
			`assert_allclose(y_proba, classifier.predict_proba(X_binary)[:, 0])`
			`assert pos_label == 0`


			`def test_get_response_decision_function():`
			"""Check the behaviour of `_get_response_values_binary` using decision_function."""
			`classifier = LogisticRegression().fit(X_binary, y_binary)`
			`y_score, pos_label = _get_response_values_binary(`
			`classifier, X_binary, response_method="decision_function"`
			`)`
			`assert_allclose(y_score, classifier.decision_function(X_binary))`
			`assert pos_label == 1`

			`y_score, pos_label = _get_response_values_binary(`
			`classifier, X_binary, response_method="decision_function", pos_label=0`
			`)`
			`assert_allclose(y_score, classifier.decision_function(X_binary) * -1)`
			`assert pos_label == 0`


			`@pytest.mark.parametrize(`
			`"estimator, response_method",`
			`[`
			`(DecisionTreeClassifier(max_depth=2, random_state=0), "predict_proba"),`
			`(DecisionTreeClassifier(max_depth=2, random_state=0), "predict_log_proba"),`
			`(LogisticRegression(), "decision_function"),`
			`],`
			`)`
			`def test_get_response_values_multiclass(estimator, response_method):`
			"""Check that we can call `_get_response_values` with a multiclass estimator.
			`It should return the predictions untouched.`
			`"""`
			`estimator.fit(X, y)`
			`predictions, pos_label = _get_response_values(`
			`estimator, X, response_method=response_method`
			`)`

			`assert pos_label is None`
			`assert predictions.shape == (X.shape[0], len(estimator.classes_))`
			`if response_method == "predict_proba":`
			`assert np.logical_and(predictions >= 0, predictions <= 1).all()`
			`elif response_method == "predict_log_proba":`
			`assert (predictions <= 0.0).all()`


			`def test_get_response_values_with_response_list():`
			"""Check the behaviour of passing a list of responses to `_get_response_values`."""
			`classifier = LogisticRegression().fit(X_binary, y_binary)`

			# it should use `predict_proba`
			`y_pred, pos_label, response_method = _get_response_values(`
			`classifier,`
			`X_binary,`
			`response_method=["predict_proba", "decision_function"],`
			`return_response_method_used=True,`
			`)`
			`assert_allclose(y_pred, classifier.predict_proba(X_binary)[:, 1])`
			`assert pos_label == 1`
			`assert response_method == "predict_proba"`

			# it should use `decision_function`
			`y_pred, pos_label, response_method = _get_response_values(`
			`classifier,`
			`X_binary,`
			`response_method=["decision_function", "predict_proba"],`
			`return_response_method_used=True,`
			`)`
			`assert_allclose(y_pred, classifier.decision_function(X_binary))`
			`assert pos_label == 1`
			`assert response_method == "decision_function"`


			`@pytest.mark.parametrize(`
			`"response_method", ["predict_proba", "decision_function", "predict"]`
			`)`
			`def test_get_response_values_multilabel_indicator(response_method):`
			`X, Y = make_multilabel_classification(random_state=0)`
			`estimator = ClassifierChain(LogisticRegression()).fit(X, Y)`

			`y_pred, pos_label = _get_response_values(`
			`estimator, X, response_method=response_method`
			`)`
			`assert pos_label is None`
			`assert y_pred.shape == Y.shape`

			`if response_method == "predict_proba":`
			`assert np.logical_and(y_pred >= 0, y_pred <= 1).all()`
			`elif response_method == "decision_function":`
			# values returned by `decision_function` are not bounded in [0, 1]
			`assert (y_pred < 0).sum() > 0`
			`assert (y_pred > 1).sum() > 0`
			`else: # response_method == "predict"`
			`assert np.logical_or(y_pred == 0, y_pred == 1).all()`