ai-content-maker/.venv/Lib/site-packages/sklearn/metrics/cluster/tests/test_unsupervised.py

import warnings

import numpy as np
import pytest
from numpy.testing import assert_allclose
from scipy.sparse import issparse

from sklearn import datasets
from sklearn.metrics import pairwise_distances
from sklearn.metrics.cluster import (
    calinski_harabasz_score,
    davies_bouldin_score,
    silhouette_samples,
    silhouette_score,
)
from sklearn.metrics.cluster._unsupervised import _silhouette_reduce
from sklearn.utils._testing import assert_array_equal
from sklearn.utils.fixes import (
    CSC_CONTAINERS,
    CSR_CONTAINERS,
    DOK_CONTAINERS,
    LIL_CONTAINERS,
)


@pytest.mark.parametrize(
    "sparse_container",
    [None] + CSR_CONTAINERS + CSC_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
@pytest.mark.parametrize("sample_size", [None, "half"])
def test_silhouette(sparse_container, sample_size):
    """Check silhouette_score on iris for dense and sparse feature matrices.

    The score obtained from a precomputed Euclidean distance matrix must agree
    with the score obtained directly from the features, both on the full data
    (``sample_size=None``) and on a subsample of half the rows (``"half"``).
    """
    iris = datasets.load_iris()
    X = iris.data if sparse_container is None else sparse_container(iris.data)
    y = iris.target
    if sample_size == "half":
        sample_size = int(X.shape[0] / 2)

    # Same subsample size and seed for both calls so the scores are comparable.
    sampling = {"sample_size": sample_size, "random_state": 0}
    distances = pairwise_distances(X, metric="euclidean")
    from_distances = silhouette_score(distances, y, metric="precomputed", **sampling)
    from_features = silhouette_score(X, y, metric="euclidean", **sampling)

    # The actual labels are used, so the score is expected to be positive.
    assert from_distances > 0
    assert from_features > 0
    assert from_distances == pytest.approx(from_features)


def test_cluster_size_1():
    """Check per-sample silhouettes for singleton and zero-spread clusters.

    Cluster 0 holds a single sample, which gets a score of 0 (Rousseeuw's
    convention). Cluster 2 holds only identical samples; this case does not
    seem to be discussed in reference material, and a sample score of 1 is
    the chosen behaviour.
    """
    X = [[0.0], [1.0], [1.0], [2.0], [3.0], [3.0]]
    labels = np.array([0, 1, 1, 1, 2, 2])

    # Per-cluster derivation of the expected sample scores:
    #   cluster 0: one sample             -> 0
    #   cluster 1: intra = [.5, .5, 1]
    #              inter = [1, 1, 1]      -> [.5, .5, 0]
    #   cluster 2: intra = [0, 0]
    #              inter = arbitrary      -> [1, 1]
    expected_samples = [0, 0.5, 0.5, 0, 1, 1]

    mean_score = silhouette_score(X, labels)
    assert not np.isnan(mean_score)

    per_sample = silhouette_samples(X, labels)
    assert_array_equal(per_sample, expected_samples)


def test_silhouette_paper_example():
    """Check per-sample and mean silhouettes against Rousseeuw (1987).

    The dissimilarities come from Table 1 of the paper; the expected
    silhouette values for the two clusterings come from Figures 2 and 3.
    Values are compared with an absolute tolerance of 1e-2 because the paper
    reports them to two decimal places.
    """
    # Data from Table 1: strict lower triangle of the 12 x 12 dissimilarity
    # matrix, listed row by row (the order produced by ``np.tril_indices``).
    # fmt: off
    lower = [
        5.58,
        7.00, 6.50,
        7.08, 7.00, 3.83,
        4.83, 5.08, 8.17, 5.83,
        2.17, 5.75, 6.67, 6.92, 4.92,
        6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
        3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
        2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
        6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
        5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
        4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92,
    ]
    # fmt: on
    D = np.zeros((12, 12))
    D[np.tril_indices(12, -1)] = lower
    # Mirror the lower triangle; the diagonal stays at zero.
    D += D.T

    # Row/column order of D.
    names = [
        "BEL",
        "BRA",
        "CHI",
        "CUB",
        "EGY",
        "FRA",
        "IND",
        "ISR",
        "USA",
        "USS",
        "YUG",
        "ZAI",
    ]

    # Data from Figure 2
    labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
    expected1 = {
        "USA": 0.43,
        "BEL": 0.39,
        "FRA": 0.35,
        "ISR": 0.30,
        "BRA": 0.22,
        "EGY": 0.20,
        "ZAI": 0.19,
        "CUB": 0.40,
        "USS": 0.34,
        "CHI": 0.33,
        "YUG": 0.26,
        "IND": -0.04,
    }
    score1 = 0.28

    # Data from Figure 3
    labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
    expected2 = {
        "USA": 0.47,
        "FRA": 0.44,
        "BEL": 0.42,
        "ISR": 0.37,
        "EGY": 0.02,
        "ZAI": 0.28,
        "BRA": 0.25,
        "IND": 0.17,
        "CUB": 0.48,
        "USS": 0.44,
        "YUG": 0.31,
        "CHI": 0.31,
    }
    score2 = 0.33

    for labels, expected_by_name, expected_score in [
        (labels1, expected1, score1),
        (labels2, expected2, score2),
    ]:
        labels = np.array(labels)
        # Reorder the expected values to follow the row order of D.
        expected_samples = np.array([expected_by_name[name] for name in names])
        # We check to 2dp because that's what's in the paper. The comparison
        # must be asserted: a bare ``pytest.approx(...)`` call checks nothing.
        assert silhouette_samples(D, labels, metric="precomputed") == pytest.approx(
            expected_samples, abs=1e-2
        )
        assert silhouette_score(D, labels, metric="precomputed") == pytest.approx(
            expected_score, abs=1e-2
        )


def test_correct_labelsize():
    """Check that silhouette_score requires 1 < n_labels < n_samples."""
    X = datasets.load_iris().data
    n_samples = X.shape[0]

    invalid_labelings = [
        np.arange(n_samples),  # n_labels = n_samples
        np.zeros(n_samples),  # n_labels = 1
    ]
    for y in invalid_labelings:
        n_labels = len(np.unique(y))
        expected_message = (
            r"Number of labels is %d\. Valid values are 2 "
            r"to n_samples - 1 \(inclusive\)" % n_labels
        )
        with pytest.raises(ValueError, match=expected_message):
            silhouette_score(X, y)


def test_non_encoded_labels():
    """Check that relabelling clusters with other integers changes nothing.

    The iris targets are mapped through ``2 * label + 10``; both the mean
    score and the per-sample values must equal those for the original labels.
    """
    iris = datasets.load_iris()
    X, labels = iris.data, iris.target
    remapped = labels * 2 + 10

    assert silhouette_score(X, remapped) == silhouette_score(X, labels)
    assert_array_equal(silhouette_samples(X, remapped), silhouette_samples(X, labels))


def test_non_numpy_labels():
    """Check that plain Python lists give the same score as numpy arrays."""
    iris = datasets.load_iris()
    X, y = iris.data, iris.target

    score_from_lists = silhouette_score(list(X), list(y))
    score_from_arrays = silhouette_score(X, y)
    assert score_from_lists == score_from_arrays


@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_silhouette_nonzero_diag(dtype):
    """Check that silhouette_samples requires a zero diagonal when precomputed.

    Non-regression test for #12178.
    """
    points = np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T
    labels = [0, 0, 0, 1, 1, 1]

    # Start from a zero-diagonal distance matrix.
    dists = pairwise_distances(points)
    eps = np.finfo(dists.dtype).eps

    # A small value on the diagonal is tolerated ...
    dists[2, 2] = eps * 10
    silhouette_samples(dists, labels, metric="precomputed")

    # ... but a value bigger than eps * 100 is rejected.
    dists[2, 2] = eps * 1000
    with pytest.raises(ValueError, match="contains non-zero"):
        silhouette_samples(dists, labels, metric="precomputed")


@pytest.mark.parametrize(
    "sparse_container",
    CSC_CONTAINERS + CSR_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
def test_silhouette_samples_precomputed_sparse(sparse_container):
    """Check sparse vs. dense precomputed distances give the same silhouettes."""
    points = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T
    labels = [0, 0, 0, 0, 1, 1, 1, 1]

    dense_distances = pairwise_distances(points)
    sparse_distances = sparse_container(dense_distances)
    assert issparse(sparse_distances)

    from_sparse = silhouette_samples(sparse_distances, labels, metric="precomputed")
    from_dense = silhouette_samples(dense_distances, labels, metric="precomputed")
    assert_allclose(from_sparse, from_dense)


@pytest.mark.parametrize(
    "sparse_container",
    CSC_CONTAINERS + CSR_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,
)
def test_silhouette_samples_euclidean_sparse(sparse_container):
    """Check sparse vs. dense input give the same silhouettes without `metric`.

    The same matrix is passed once as a sparse container and once as a dense
    array, with no ``metric`` argument, and the outputs must match.
    """
    points = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T
    labels = [0, 0, 0, 0, 1, 1, 1, 1]

    dense_input = pairwise_distances(points)
    sparse_input = sparse_container(dense_input)
    assert issparse(sparse_input)

    from_sparse = silhouette_samples(sparse_input, labels)
    from_dense = silhouette_samples(dense_input, labels)
    assert_allclose(from_sparse, from_dense)


@pytest.mark.parametrize(
    "sparse_container", CSC_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS
)
def test_silhouette_reduce(sparse_container):
    """Check that `_silhouette_reduce` raises TypeError on non-CSR sparse input."""
    points = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T
    labels = [0, 0, 0, 0, 1, 1, 1, 1]
    cluster_sizes = np.bincount(labels)
    non_csr_distances = sparse_container(pairwise_distances(points))

    expected_message = "Expected CSR matrix. Please pass sparse matrix in CSR format."
    with pytest.raises(TypeError, match=expected_message):
        _silhouette_reduce(
            non_csr_distances, start=0, labels=labels, label_freqs=cluster_sizes
        )


def assert_raises_on_only_one_label(func):
    """Assert that `func` raises a ValueError when all samples share one label.

    `func` is called as ``func(X, labels)`` with 10 random 2-feature samples
    and an all-zero label vector.
    """
    random_state = np.random.RandomState(seed=0)
    with pytest.raises(ValueError, match="Number of labels is"):
        single_label = np.zeros(10)
        func(random_state.rand(10, 2), single_label)


def assert_raises_on_all_points_same_cluster(func):
    """Assert that `func` raises a ValueError when every sample has its own label.

    Despite the function name, the labels passed are ``np.arange(10)``, i.e.
    one distinct cluster per sample. `func` is called as ``func(X, labels)``
    with 10 random 2-feature samples.
    """
    random_state = np.random.RandomState(seed=0)
    with pytest.raises(ValueError, match="Number of labels is"):
        one_label_per_sample = np.arange(10)
        func(random_state.rand(10, 2), one_label_per_sample)


def test_calinski_harabasz_score():
    """Check calinski_harabasz_score on invalid, degenerate and general inputs."""
    # Invalid label counts (a single label, or one label per sample) must raise.
    assert_raises_on_only_one_label(calinski_harabasz_score)

    assert_raises_on_all_points_same_cluster(calinski_harabasz_score)

    # The value is 1. when all samples are equal
    assert 1.0 == calinski_harabasz_score(np.ones((10, 2)), [0] * 5 + [1] * 5)

    # The value is 0. when all the cluster means are equal
    assert 0.0 == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10)

    # General case (with non numpy arrays)
    X = (
        [[0, 0], [1, 1]] * 5
        + [[3, 3], [4, 4]] * 5
        + [[0, 4], [1, 3]] * 5
        + [[3, 1], [4, 0]] * 5
    )
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    # The comparison must be asserted: a bare ``pytest.approx(a, b)`` call
    # checks nothing (and would treat ``b`` as the relative tolerance).
    assert calinski_harabasz_score(X, labels) == pytest.approx(
        45 * (40 - 4) / (5 * (4 - 1))
    )


def test_davies_bouldin_score():
    """Check davies_bouldin_score on invalid, degenerate and general inputs."""
    # Invalid label counts (a single label, or one label per sample) must raise.
    assert_raises_on_only_one_label(davies_bouldin_score)
    assert_raises_on_all_points_same_cluster(davies_bouldin_score)

    # The value is 0. when all samples are equal
    assert davies_bouldin_score(np.ones((10, 2)), [0] * 5 + [1] * 5) == pytest.approx(
        0.0
    )

    # The value is 0. when all the cluster means are equal
    assert davies_bouldin_score(
        [[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10
    ) == pytest.approx(0.0)

    # General case (with non numpy arrays)
    X = (
        [[0, 0], [1, 1]] * 5
        + [[3, 3], [4, 4]] * 5
        + [[0, 4], [1, 3]] * 5
        + [[3, 1], [4, 0]] * 5
    )
    labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10
    # The comparison must be asserted: a bare ``pytest.approx(a, b)`` call
    # checks nothing (and would treat ``b`` as the relative tolerance).
    assert davies_bouldin_score(X, labels) == pytest.approx(2 * np.sqrt(0.5) / 3)

    # Ensure divide by zero warning is not raised in general case
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        davies_bouldin_score(X, labels)

    # General case - some clusters have a single sample
    X = [[0, 0], [2, 2], [3, 3], [5, 5]]
    labels = [0, 0, 1, 2]
    assert davies_bouldin_score(X, labels) == pytest.approx((5.0 / 4) / 3)


def test_silhouette_score_integer_precomputed():
    """Check silhouette_score on integer-valued precomputed distances.

    Non-regression test for #22107.
    """
    labels = [0, 0, 1]

    zero_diagonal = [[0, 1, 2], [1, 0, 1], [2, 1, 0]]
    score = silhouette_score(zero_diagonal, labels, metric="precomputed")
    assert score == pytest.approx(1 / 6)

    # An integer matrix with a non-zero diagonal entry must be rejected.
    nonzero_diagonal = [[1, 1, 2], [1, 0, 1], [2, 1, 0]]
    with pytest.raises(ValueError, match="contains non-zero"):
        silhouette_score(nonzero_diagonal, labels, metric="precomputed")
first commit 2024-05-03 04:18:51 +03:00			`import warnings`

			`import numpy as np`
			`import pytest`
			`from numpy.testing import assert_allclose`
			`from scipy.sparse import issparse`

			`from sklearn import datasets`
			`from sklearn.metrics import pairwise_distances`
			`from sklearn.metrics.cluster import (`
			`calinski_harabasz_score,`
			`davies_bouldin_score,`
			`silhouette_samples,`
			`silhouette_score,`
			`)`
			`from sklearn.metrics.cluster._unsupervised import _silhouette_reduce`
			`from sklearn.utils._testing import assert_array_equal`
			`from sklearn.utils.fixes import (`
			`CSC_CONTAINERS,`
			`CSR_CONTAINERS,`
			`DOK_CONTAINERS,`
			`LIL_CONTAINERS,`
			`)`


			`@pytest.mark.parametrize(`
			`"sparse_container",`
			`[None] + CSR_CONTAINERS + CSC_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,`
			`)`
			`@pytest.mark.parametrize("sample_size", [None, "half"])`
			`def test_silhouette(sparse_container, sample_size):`
			`# Tests the Silhouette Coefficient.`
			`dataset = datasets.load_iris()`
			`X, y = dataset.data, dataset.target`
			`if sparse_container is not None:`
			`X = sparse_container(X)`
			`sample_size = int(X.shape[0] / 2) if sample_size == "half" else sample_size`

			`D = pairwise_distances(X, metric="euclidean")`
			`# Given that the actual labels are used, we can assume that S would be positive.`
			`score_precomputed = silhouette_score(`
			`D, y, metric="precomputed", sample_size=sample_size, random_state=0`
			`)`
			`score_euclidean = silhouette_score(`
			`X, y, metric="euclidean", sample_size=sample_size, random_state=0`
			`)`
			`assert score_precomputed > 0`
			`assert score_euclidean > 0`
			`assert score_precomputed == pytest.approx(score_euclidean)`


			`def test_cluster_size_1():`
			`# Assert Silhouette Coefficient == 0 when there is 1 sample in a cluster`
			`# (cluster 0). We also test the case where there are identical samples`
			`# as the only members of a cluster (cluster 2). To our knowledge, this case`
			`# is not discussed in reference material, and we choose for it a sample`
			`# score of 1.`
			`X = [[0.0], [1.0], [1.0], [2.0], [3.0], [3.0]]`
			`labels = np.array([0, 1, 1, 1, 2, 2])`

			`# Cluster 0: 1 sample -> score of 0 by Rousseeuw's convention`
			`# Cluster 1: intra-cluster = [.5, .5, 1]`
			`# inter-cluster = [1, 1, 1]`
			`# silhouette = [.5, .5, 0]`
			`# Cluster 2: intra-cluster = [0, 0]`
			`# inter-cluster = [arbitrary, arbitrary]`
			`# silhouette = [1., 1.]`

			`silhouette = silhouette_score(X, labels)`
			`assert not np.isnan(silhouette)`
			`ss = silhouette_samples(X, labels)`
			`assert_array_equal(ss, [0, 0.5, 0.5, 0, 1, 1])`


			`def test_silhouette_paper_example():`
			`# Explicitly check per-sample results against Rousseeuw (1987)`
			`# Data from Table 1`
			`lower = [`
			`5.58,`
			`7.00,`
			`6.50,`
			`7.08,`
			`7.00,`
			`3.83,`
			`4.83,`
			`5.08,`
			`8.17,`
			`5.83,`
			`2.17,`
			`5.75,`
			`6.67,`
			`6.92,`
			`4.92,`
			`6.42,`
			`5.00,`
			`5.58,`
			`6.00,`
			`4.67,`
			`6.42,`
			`3.42,`
			`5.50,`
			`6.42,`
			`6.42,`
			`5.00,`
			`3.92,`
			`6.17,`
			`2.50,`
			`4.92,`
			`6.25,`
			`7.33,`
			`4.50,`
			`2.25,`
			`6.33,`
			`2.75,`
			`6.08,`
			`6.67,`
			`4.25,`
			`2.67,`
			`6.00,`
			`6.17,`
			`6.17,`
			`6.92,`
			`6.17,`
			`5.25,`
			`6.83,`
			`4.50,`
			`3.75,`
			`5.75,`
			`5.42,`
			`6.08,`
			`5.83,`
			`6.67,`
			`3.67,`
			`4.75,`
			`3.00,`
			`6.08,`
			`6.67,`
			`5.00,`
			`5.58,`
			`4.83,`
			`6.17,`
			`5.67,`
			`6.50,`
			`6.92,`
			`]`
			`D = np.zeros((12, 12))`
			`D[np.tril_indices(12, -1)] = lower`
			`D += D.T`

			`names = [`
			`"BEL",`
			`"BRA",`
			`"CHI",`
			`"CUB",`
			`"EGY",`
			`"FRA",`
			`"IND",`
			`"ISR",`
			`"USA",`
			`"USS",`
			`"YUG",`
			`"ZAI",`
			`]`

			`# Data from Figure 2`
			`labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]`
			`expected1 = {`
			`"USA": 0.43,`
			`"BEL": 0.39,`
			`"FRA": 0.35,`
			`"ISR": 0.30,`
			`"BRA": 0.22,`
			`"EGY": 0.20,`
			`"ZAI": 0.19,`
			`"CUB": 0.40,`
			`"USS": 0.34,`
			`"CHI": 0.33,`
			`"YUG": 0.26,`
			`"IND": -0.04,`
			`}`
			`score1 = 0.28`

			`# Data from Figure 3`
			`labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]`
			`expected2 = {`
			`"USA": 0.47,`
			`"FRA": 0.44,`
			`"BEL": 0.42,`
			`"ISR": 0.37,`
			`"EGY": 0.02,`
			`"ZAI": 0.28,`
			`"BRA": 0.25,`
			`"IND": 0.17,`
			`"CUB": 0.48,`
			`"USS": 0.44,`
			`"YUG": 0.31,`
			`"CHI": 0.31,`
			`}`
			`score2 = 0.33`

			`for labels, expected, score in [`
			`(labels1, expected1, score1),`
			`(labels2, expected2, score2),`
			`]:`
			`expected = [expected[name] for name in names]`
			`# we check to 2dp because that's what's in the paper`
			`pytest.approx(`
			`expected,`
			`silhouette_samples(D, np.array(labels), metric="precomputed"),`
			`abs=1e-2,`
			`)`
			`pytest.approx(`
			`score, silhouette_score(D, np.array(labels), metric="precomputed"), abs=1e-2`
			`)`


			`def test_correct_labelsize():`
			`# Assert 1 < n_labels < n_samples`
			`dataset = datasets.load_iris()`
			`X = dataset.data`

			`# n_labels = n_samples`
			`y = np.arange(X.shape[0])`
			`err_msg = (`
			`r"Number of labels is %d\. Valid values are 2 "`
			`r"to n_samples - 1 \(inclusive\)" % len(np.unique(y))`
			`)`
			`with pytest.raises(ValueError, match=err_msg):`
			`silhouette_score(X, y)`

			`# n_labels = 1`
			`y = np.zeros(X.shape[0])`
			`err_msg = (`
			`r"Number of labels is %d\. Valid values are 2 "`
			`r"to n_samples - 1 \(inclusive\)" % len(np.unique(y))`
			`)`
			`with pytest.raises(ValueError, match=err_msg):`
			`silhouette_score(X, y)`


			`def test_non_encoded_labels():`
			`dataset = datasets.load_iris()`
			`X = dataset.data`
			`labels = dataset.target`
			`assert silhouette_score(X, labels * 2 + 10) == silhouette_score(X, labels)`
			`assert_array_equal(`
			`silhouette_samples(X, labels * 2 + 10), silhouette_samples(X, labels)`
			`)`


			`def test_non_numpy_labels():`
			`dataset = datasets.load_iris()`
			`X = dataset.data`
			`y = dataset.target`
			`assert silhouette_score(list(X), list(y)) == silhouette_score(X, y)`


			`@pytest.mark.parametrize("dtype", (np.float32, np.float64))`
			`def test_silhouette_nonzero_diag(dtype):`
			`# Make sure silhouette_samples requires diagonal to be zero.`
			`# Non-regression test for #12178`

			`# Construct a zero-diagonal matrix`
			`dists = pairwise_distances(`
			`np.array([[0.2, 0.1, 0.12, 1.34, 1.11, 1.6]], dtype=dtype).T`
			`)`
			`labels = [0, 0, 0, 1, 1, 1]`

			`# small values on the diagonal are OK`
			`dists[2][2] = np.finfo(dists.dtype).eps * 10`
			`silhouette_samples(dists, labels, metric="precomputed")`

			`# values bigger than eps * 100 are not`
			`dists[2][2] = np.finfo(dists.dtype).eps * 1000`
			`with pytest.raises(ValueError, match="contains non-zero"):`
			`silhouette_samples(dists, labels, metric="precomputed")`


			`@pytest.mark.parametrize(`
			`"sparse_container",`
			`CSC_CONTAINERS + CSR_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,`
			`)`
			`def test_silhouette_samples_precomputed_sparse(sparse_container):`
			`"""Check that silhouette_samples works for sparse matrices correctly."""`
			`X = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T`
			`y = [0, 0, 0, 0, 1, 1, 1, 1]`
			`pdist_dense = pairwise_distances(X)`
			`pdist_sparse = sparse_container(pdist_dense)`
			`assert issparse(pdist_sparse)`
			`output_with_sparse_input = silhouette_samples(pdist_sparse, y, metric="precomputed")`
			`output_with_dense_input = silhouette_samples(pdist_dense, y, metric="precomputed")`
			`assert_allclose(output_with_sparse_input, output_with_dense_input)`


			`@pytest.mark.parametrize(`
			`"sparse_container",`
			`CSC_CONTAINERS + CSR_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS,`
			`)`
			`def test_silhouette_samples_euclidean_sparse(sparse_container):`
			`"""Check that silhouette_samples works for sparse matrices correctly."""`
			`X = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T`
			`y = [0, 0, 0, 0, 1, 1, 1, 1]`
			`pdist_dense = pairwise_distances(X)`
			`pdist_sparse = sparse_container(pdist_dense)`
			`assert issparse(pdist_sparse)`
			`output_with_sparse_input = silhouette_samples(pdist_sparse, y)`
			`output_with_dense_input = silhouette_samples(pdist_dense, y)`
			`assert_allclose(output_with_sparse_input, output_with_dense_input)`


			`@pytest.mark.parametrize(`
			`"sparse_container", CSC_CONTAINERS + DOK_CONTAINERS + LIL_CONTAINERS`
			`)`
			`def test_silhouette_reduce(sparse_container):`
			"""Check for non-CSR input to private method `_silhouette_reduce`."""
			`X = np.array([[0.2, 0.1, 0.1, 0.2, 0.1, 1.6, 0.2, 0.1]], dtype=np.float32).T`
			`pdist_dense = pairwise_distances(X)`
			`pdist_sparse = sparse_container(pdist_dense)`
			`y = [0, 0, 0, 0, 1, 1, 1, 1]`
			`label_freqs = np.bincount(y)`
			`with pytest.raises(`
			`TypeError,`
			`match="Expected CSR matrix. Please pass sparse matrix in CSR format.",`
			`):`
			`_silhouette_reduce(pdist_sparse, start=0, labels=y, label_freqs=label_freqs)`


			`def assert_raises_on_only_one_label(func):`
			`"""Assert message when there is only one label"""`
			`rng = np.random.RandomState(seed=0)`
			`with pytest.raises(ValueError, match="Number of labels is"):`
			`func(rng.rand(10, 2), np.zeros(10))`


			`def assert_raises_on_all_points_same_cluster(func):`
			`"""Assert message when all point are in different clusters"""`
			`rng = np.random.RandomState(seed=0)`
			`with pytest.raises(ValueError, match="Number of labels is"):`
			`func(rng.rand(10, 2), np.arange(10))`


			`def test_calinski_harabasz_score():`
			`assert_raises_on_only_one_label(calinski_harabasz_score)`

			`assert_raises_on_all_points_same_cluster(calinski_harabasz_score)`

			`# Assert the value is 1. when all samples are equals`
			`assert 1.0 == calinski_harabasz_score(np.ones((10, 2)), [0] * 5 + [1] * 5)`

			`# Assert the value is 0. when all the mean cluster are equal`
			`assert 0.0 == calinski_harabasz_score([[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10)`

			`# General case (with non numpy arrays)`
			`X = (`
			`[[0, 0], [1, 1]] * 5`
			`+ [[3, 3], [4, 4]] * 5`
			`+ [[0, 4], [1, 3]] * 5`
			`+ [[3, 1], [4, 0]] * 5`
			`)`
			`labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10`
			`pytest.approx(calinski_harabasz_score(X, labels), 45 * (40 - 4) / (5 * (4 - 1)))`


			`def test_davies_bouldin_score():`
			`assert_raises_on_only_one_label(davies_bouldin_score)`
			`assert_raises_on_all_points_same_cluster(davies_bouldin_score)`

			`# Assert the value is 0. when all samples are equals`
			`assert davies_bouldin_score(np.ones((10, 2)), [0] * 5 + [1] * 5) == pytest.approx(`
			`0.0`
			`)`

			`# Assert the value is 0. when all the mean cluster are equal`
			`assert davies_bouldin_score(`
			`[[-1, -1], [1, 1]] * 10, [0] * 10 + [1] * 10`
			`) == pytest.approx(0.0)`

			`# General case (with non numpy arrays)`
			`X = (`
			`[[0, 0], [1, 1]] * 5`
			`+ [[3, 3], [4, 4]] * 5`
			`+ [[0, 4], [1, 3]] * 5`
			`+ [[3, 1], [4, 0]] * 5`
			`)`
			`labels = [0] * 10 + [1] * 10 + [2] * 10 + [3] * 10`
			`pytest.approx(davies_bouldin_score(X, labels), 2 * np.sqrt(0.5) / 3)`

			`# Ensure divide by zero warning is not raised in general case`
			`with warnings.catch_warnings():`
			`warnings.simplefilter("error", RuntimeWarning)`
			`davies_bouldin_score(X, labels)`

			`# General case - cluster have one sample`
			`X = [[0, 0], [2, 2], [3, 3], [5, 5]]`
			`labels = [0, 0, 1, 2]`
			`pytest.approx(davies_bouldin_score(X, labels), (5.0 / 4) / 3)`


			`def test_silhouette_score_integer_precomputed():`
			`"""Check that silhouette_score works for precomputed metrics that are integers.`

			`Non-regression test for #22107.`
			`"""`
			`result = silhouette_score(`
			`[[0, 1, 2], [1, 0, 1], [2, 1, 0]], [0, 0, 1], metric="precomputed"`
			`)`
			`assert result == pytest.approx(1 / 6)`

			`# non-zero on diagonal for ints raises an error`
			`with pytest.raises(ValueError, match="contains non-zero"):`
			`silhouette_score(`
			`[[1, 1, 2], [1, 0, 1], [2, 1, 0]], [0, 0, 1], metric="precomputed"`
			`)`