ai-content-maker/.venv/Lib/site-packages/sklearn/impute/_knn.py

# Authors: Ashim Bhattarai <ashimb9@gmail.com>
#          Thomas J Fan <thomasjpfan@gmail.com>
# License: BSD 3 clause

from numbers import Integral

import numpy as np

from ..base import _fit_context
from ..metrics import pairwise_distances_chunked
from ..metrics.pairwise import _NAN_METRICS
from ..neighbors._base import _get_weights
from ..utils import is_scalar_nan
from ..utils._mask import _get_mask
from ..utils._param_validation import Hidden, Interval, StrOptions
from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted
from ._base import _BaseImputer


class KNNImputer(_BaseImputer):
    """Imputation for completing missing values using k-Nearest Neighbors.

    Each sample's missing values are imputed using the mean value from
    `n_neighbors` nearest neighbors found in the training set. Two samples are
    close if the features that neither is missing are close.

    Read more in the :ref:`User Guide <knnimpute>`.

    .. versionadded:: 0.22

    Parameters
    ----------
    missing_values : int, float, str, np.nan or None, default=np.nan
        The placeholder for the missing values. All occurrences of
        `missing_values` will be imputed. For pandas' dataframes with
        nullable integer dtypes with missing values, `missing_values`
        should be set to np.nan, since `pd.NA` will be converted to np.nan.

    n_neighbors : int, default=5
        Number of neighboring samples to use for imputation.

    weights : {'uniform', 'distance'} or callable, default='uniform'
        Weight function used in prediction.  Possible values:

        - 'uniform' : uniform weights. All points in each neighborhood are
          weighted equally.
        - 'distance' : weight points by the inverse of their distance.
          in this case, closer neighbors of a query point will have a
          greater influence than neighbors which are further away.
        - callable : a user-defined function which accepts an
          array of distances, and returns an array of the same shape
          containing the weights.

    metric : {'nan_euclidean'} or callable, default='nan_euclidean'
        Distance metric for searching neighbors. Possible values:

        - 'nan_euclidean'
        - callable : a user-defined function which conforms to the definition
          of ``_pairwise_callable(X, Y, metric, **kwds)``. The function
          accepts two arrays, X and Y, and a `missing_values` keyword in
          `kwds` and returns a scalar distance value.

    copy : bool, default=True
        If True, a copy of X will be created. If False, imputation will
        be done in-place whenever possible.

    add_indicator : bool, default=False
        If True, a :class:`MissingIndicator` transform will stack onto the
        output of the imputer's transform. This allows a predictive estimator
        to account for missingness despite imputation. If a feature has no
        missing values at fit/train time, the feature won't appear on the
        missing indicator even if there are missing values at transform/test
        time.

    keep_empty_features : bool, default=False
        If True, features that consist exclusively of missing values when
        `fit` is called are returned in results when `transform` is called.
        The imputed value is always `0`.

        .. versionadded:: 1.2

    Attributes
    ----------
    indicator_ : :class:`~sklearn.impute.MissingIndicator`
        Indicator used to add binary indicators for missing values.
        ``None`` if add_indicator is False.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    SimpleImputer : Univariate imputer for completing missing values
        with simple strategies.
    IterativeImputer : Multivariate imputer that estimates values to impute for
        each feature with missing values from all the others.

    Notes
    -----
    A training sample whose distance to a receiver is undefined (nan) never
    contributes to that receiver's imputed value, whatever the `weights`
    setting. When every potential donor of a feature has an undefined
    distance to a receiver, the training mean of that feature is used
    instead.

    References
    ----------
    * `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor
      Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing
      value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17
      no. 6, 2001 Pages 520-525.
      <https://academic.oup.com/bioinformatics/article/17/6/520/272365>`_

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.impute import KNNImputer
    >>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
    >>> imputer = KNNImputer(n_neighbors=2)
    >>> imputer.fit_transform(X)
    array([[1. , 2. , 4. ],
           [3. , 4. , 3. ],
           [5.5, 6. , 5. ],
           [8. , 8. , 7. ]])

    For a more detailed example see
    :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`.
    """

    _parameter_constraints: dict = {
        **_BaseImputer._parameter_constraints,
        "n_neighbors": [Interval(Integral, 1, None, closed="left")],
        "weights": [StrOptions({"uniform", "distance"}), callable, Hidden(None)],
        "metric": [StrOptions(set(_NAN_METRICS)), callable],
        "copy": ["boolean"],
    }

    def __init__(
        self,
        *,
        missing_values=np.nan,
        n_neighbors=5,
        weights="uniform",
        metric="nan_euclidean",
        copy=True,
        add_indicator=False,
        keep_empty_features=False,
    ):
        super().__init__(
            missing_values=missing_values,
            add_indicator=add_indicator,
            keep_empty_features=keep_empty_features,
        )
        self.n_neighbors = n_neighbors
        self.weights = weights
        self.metric = metric
        self.copy = copy

    def _calc_impute(self, dist_pot_donors, n_neighbors, fit_X_col, mask_fit_X_col):
        """Helper function to impute a single column.

        Parameters
        ----------
        dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)
            Distance matrix between the receivers and potential donors from
            training set. There must be at least one non-nan distance between
            a receiver and a potential donor.

        n_neighbors : int
            Number of neighbors to consider.

        fit_X_col : ndarray of shape (n_potential_donors,)
            Column of potential donors from training set.

        mask_fit_X_col : ndarray of shape (n_potential_donors,)
            Missing mask for fit_X_col.

        Returns
        -------
        imputed_values: ndarray of shape (n_receivers,)
            Imputed values for receiver. Donors whose distance to the
            receiver is nan are given zero weight, so they never contribute
            to the average.
        """
        # Get donors: column indices of the `n_neighbors` smallest distances
        # of every receiver. nan is ordered after every number, so nan
        # distances are only selected when a receiver has fewer than
        # `n_neighbors` defined distances.
        donors_idx = np.argpartition(dist_pot_donors, n_neighbors - 1, axis=1)[
            :, :n_neighbors
        ]

        # Get weight matrix from distance matrix
        donors_dist = dist_pot_donors[
            np.arange(donors_idx.shape[0])[:, None], donors_idx
        ]

        weight_matrix = _get_weights(donors_dist, self.weights)

        if weight_matrix is None:
            # No explicit weights were produced (presumably the 'uniform'
            # setting). Build them explicitly so that donors selected only
            # as padding (nan distance) are excluded from the mean instead
            # of being averaged in with full weight.
            weight_matrix = np.logical_not(np.isnan(donors_dist)).astype(
                donors_dist.dtype
            )
        else:
            # fill nans with zeros
            weight_matrix[np.isnan(weight_matrix)] = 0.0

        # Retrieve donor values and calculate kNN average
        donors = fit_X_col.take(donors_idx)
        donors_mask = mask_fit_X_col.take(donors_idx)
        donors = np.ma.array(donors, mask=donors_mask)

        return np.ma.average(donors, axis=1, weights=weight_matrix).data

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the imputer on X.

        Parameters
        ----------
        X : array-like shape of (n_samples, n_features)
            Input data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            The fitted `KNNImputer` class instance.
        """
        # Check data integrity and calling arguments: nan is only tolerated
        # in the input when nan is itself the missing-value placeholder.
        force_all_finite = (
            "allow-nan" if is_scalar_nan(self.missing_values) else True
        )

        X = self._validate_data(
            X,
            accept_sparse=False,
            dtype=FLOAT_DTYPES,
            force_all_finite=force_all_finite,
            copy=self.copy,
        )

        # The training data is kept as the donor pool used by `transform`.
        # NOTE(review): with copy=False this may alias the caller's array,
        # which `transform` can then overwrite in place -- confirm whether
        # that is intended.
        self._fit_X = X
        self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)
        # True for features with at least one observed training value.
        self._valid_mask = ~np.all(self._mask_fit_X, axis=0)

        super()._fit_indicator(self._mask_fit_X)

        return self

    def transform(self, X):
        """Impute all missing values in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data to complete.

        Returns
        -------
        X : array-like of shape (n_samples, n_output_features)
            The imputed dataset. `n_output_features` is the number of features
            that is not always missing during `fit`.
        """

        check_is_fitted(self)
        force_all_finite = (
            "allow-nan" if is_scalar_nan(self.missing_values) else True
        )
        X = self._validate_data(
            X,
            accept_sparse=False,
            dtype=FLOAT_DTYPES,
            force_all_finite=force_all_finite,
            copy=self.copy,
            reset=False,
        )

        mask = _get_mask(X, self.missing_values)
        mask_fit_X = self._mask_fit_X
        valid_mask = self._valid_mask

        # The indicator must be computed from the mask of the original X,
        # before any value is imputed below.
        X_indicator = super()._transform_indicator(mask)

        # Removes columns where the training data is all nan
        if not np.any(mask):
            # No missing values in X
            if self.keep_empty_features:
                Xc = X
                Xc[:, ~valid_mask] = 0
            else:
                Xc = X[:, valid_mask]

            # Even if there are no missing values in X, we still concatenate Xc
            # with the missing value indicator matrix, X_indicator.
            # This is to ensure that the output maintains consistency in terms
            # of columns, regardless of whether missing values exist in X or not.
            return super()._concatenate_indicator(Xc, X_indicator)

        # Only rows with at least one missing value need distances.
        row_missing_idx = np.flatnonzero(mask.any(axis=1))

        non_missing_fix_X = np.logical_not(mask_fit_X)

        # Maps from indices from X to indices in dist matrix
        dist_idx_map = np.zeros(X.shape[0], dtype=int)
        dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])

        def process_chunk(dist_chunk, start):
            # `dist_chunk` holds the distances of rows
            # row_missing_idx[start : start + len(dist_chunk)] to every
            # training sample; X is imputed in place for those rows.
            row_missing_chunk = row_missing_idx[start : start + len(dist_chunk)]

            # Find and impute missing by column
            for col in range(X.shape[1]):
                if not valid_mask[col]:
                    # column was all missing during training
                    continue

                col_mask = mask[row_missing_chunk, col]
                if not np.any(col_mask):
                    # column has no missing values
                    continue

                # Training samples that have an observed value for `col`.
                (potential_donors_idx,) = np.nonzero(non_missing_fix_X[:, col])

                # receivers_idx are indices in X
                receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]

                # distances for samples that needed imputation for column
                dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][
                    :, potential_donors_idx
                ]

                # receivers with all nan distances impute with mean
                all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)
                all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]

                if all_nan_receivers_idx.size:
                    col_mean = np.ma.array(
                        self._fit_X[:, col], mask=mask_fit_X[:, col]
                    ).mean()
                    X[all_nan_receivers_idx, col] = col_mean

                    if len(all_nan_receivers_idx) == len(receivers_idx):
                        # all receivers imputed with mean
                        continue

                    # receivers with at least one defined distance
                    receivers_idx = receivers_idx[~all_nan_dist_mask]
                    dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][
                        :, potential_donors_idx
                    ]

                # Never ask for more neighbors than there are donors.
                n_neighbors = min(self.n_neighbors, len(potential_donors_idx))
                value = self._calc_impute(
                    dist_subset,
                    n_neighbors,
                    self._fit_X[potential_donors_idx, col],
                    mask_fit_X[potential_donors_idx, col],
                )
                X[receivers_idx, col] = value

        # process in fixed-memory chunks
        gen = pairwise_distances_chunked(
            X[row_missing_idx, :],
            self._fit_X,
            metric=self.metric,
            missing_values=self.missing_values,
            force_all_finite=force_all_finite,
            reduce_func=process_chunk,
        )
        for chunk in gen:
            # process_chunk modifies X in place. No return value.
            pass

        if self.keep_empty_features:
            Xc = X
            Xc[:, ~valid_mask] = 0
        else:
            Xc = X[:, valid_mask]

        return super()._concatenate_indicator(Xc, X_indicator)

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then the following input feature names are generated:
              `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "n_features_in_")
        input_features = _check_feature_names_in(self, input_features)
        # Keep only the names of features that had at least one observed
        # value during `fit`.
        names = input_features[self._valid_mask]
        return self._concatenate_indicator_feature_names_out(names, input_features)
first commit 2024-05-03 04:18:51 +03:00			`# Authors: Ashim Bhattarai <ashimb9@gmail.com>`
			`# Thomas J Fan <thomasjpfan@gmail.com>`
			`# License: BSD 3 clause`

			`from numbers import Integral`

			`import numpy as np`

			`from ..base import _fit_context`
			`from ..metrics import pairwise_distances_chunked`
			`from ..metrics.pairwise import _NAN_METRICS`
			`from ..neighbors._base import _get_weights`
			`from ..utils import is_scalar_nan`
			`from ..utils._mask import _get_mask`
			`from ..utils._param_validation import Hidden, Interval, StrOptions`
			`from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted`
			`from ._base import _BaseImputer`


			`class KNNImputer(_BaseImputer):`
			`"""Imputation for completing missing values using k-Nearest Neighbors.`

			`Each sample's missing values are imputed using the mean value from`
			`n_neighbors` nearest neighbors found in the training set. Two samples are
			`close if the features that neither is missing are close.`

			Read more in the :ref:`User Guide <knnimpute>`.

			`.. versionadded:: 0.22`

			`Parameters`
			`----------`
			`missing_values : int, float, str, np.nan or None, default=np.nan`
			`The placeholder for the missing values. All occurrences of`
			`missing_values` will be imputed. For pandas' dataframes with
			nullable integer dtypes with missing values, `missing_values`
			should be set to np.nan, since `pd.NA` will be converted to np.nan.

			`n_neighbors : int, default=5`
			`Number of neighboring samples to use for imputation.`

			`weights : {'uniform', 'distance'} or callable, default='uniform'`
			`Weight function used in prediction. Possible values:`

			`- 'uniform' : uniform weights. All points in each neighborhood are`
			`weighted equally.`
			`- 'distance' : weight points by the inverse of their distance.`
			`in this case, closer neighbors of a query point will have a`
			`greater influence than neighbors which are further away.`
			`- callable : a user-defined function which accepts an`
			`array of distances, and returns an array of the same shape`
			`containing the weights.`

			`metric : {'nan_euclidean'} or callable, default='nan_euclidean'`
			`Distance metric for searching neighbors. Possible values:`

			`- 'nan_euclidean'`
			`- callable : a user-defined function which conforms to the definition`
			of ``_pairwise_callable(X, Y, metric, **kwds)``. The function
			accepts two arrays, X and Y, and a `missing_values` keyword in
			`kwds` and returns a scalar distance value.

			`copy : bool, default=True`
			`If True, a copy of X will be created. If False, imputation will`
			`be done in-place whenever possible.`

			`add_indicator : bool, default=False`
			If True, a :class:`MissingIndicator` transform will stack onto the
			`output of the imputer's transform. This allows a predictive estimator`
			`to account for missingness despite imputation. If a feature has no`
			`missing values at fit/train time, the feature won't appear on the`
			`missing indicator even if there are missing values at transform/test`
			`time.`

			`keep_empty_features : bool, default=False`
			`If True, features that consist exclusively of missing values when`
			`fit` is called are returned in results when `transform` is called.
			The imputed value is always `0`.

			`.. versionadded:: 1.2`

			`Attributes`
			`----------`
			indicator_ : :class:`~sklearn.impute.MissingIndicator`
			`Indicator used to add binary indicators for missing values.`
			``None`` if add_indicator is False.

			`n_features_in_ : int`
			Number of features seen during :term:`fit`.

			`.. versionadded:: 0.24`

			feature_names_in_ : ndarray of shape (`n_features_in_`,)
			Names of features seen during :term:`fit`. Defined only when `X`
			`has feature names that are all strings.`

			`.. versionadded:: 1.0`

			`See Also`
			`--------`
			`SimpleImputer : Univariate imputer for completing missing values`
			`with simple strategies.`
			`IterativeImputer : Multivariate imputer that estimates values to impute for`
			`each feature with missing values from all the others.`

			`References`
			`----------`
			* `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor
			`Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing`
			`value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17`
			`no. 6, 2001 Pages 520-525.`
			<https://academic.oup.com/bioinformatics/article/17/6/520/272365>`_

			`Examples`
			`--------`
			`>>> import numpy as np`
			`>>> from sklearn.impute import KNNImputer`
			`>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]`
			`>>> imputer = KNNImputer(n_neighbors=2)`
			`>>> imputer.fit_transform(X)`
			`array([[1. , 2. , 4. ],`
			`[3. , 4. , 3. ],`
			`[5.5, 6. , 5. ],`
			`[8. , 8. , 7. ]])`

			`For a more detailed example see`
			:ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`.
			`"""`

			`_parameter_constraints: dict = {`
			`**_BaseImputer._parameter_constraints,`
			`"n_neighbors": [Interval(Integral, 1, None, closed="left")],`
			`"weights": [StrOptions({"uniform", "distance"}), callable, Hidden(None)],`
			`"metric": [StrOptions(set(_NAN_METRICS)), callable],`
			`"copy": ["boolean"],`
			`}`

			`def __init__(`
			`self,`
			`*,`
			`missing_values=np.nan,`
			`n_neighbors=5,`
			`weights="uniform",`
			`metric="nan_euclidean",`
			`copy=True,`
			`add_indicator=False,`
			`keep_empty_features=False,`
			`):`
			`super().__init__(`
			`missing_values=missing_values,`
			`add_indicator=add_indicator,`
			`keep_empty_features=keep_empty_features,`
			`)`
			`self.n_neighbors = n_neighbors`
			`self.weights = weights`
			`self.metric = metric`
			`self.copy = copy`

			`def _calc_impute(self, dist_pot_donors, n_neighbors, fit_X_col, mask_fit_X_col):`
			`"""Helper function to impute a single column.`

			`Parameters`
			`----------`
			`dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)`
			`Distance matrix between the receivers and potential donors from`
			`training set. There must be at least one non-nan distance between`
			`a receiver and a potential donor.`

			`n_neighbors : int`
			`Number of neighbors to consider.`

			`fit_X_col : ndarray of shape (n_potential_donors,)`
			`Column of potential donors from training set.`

			`mask_fit_X_col : ndarray of shape (n_potential_donors,)`
			`Missing mask for fit_X_col.`

			`Returns`
			`-------`
			`imputed_values: ndarray of shape (n_receivers,)`
			`Imputed values for receiver.`
			`"""`
			`# Get donors`
			`donors_idx = np.argpartition(dist_pot_donors, n_neighbors - 1, axis=1)[`
			`:, :n_neighbors`
			`]`

			`# Get weight matrix from distance matrix`
			`donors_dist = dist_pot_donors[`
			`np.arange(donors_idx.shape[0])[:, None], donors_idx`
			`]`

			`weight_matrix = _get_weights(donors_dist, self.weights)`

			`# fill nans with zeros`
			`if weight_matrix is not None:`
			`weight_matrix[np.isnan(weight_matrix)] = 0.0`

			`# Retrieve donor values and calculate kNN average`
			`donors = fit_X_col.take(donors_idx)`
			`donors_mask = mask_fit_X_col.take(donors_idx)`
			`donors = np.ma.array(donors, mask=donors_mask)`

			`return np.ma.average(donors, axis=1, weights=weight_matrix).data`

			`@_fit_context(prefer_skip_nested_validation=True)`
			`def fit(self, X, y=None):`
			`"""Fit the imputer on X.`

			`Parameters`
			`----------`
			`X : array-like shape of (n_samples, n_features)`
			Input data, where `n_samples` is the number of samples and
			`n_features` is the number of features.

			`y : Ignored`
			`Not used, present here for API consistency by convention.`

			`Returns`
			`-------`
			`self : object`
			The fitted `KNNImputer` class instance.
			`"""`
			`# Check data integrity and calling arguments`
			`if not is_scalar_nan(self.missing_values):`
			`force_all_finite = True`
			`else:`
			`force_all_finite = "allow-nan"`

			`X = self._validate_data(`
			`X,`
			`accept_sparse=False,`
			`dtype=FLOAT_DTYPES,`
			`force_all_finite=force_all_finite,`
			`copy=self.copy,`
			`)`

			`self._fit_X = X`
			`self._mask_fit_X = _get_mask(self._fit_X, self.missing_values)`
			`self._valid_mask = ~np.all(self._mask_fit_X, axis=0)`

			`super()._fit_indicator(self._mask_fit_X)`

			`return self`

			`def transform(self, X):`
			`"""Impute all missing values in X.`

			`Parameters`
			`----------`
			`X : array-like of shape (n_samples, n_features)`
			`The input data to complete.`

			`Returns`
			`-------`
			`X : array-like of shape (n_samples, n_output_features)`
			The imputed dataset. `n_output_features` is the number of features
			that is not always missing during `fit`.
			`"""`

			`check_is_fitted(self)`
			`if not is_scalar_nan(self.missing_values):`
			`force_all_finite = True`
			`else:`
			`force_all_finite = "allow-nan"`
			`X = self._validate_data(`
			`X,`
			`accept_sparse=False,`
			`dtype=FLOAT_DTYPES,`
			`force_all_finite=force_all_finite,`
			`copy=self.copy,`
			`reset=False,`
			`)`

			`mask = _get_mask(X, self.missing_values)`
			`mask_fit_X = self._mask_fit_X`
			`valid_mask = self._valid_mask`

			`X_indicator = super()._transform_indicator(mask)`

			`# Removes columns where the training data is all nan`
			`if not np.any(mask):`
			`# No missing values in X`
			`if self.keep_empty_features:`
			`Xc = X`
			`Xc[:, ~valid_mask] = 0`
			`else:`
			`Xc = X[:, valid_mask]`

			`# Even if there are no missing values in X, we still concatenate Xc`
			`# with the missing value indicator matrix, X_indicator.`
			`# This is to ensure that the output maintains consistency in terms`
			`# of columns, regardless of whether missing values exist in X or not.`
			`return super()._concatenate_indicator(Xc, X_indicator)`

			`row_missing_idx = np.flatnonzero(mask.any(axis=1))`

			`non_missing_fix_X = np.logical_not(mask_fit_X)`

			`# Maps from indices from X to indices in dist matrix`
			`dist_idx_map = np.zeros(X.shape[0], dtype=int)`
			`dist_idx_map[row_missing_idx] = np.arange(row_missing_idx.shape[0])`

			`def process_chunk(dist_chunk, start):`
			`row_missing_chunk = row_missing_idx[start : start + len(dist_chunk)]`

			`# Find and impute missing by column`
			`for col in range(X.shape[1]):`
			`if not valid_mask[col]:`
			`# column was all missing during training`
			`continue`

			`col_mask = mask[row_missing_chunk, col]`
			`if not np.any(col_mask):`
			`# column has no missing values`
			`continue`

			`(potential_donors_idx,) = np.nonzero(non_missing_fix_X[:, col])`

			`# receivers_idx are indices in X`
			`receivers_idx = row_missing_chunk[np.flatnonzero(col_mask)]`

			`# distances for samples that needed imputation for column`
			`dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][`
			`:, potential_donors_idx`
			`]`

			`# receivers with all nan distances impute with mean`
			`all_nan_dist_mask = np.isnan(dist_subset).all(axis=1)`
			`all_nan_receivers_idx = receivers_idx[all_nan_dist_mask]`

			`if all_nan_receivers_idx.size:`
			`col_mean = np.ma.array(`
			`self._fit_X[:, col], mask=mask_fit_X[:, col]`
			`).mean()`
			`X[all_nan_receivers_idx, col] = col_mean`

			`if len(all_nan_receivers_idx) == len(receivers_idx):`
			`# all receivers imputed with mean`
			`continue`

			`# receivers with at least one defined distance`
			`receivers_idx = receivers_idx[~all_nan_dist_mask]`
			`dist_subset = dist_chunk[dist_idx_map[receivers_idx] - start][`
			`:, potential_donors_idx`
			`]`

			`n_neighbors = min(self.n_neighbors, len(potential_donors_idx))`
			`value = self._calc_impute(`
			`dist_subset,`
			`n_neighbors,`
			`self._fit_X[potential_donors_idx, col],`
			`mask_fit_X[potential_donors_idx, col],`
			`)`
			`X[receivers_idx, col] = value`

			`# process in fixed-memory chunks`
			`gen = pairwise_distances_chunked(`
			`X[row_missing_idx, :],`
			`self._fit_X,`
			`metric=self.metric,`
			`missing_values=self.missing_values,`
			`force_all_finite=force_all_finite,`
			`reduce_func=process_chunk,`
			`)`
			`for chunk in gen:`
			`# process_chunk modifies X in place. No return value.`
			`pass`

			`if self.keep_empty_features:`
			`Xc = X`
			`Xc[:, ~valid_mask] = 0`
			`else:`
			`Xc = X[:, valid_mask]`

			`return super()._concatenate_indicator(Xc, X_indicator)`

			`def get_feature_names_out(self, input_features=None):`
			`"""Get output feature names for transformation.`

			`Parameters`
			`----------`
			`input_features : array-like of str or None, default=None`
			`Input features.`

			- If `input_features` is `None`, then `feature_names_in_` is
			used as feature names in. If `feature_names_in_` is not defined,
			`then the following input feature names are generated:`
			`["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
			- If `input_features` is an array-like, then `input_features` must
			match `feature_names_in_` if `feature_names_in_` is defined.

			`Returns`
			`-------`
			`feature_names_out : ndarray of str objects`
			`Transformed feature names.`
			`"""`
			`check_is_fitted(self, "n_features_in_")`
			`input_features = _check_feature_names_in(self, input_features)`
			`names = input_features[self._valid_mask]`
			`return self._concatenate_indicator_feature_names_out(names, input_features)`