ai-content-maker/.venv/Lib/site-packages/torch/nn/modules/normalization.py

import torch
import numbers
from torch.nn.parameter import Parameter
from .module import Module
from ._functions import CrossMapLRN2d as _cross_map_lrn2d
from .. import functional as F
from .. import init

from torch import Tensor, Size
from typing import Union, List, Tuple

__all__ = ['LocalResponseNorm', 'CrossMapLRN2d', 'LayerNorm', 'GroupNorm']

class LocalResponseNorm(Module):
    r"""Applies local response normalization over an input signal.

    The input signal is composed of several input planes, where channels occupy the second dimension.
    Applies normalization across channels.

    .. math::
        b_{c} = a_{c}\left(k + \frac{\alpha}{n}
        \sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}

    Args:
        size: amount of neighbouring channels used for normalization
        alpha: multiplicative factor. Default: 0.0001
        beta: exponent. Default: 0.75
        k: additive factor. Default: 1

    Shape:
        - Input: :math:`(N, C, *)`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> lrn = nn.LocalResponseNorm(2)
        >>> signal_2d = torch.randn(32, 5, 24, 24)
        >>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)
        >>> output_2d = lrn(signal_2d)
        >>> output_4d = lrn(signal_4d)

    """

    __constants__ = ['size', 'alpha', 'beta', 'k']
    size: int
    alpha: float
    beta: float
    k: float

    def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1.) -> None:
        super().__init__()
        # The hyper-parameters are stored as given; forward() passes them on unchanged.
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input: Tensor) -> Tensor:
        """Return ``F.local_response_norm`` of ``input`` using the stored hyper-parameters."""
        return F.local_response_norm(input, self.size, self.alpha, self.beta,
                                     self.k)

    def extra_repr(self) -> str:
        """Return a string listing ``size``, ``alpha``, ``beta`` and ``k``."""
        # The placeholders are filled from the instance dictionary, where
        # __init__ stored the four hyper-parameters.
        return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)


class CrossMapLRN2d(Module):
    size: int
    alpha: float
    beta: float
    k: float

    def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1) -> None:
        super().__init__()
        self.size = size
        self.alpha = alpha
        self.beta = beta
        self.k = k

    def forward(self, input: Tensor) -> Tensor:
        return _cross_map_lrn2d.apply(input, self.size, self.alpha, self.beta,
                                      self.k)

    def extra_repr(self) -> str:
        return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)


# Accepted spellings of a shape argument: a bare int, a list of ints, or a ``torch.Size``.
_shape_t = Union[int, List[int], Size]


class LayerNorm(Module):
    r"""Layer Normalization applied to a mini-batch of inputs.

    Implements the operation from the paper
    `Layer Normalization <https://arxiv.org/abs/1607.06450>`__:

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    Mean and standard-deviation are taken over the trailing `D` dimensions of the
    input, `D` being the number of dimensions in :attr:`normalized_shape`. With
    :attr:`normalized_shape` equal to ``(3, 5)`` (a 2-dimensional shape), for
    instance, the statistics cover the last 2 dimensions of the input
    (i.e. ``input.mean((-2, -1))``).
    When :attr:`elementwise_affine` is ``True``, :math:`\gamma` and :math:`\beta`
    are learnable affine transform parameters of shape :attr:`normalized_shape`.
    The standard-deviation uses the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    .. note::
        Batch Normalization and Instance Normalization apply a scalar scale and
        bias to each entire channel/plane through their :attr:`affine` option;
        Layer Normalization instead applies a per-element scale and bias
        through :attr:`elementwise_affine`.

    Statistics are computed from the input data in both training and
    evaluation modes.

    Args:
        normalized_shape (int or list or torch.Size): trailing shape of the
            expected input, whose full size is

            .. math::
                [* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]
                    \times \ldots \times \text{normalized\_shape}[-1]]

            A single integer is treated as a singleton list: the module then
            normalizes over the last dimension, which is expected to have that size.
        eps: value added to the denominator for numerical stability. Default: 1e-5
        elementwise_affine: when ``True``, the module has learnable per-element
            affine parameters, initialized to ones (weights) and zeros (biases).
            Default: ``True``.
        bias: when ``False``, no additive bias is learned (only relevant if
            :attr:`elementwise_affine` is ``True``). Default: ``True``.

    Attributes:
        weight: learnable weights of shape :math:`\text{normalized\_shape}`,
            present when :attr:`elementwise_affine` is ``True``.
            Initialized to 1.
        bias: learnable bias of shape :math:`\text{normalized\_shape}`,
            present when :attr:`elementwise_affine` is ``True``.
            Initialized to 0.

    Shape:
        - Input: :math:`(N, *)`
        - Output: :math:`(N, *)` (same shape as input)

    Examples::

        >>> # NLP Example
        >>> batch, sentence_length, embedding_dim = 20, 5, 10
        >>> embedding = torch.randn(batch, sentence_length, embedding_dim)
        >>> layer_norm = nn.LayerNorm(embedding_dim)
        >>> # Activate module
        >>> layer_norm(embedding)
        >>>
        >>> # Image Example
        >>> N, C, H, W = 20, 5, 10, 10
        >>> input = torch.randn(N, C, H, W)
        >>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)
        >>> # as shown in the image below
        >>> layer_norm = nn.LayerNorm([C, H, W])
        >>> output = layer_norm(input)

    .. image:: ../_static/img/nn/layer_norm.jpg
        :scale: 50 %

    """

    __constants__ = ['normalized_shape', 'eps', 'elementwise_affine']
    normalized_shape: Tuple[int, ...]
    eps: float
    elementwise_affine: bool

    def __init__(self, normalized_shape: _shape_t, eps: float = 1e-5, elementwise_affine: bool = True,
                 bias: bool = True, device=None, dtype=None) -> None:
        super().__init__()
        tensor_opts = {'device': device, 'dtype': dtype}

        # A bare integer stands for a one-element shape; anything else is
        # converted to a tuple as-is.
        if isinstance(normalized_shape, numbers.Integral):
            shape = (normalized_shape,)
        else:
            shape = tuple(normalized_shape)  # type: ignore[arg-type]
        self.normalized_shape = shape  # type: ignore[assignment]
        self.eps = eps
        self.elementwise_affine = elementwise_affine

        # 'weight' is always registered before 'bias'; a slot that is not
        # learned is registered as None so the attribute still exists.
        if not elementwise_affine:
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)
        elif bias:
            self.weight = Parameter(torch.empty(shape, **tensor_opts))
            self.bias = Parameter(torch.empty(shape, **tensor_opts))
        else:
            self.weight = Parameter(torch.empty(shape, **tensor_opts))
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Fill ``weight`` with ones and, when present, ``bias`` with zeros."""
        if not self.elementwise_affine:
            # Nothing to initialize: both parameters are None.
            return
        init.ones_(self.weight)
        if self.bias is not None:
            init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:
        """Return ``F.layer_norm`` of ``input`` using the stored shape, parameters and eps."""
        return F.layer_norm(input, self.normalized_shape,
                            weight=self.weight, bias=self.bias, eps=self.eps)

    def extra_repr(self) -> str:
        """Return a string listing ``normalized_shape``, ``eps`` and ``elementwise_affine``."""
        return (f'{self.normalized_shape}, eps={self.eps}, '
                f'elementwise_affine={self.elementwise_affine}')


class GroupNorm(Module):
    r"""Applies Group Normalization over a mini-batch of inputs.

    This layer implements the operation as described in
    the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

    .. math::
        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta

    The input channels are separated into :attr:`num_groups` groups, each containing
    ``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
    :attr:`num_groups`. The mean and standard-deviation are calculated
    separately over each group. :math:`\gamma` and :math:`\beta` are learnable
    per-channel affine transform parameter vectors of size :attr:`num_channels` if
    :attr:`affine` is ``True``.
    The standard-deviation is calculated via the biased estimator, equivalent to
    `torch.var(input, unbiased=False)`.

    This layer uses statistics computed from input data in both training and
    evaluation modes.

    Args:
        num_groups (int): number of groups to separate the channels into;
            must be positive
        num_channels (int): number of channels expected in input
        eps: a value added to the denominator for numerical stability. Default: 1e-5
        affine: a boolean value that when set to ``True``, this module
            has learnable per-channel affine parameters initialized to ones (for weights)
            and zeros (for biases). Default: ``True``.

    Raises:
        ValueError: if :attr:`num_groups` is not positive, or if
            :attr:`num_channels` is not divisible by :attr:`num_groups`.

    Shape:
        - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
        - Output: :math:`(N, C, *)` (same shape as input)

    Examples::

        >>> input = torch.randn(20, 6, 10, 10)
        >>> # Separate 6 channels into 3 groups
        >>> m = nn.GroupNorm(3, 6)
        >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)
        >>> m = nn.GroupNorm(6, 6)
        >>> # Put all 6 channels into a single group (equivalent with LayerNorm)
        >>> m = nn.GroupNorm(1, 6)
        >>> # Activating the module
        >>> output = m(input)
    """

    __constants__ = ['num_groups', 'num_channels', 'eps', 'affine']
    num_groups: int
    num_channels: int
    eps: float
    affine: bool

    def __init__(self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        # Reject a non-positive group count up front: 0 would otherwise surface
        # as a bare ZeroDivisionError from the modulo below, and a negative
        # divisor (e.g. -3 for 6 channels) would pass the divisibility test.
        if num_groups <= 0:
            raise ValueError('num_groups must be a positive integer')
        if num_channels % num_groups != 0:
            raise ValueError('num_channels must be divisible by num_groups')

        self.num_groups = num_groups
        self.num_channels = num_channels
        self.eps = eps
        self.affine = affine
        if self.affine:
            self.weight = Parameter(torch.empty(num_channels, **factory_kwargs))
            self.bias = Parameter(torch.empty(num_channels, **factory_kwargs))
        else:
            # Register both slots as None so the attributes exist even
            # without learnable parameters.
            self.register_parameter('weight', None)
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self) -> None:
        """Fill ``weight`` with ones and ``bias`` with zeros when ``affine`` is set."""
        if self.affine:
            init.ones_(self.weight)
            init.zeros_(self.bias)

    def forward(self, input: Tensor) -> Tensor:
        """Return ``F.group_norm`` of ``input`` using the stored group count, parameters and eps."""
        return F.group_norm(
            input, self.num_groups, self.weight, self.bias, self.eps)

    def extra_repr(self) -> str:
        """Return a string listing ``num_groups``, ``num_channels``, ``eps`` and ``affine``."""
        return '{num_groups}, {num_channels}, eps={eps}, ' \
            'affine={affine}'.format(**self.__dict__)


# TODO: ContrastiveNorm2d
# TODO: DivisiveNorm2d
# TODO: SubtractiveNorm2d
first commit 2024-05-03 04:18:51 +03:00			`import torch`
			`import numbers`
			`from torch.nn.parameter import Parameter`
			`from .module import Module`
			`from ._functions import CrossMapLRN2d as _cross_map_lrn2d`
			`from .. import functional as F`
			`from .. import init`

			`from torch import Tensor, Size`
			`from typing import Union, List, Tuple`

			`__all__ = ['LocalResponseNorm', 'CrossMapLRN2d', 'LayerNorm', 'GroupNorm']`

			`class LocalResponseNorm(Module):`
			`r"""Applies local response normalization over an input signal.`

			`The input signal is composed of several input planes, where channels occupy the second dimension.`
			`Applies normalization across channels.`

			`.. math::`
			`b_{c} = a_{c}\left(k + \frac{\alpha}{n}`
			`\sum_{c'=\max(0, c-n/2)}^{\min(N-1,c+n/2)}a_{c'}^2\right)^{-\beta}`

			`Args:`
			`size: amount of neighbouring channels used for normalization`
			`alpha: multiplicative factor. Default: 0.0001`
			`beta: exponent. Default: 0.75`
			`k: additive factor. Default: 1`

			`Shape:`
			- Input: :math:`(N, C, *)`
			- Output: :math:`(N, C, *)` (same shape as input)

			`Examples::`

			`>>> lrn = nn.LocalResponseNorm(2)`
			`>>> signal_2d = torch.randn(32, 5, 24, 24)`
			`>>> signal_4d = torch.randn(16, 5, 7, 7, 7, 7)`
			`>>> output_2d = lrn(signal_2d)`
			`>>> output_4d = lrn(signal_4d)`

			`"""`

			`__constants__ = ['size', 'alpha', 'beta', 'k']`
			`size: int`
			`alpha: float`
			`beta: float`
			`k: float`

			`def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1.) -> None:`
			`super().__init__()`
			`self.size = size`
			`self.alpha = alpha`
			`self.beta = beta`
			`self.k = k`

			`def forward(self, input: Tensor) -> Tensor:`
			`return F.local_response_norm(input, self.size, self.alpha, self.beta,`
			`self.k)`

			`def extra_repr(self):`
			`return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)`


			`class CrossMapLRN2d(Module):`
			`size: int`
			`alpha: float`
			`beta: float`
			`k: float`

			`def __init__(self, size: int, alpha: float = 1e-4, beta: float = 0.75, k: float = 1) -> None:`
			`super().__init__()`
			`self.size = size`
			`self.alpha = alpha`
			`self.beta = beta`
			`self.k = k`

			`def forward(self, input: Tensor) -> Tensor:`
			`return _cross_map_lrn2d.apply(input, self.size, self.alpha, self.beta,`
			`self.k)`

			`def extra_repr(self) -> str:`
			`return '{size}, alpha={alpha}, beta={beta}, k={k}'.format(**self.__dict__)`


			`_shape_t = Union[int, List[int], Size]`


			`class LayerNorm(Module):`
			`r"""Applies Layer Normalization over a mini-batch of inputs.`

			`This layer implements the operation as described in`
			the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__

			`.. math::`
			`y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta`

			The mean and standard-deviation are calculated over the last `D` dimensions, where `D`
			is the dimension of :attr:`normalized_shape`. For example, if :attr:`normalized_shape`
			is ``(3, 5)`` (a 2-dimensional shape), the mean and standard-deviation are computed over
			the last 2 dimensions of the input (i.e. ``input.mean((-2, -1))``).
			:math:`\gamma` and :math:`\beta` are learnable affine transform parameters of
			:attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``.
			`The standard-deviation is calculated via the biased estimator, equivalent to`
			`torch.var(input, unbiased=False)`.

			`.. note::`
			`Unlike Batch Normalization and Instance Normalization, which applies`
			`scalar scale and bias for each entire channel/plane with the`
			:attr:`affine` option, Layer Normalization applies per-element scale and
			bias with :attr:`elementwise_affine`.

			`This layer uses statistics computed from input data in both training and`
			`evaluation modes.`

			`Args:`
			`normalized_shape (int or list or torch.Size): input shape from an expected input`
			`of size`

			`.. math::`
			`[* \times \text{normalized\_shape}[0] \times \text{normalized\_shape}[1]`
			`\times \ldots \times \text{normalized\_shape}[-1]]`

			`If a single integer is used, it is treated as a singleton list, and this module will`
			`normalize over the last dimension which is expected to be of that specific size.`
			`eps: a value added to the denominator for numerical stability. Default: 1e-5`
			elementwise_affine: a boolean value that when set to ``True``, this module
			`has learnable per-element affine parameters initialized to ones (for weights)`
			and zeros (for biases). Default: ``True``.
			bias: If set to ``False``, the layer will not learn an additive bias (only relevant if
			:attr:`elementwise_affine` is ``True``). Default: ``True``.

			`Attributes:`
			`weight: the learnable weights of the module of shape`
			:math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
			`The values are initialized to 1.`
			`bias: the learnable bias of the module of shape`
			:math:`\text{normalized\_shape}` when :attr:`elementwise_affine` is set to ``True``.
			`The values are initialized to 0.`

			`Shape:`
			- Input: :math:`(N, *)`
			- Output: :math:`(N, *)` (same shape as input)

			`Examples::`

			`>>> # NLP Example`
			`>>> batch, sentence_length, embedding_dim = 20, 5, 10`
			`>>> embedding = torch.randn(batch, sentence_length, embedding_dim)`
			`>>> layer_norm = nn.LayerNorm(embedding_dim)`
			`>>> # Activate module`
			`>>> layer_norm(embedding)`
			`>>>`
			`>>> # Image Example`
			`>>> N, C, H, W = 20, 5, 10, 10`
			`>>> input = torch.randn(N, C, H, W)`
			`>>> # Normalize over the last three dimensions (i.e. the channel and spatial dimensions)`
			`>>> # as shown in the image below`
			`>>> layer_norm = nn.LayerNorm([C, H, W])`
			`>>> output = layer_norm(input)`

			`.. image:: ../_static/img/nn/layer_norm.jpg`
			`:scale: 50 %`

			`"""`

			`__constants__ = ['normalized_shape', 'eps', 'elementwise_affine']`
			`normalized_shape: Tuple[int, ...]`
			`eps: float`
			`elementwise_affine: bool`

			`def __init__(self, normalized_shape: _shape_t, eps: float = 1e-5, elementwise_affine: bool = True,`
			`bias: bool = True, device=None, dtype=None) -> None:`
			`factory_kwargs = {'device': device, 'dtype': dtype}`
			`super().__init__()`
			`if isinstance(normalized_shape, numbers.Integral):`
			`# mypy error: incompatible types in assignment`
			`normalized_shape = (normalized_shape,) # type: ignore[assignment]`
			`self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type]`
			`self.eps = eps`
			`self.elementwise_affine = elementwise_affine`
			`if self.elementwise_affine:`
			`self.weight = Parameter(torch.empty(self.normalized_shape, **factory_kwargs))`
			`if bias:`
			`self.bias = Parameter(torch.empty(self.normalized_shape, **factory_kwargs))`
			`else:`
			`self.register_parameter('bias', None)`
			`else:`
			`self.register_parameter('weight', None)`
			`self.register_parameter('bias', None)`

			`self.reset_parameters()`

			`def reset_parameters(self) -> None:`
			`if self.elementwise_affine:`
			`init.ones_(self.weight)`
			`if self.bias is not None:`
			`init.zeros_(self.bias)`

			`def forward(self, input: Tensor) -> Tensor:`
			`return F.layer_norm(`
			`input, self.normalized_shape, self.weight, self.bias, self.eps)`

			`def extra_repr(self) -> str:`
			`return '{normalized_shape}, eps={eps}, ' \`
			`'elementwise_affine={elementwise_affine}'.format(**self.__dict__)`


			`class GroupNorm(Module):`
			`r"""Applies Group Normalization over a mini-batch of inputs.`

			`This layer implements the operation as described in`
			the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__

			`.. math::`
			`y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta`

			The input channels are separated into :attr:`num_groups` groups, each containing
			``num_channels / num_groups`` channels. :attr:`num_channels` must be divisible by
			:attr:`num_groups`. The mean and standard-deviation are calculated
			separately over the each group. :math:`\gamma` and :math:`\beta` are learnable
			per-channel affine transform parameter vectors of size :attr:`num_channels` if
			:attr:`affine` is ``True``.
			`The standard-deviation is calculated via the biased estimator, equivalent to`
			`torch.var(input, unbiased=False)`.

			`This layer uses statistics computed from input data in both training and`
			`evaluation modes.`

			`Args:`
			`num_groups (int): number of groups to separate the channels into`
			`num_channels (int): number of channels expected in input`
			`eps: a value added to the denominator for numerical stability. Default: 1e-5`
			affine: a boolean value that when set to ``True``, this module
			`has learnable per-channel affine parameters initialized to ones (for weights)`
			and zeros (for biases). Default: ``True``.

			`Shape:`
			- Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}`
			- Output: :math:`(N, C, *)` (same shape as input)

			`Examples::`

			`>>> input = torch.randn(20, 6, 10, 10)`
			`>>> # Separate 6 channels into 3 groups`
			`>>> m = nn.GroupNorm(3, 6)`
			`>>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm)`
			`>>> m = nn.GroupNorm(6, 6)`
			`>>> # Put all 6 channels into a single group (equivalent with LayerNorm)`
			`>>> m = nn.GroupNorm(1, 6)`
			`>>> # Activating the module`
			`>>> output = m(input)`
			`"""`

			`__constants__ = ['num_groups', 'num_channels', 'eps', 'affine']`
			`num_groups: int`
			`num_channels: int`
			`eps: float`
			`affine: bool`

			`def __init__(self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True,`
			`device=None, dtype=None) -> None:`
			`factory_kwargs = {'device': device, 'dtype': dtype}`
			`super().__init__()`
			`if num_channels % num_groups != 0:`
			`raise ValueError('num_channels must be divisible by num_groups')`

			`self.num_groups = num_groups`
			`self.num_channels = num_channels`
			`self.eps = eps`
			`self.affine = affine`
			`if self.affine:`
			`self.weight = Parameter(torch.empty(num_channels, **factory_kwargs))`
			`self.bias = Parameter(torch.empty(num_channels, **factory_kwargs))`
			`else:`
			`self.register_parameter('weight', None)`
			`self.register_parameter('bias', None)`

			`self.reset_parameters()`

			`def reset_parameters(self) -> None:`
			`if self.affine:`
			`init.ones_(self.weight)`
			`init.zeros_(self.bias)`

			`def forward(self, input: Tensor) -> Tensor:`
			`return F.group_norm(`
			`input, self.num_groups, self.weight, self.bias, self.eps)`

			`def extra_repr(self) -> str:`
			`return '{num_groups}, {num_channels}, eps={eps}, ' \`
			`'affine={affine}'.format(**self.__dict__)`


			`# TODO: ContrastiveNorm2d`
			`# TODO: DivisiveNorm2d`
			`# TODO: SubtractiveNorm2d`