ai-content-maker/.venv/Lib/site-packages/torch/nn/modules/linear.py

import math
from typing import Any

import torch
from torch import Tensor
from torch.nn.parameter import Parameter, UninitializedParameter
from .. import functional as F
from .. import init
from .module import Module
from .lazy import LazyModuleMixin


__all__ = [
    'Bilinear',
    'Identity',
    'LazyLinear',
    'Linear',
]


class Identity(Module):
    r"""A placeholder identity operator that is argument-insensitive.

    Args:
        args: any argument (unused)
        kwargs: any keyword argument (unused)

    Shape:
        - Input: :math:`(*)`, where :math:`*` means any number of dimensions.
        - Output: :math:`(*)`, same shape as the input.

    Examples::

        >>> m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 20])

    """

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        super().__init__()

    def forward(self, input: Tensor) -> Tensor:
        return input


class Linear(Module):
    r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

    This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

    On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``

    Shape:
        - Input: :math:`(*, H_{in})` where :math:`*` means any number of
          dimensions including none and :math:`H_{in} = \text{in\_features}`.
        - Output: :math:`(*, H_{out})` where all but the last dimension
          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`

    Examples::

        >>> m = nn.Linear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """

    __constants__ = ['in_features', 'out_features']
    in_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))
        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        # Setting a=sqrt(5) in kaiming_uniform is the same as initializing with
        # uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see
        # https://github.com/pytorch/pytorch/issues/57109
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input: Tensor) -> Tensor:
        return F.linear(input, self.weight, self.bias)

    def extra_repr(self) -> str:
        return f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}'


# This class exists solely to avoid triggering an obscure error when scripting
# an improperly quantized attention layer. See this issue for details:
# https://github.com/pytorch/pytorch/issues/58969
# TODO: fail fast on quantization API usage error, then remove this class
# and replace uses of it with plain Linear
class NonDynamicallyQuantizableLinear(Linear):
    def __init__(self, in_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        super().__init__(in_features, out_features, bias=bias,
                         device=device, dtype=dtype)


class Bilinear(Module):
    r"""Applies a bilinear transformation to the incoming data: :math:`y = x_1^T A x_2 + b`.

    Args:
        in1_features: size of each first input sample
        in2_features: size of each second input sample
        out_features: size of each output sample
        bias: If set to False, the layer will not learn an additive bias.
            Default: ``True``

    Shape:
        - Input1: :math:`(*, H_{in1})` where :math:`H_{in1}=\text{in1\_features}` and
          :math:`*` means any number of additional dimensions including none. All but the last dimension
          of the inputs should be the same.
        - Input2: :math:`(*, H_{in2})` where :math:`H_{in2}=\text{in2\_features}`.
        - Output: :math:`(*, H_{out})` where :math:`H_{out}=\text{out\_features}`
          and all but the last dimension are the same shape as the input.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in1\_features}, \text{in2\_features})`.
            The values are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in1\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
                :math:`k = \frac{1}{\text{in1\_features}}`

    Examples::

        >>> m = nn.Bilinear(20, 30, 40)
        >>> input1 = torch.randn(128, 20)
        >>> input2 = torch.randn(128, 30)
        >>> output = m(input1, input2)
        >>> print(output.size())
        torch.Size([128, 40])
    """

    __constants__ = ['in1_features', 'in2_features', 'out_features']
    in1_features: int
    in2_features: int
    out_features: int
    weight: Tensor

    def __init__(self, in1_features: int, in2_features: int, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        super().__init__()
        self.in1_features = in1_features
        self.in2_features = in2_features
        self.out_features = out_features
        self.weight = Parameter(torch.empty((out_features, in1_features, in2_features), **factory_kwargs))

        if bias:
            self.bias = Parameter(torch.empty(out_features, **factory_kwargs))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self) -> None:
        bound = 1 / math.sqrt(self.weight.size(1))
        init.uniform_(self.weight, -bound, bound)
        if self.bias is not None:
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input1: Tensor, input2: Tensor) -> Tensor:
        return F.bilinear(input1, input2, self.weight, self.bias)

    def extra_repr(self) -> str:
        return 'in1_features={}, in2_features={}, out_features={}, bias={}'.format(
            self.in1_features, self.in2_features, self.out_features, self.bias is not None
        )


class LazyLinear(LazyModuleMixin, Linear):
    r"""A :class:`torch.nn.Linear` module where `in_features` is inferred.

    In this module, the `weight` and `bias` are of :class:`torch.nn.UninitializedParameter`
    class. They will be initialized after the first call to ``forward`` is done and the
    module will become a regular :class:`torch.nn.Linear` module. The ``in_features`` argument
    of the :class:`Linear` is inferred from the ``input.shape[-1]``.

    Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
    on lazy modules and their limitations.

    Args:
        out_features: size of each output sample
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`


    """

    cls_to_become = Linear  # type: ignore[assignment]
    weight: UninitializedParameter
    bias: UninitializedParameter  # type: ignore[assignment]

    def __init__(self, out_features: int, bias: bool = True,
                 device=None, dtype=None) -> None:
        factory_kwargs = {'device': device, 'dtype': dtype}
        # bias is hardcoded to False to avoid creating tensor
        # that will soon be overwritten.
        super().__init__(0, 0, False)
        self.weight = UninitializedParameter(**factory_kwargs)
        self.out_features = out_features
        if bias:
            self.bias = UninitializedParameter(**factory_kwargs)

    def reset_parameters(self) -> None:
        if not self.has_uninitialized_params() and self.in_features != 0:
            super().reset_parameters()

    def initialize_parameters(self, input) -> None:  # type: ignore[override]
        if self.has_uninitialized_params():
            with torch.no_grad():
                self.in_features = input.shape[-1]
                self.weight.materialize((self.out_features, self.in_features))
                if self.bias is not None:
                    self.bias.materialize((self.out_features,))
                self.reset_parameters()
# TODO: PartialLinear - maybe in sparse?
first commit 2024-05-03 04:18:51 +03:00			`import math`
			`from typing import Any`

			`import torch`
			`from torch import Tensor`
			`from torch.nn.parameter import Parameter, UninitializedParameter`
			`from .. import functional as F`
			`from .. import init`
			`from .module import Module`
			`from .lazy import LazyModuleMixin`


			`__all__ = [`
			`'Bilinear',`
			`'Identity',`
			`'LazyLinear',`
			`'Linear',`
			`]`


			`class Identity(Module):`
			`r"""A placeholder identity operator that is argument-insensitive.`

			`Args:`
			`args: any argument (unused)`
			`kwargs: any keyword argument (unused)`

			`Shape:`
			- Input: :math:`()`, where :math:`` means any number of dimensions.
			- Output: :math:`(*)`, same shape as the input.

			`Examples::`

			`>>> m = nn.Identity(54, unused_argument1=0.1, unused_argument2=False)`
			`>>> input = torch.randn(128, 20)`
			`>>> output = m(input)`
			`>>> print(output.size())`
			`torch.Size([128, 20])`

			`"""`

			`def __init__(self, args: Any, *kwargs: Any) -> None:`
			`super().__init__()`

			`def forward(self, input: Tensor) -> Tensor:`
			`return input`


			`class Linear(Module):`
			r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`.

			This module supports :ref:`TensorFloat32<tf32_on_ampere>`.

			On certain ROCm devices, when using float16 inputs this module will use :ref:`different precision<fp16_on_mi200>` for backward.

			`Args:`
			`in_features: size of each input sample`
			`out_features: size of each output sample`
			bias: If set to ``False``, the layer will not learn an additive bias.
			Default: ``True``

			`Shape:`
			- Input: :math:`(, H_{in})` where :math:`` means any number of
			dimensions including none and :math:`H_{in} = \text{in\_features}`.
			- Output: :math:`(*, H_{out})` where all but the last dimension
			are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

			`Attributes:`
			`weight: the learnable weights of the module of shape`
			:math:`(\text{out\_features}, \text{in\_features})`. The values are
			initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
			:math:`k = \frac{1}{\text{in\_features}}`
			bias: the learnable bias of the module of shape :math:`(\text{out\_features})`.
			If :attr:`bias` is ``True``, the values are initialized from
			:math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
			:math:`k = \frac{1}{\text{in\_features}}`

			`Examples::`

			`>>> m = nn.Linear(20, 30)`
			`>>> input = torch.randn(128, 20)`
			`>>> output = m(input)`
			`>>> print(output.size())`
			`torch.Size([128, 30])`
			`"""`

			`__constants__ = ['in_features', 'out_features']`
			`in_features: int`
			`out_features: int`
			`weight: Tensor`

			`def __init__(self, in_features: int, out_features: int, bias: bool = True,`
			`device=None, dtype=None) -> None:`
			`factory_kwargs = {'device': device, 'dtype': dtype}`
			`super().__init__()`
			`self.in_features = in_features`
			`self.out_features = out_features`
			`self.weight = Parameter(torch.empty((out_features, in_features), **factory_kwargs))`
			`if bias:`
			`self.bias = Parameter(torch.empty(out_features, **factory_kwargs))`
			`else:`
			`self.register_parameter('bias', None)`
			`self.reset_parameters()`

			`def reset_parameters(self) -> None:`
			`# Setting a=sqrt(5) in kaiming_uniform is the same as initializing with`
			`# uniform(-1/sqrt(in_features), 1/sqrt(in_features)). For details, see`
			`# https://github.com/pytorch/pytorch/issues/57109`
			`init.kaiming_uniform_(self.weight, a=math.sqrt(5))`
			`if self.bias is not None:`
			`fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)`
			`bound = 1 / math.sqrt(fan_in) if fan_in > 0 else 0`
			`init.uniform_(self.bias, -bound, bound)`

			`def forward(self, input: Tensor) -> Tensor:`
			`return F.linear(input, self.weight, self.bias)`

			`def extra_repr(self) -> str:`
			`return f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}'`


			`# This class exists solely to avoid triggering an obscure error when scripting`
			`# an improperly quantized attention layer. See this issue for details:`
			`# https://github.com/pytorch/pytorch/issues/58969`
			`# TODO: fail fast on quantization API usage error, then remove this class`
			`# and replace uses of it with plain Linear`
			`class NonDynamicallyQuantizableLinear(Linear):`
			`def __init__(self, in_features: int, out_features: int, bias: bool = True,`
			`device=None, dtype=None) -> None:`
			`super().__init__(in_features, out_features, bias=bias,`
			`device=device, dtype=dtype)`


			`class Bilinear(Module):`
			r"""Applies a bilinear transformation to the incoming data: :math:`y = x_1^T A x_2 + b`.

			`Args:`
			`in1_features: size of each first input sample`
			`in2_features: size of each second input sample`
			`out_features: size of each output sample`
			`bias: If set to False, the layer will not learn an additive bias.`
			Default: ``True``

			`Shape:`
			- Input1: :math:`(*, H_{in1})` where :math:`H_{in1}=\text{in1\_features}` and
			:math:`*` means any number of additional dimensions including none. All but the last dimension
			`of the inputs should be the same.`
			- Input2: :math:`(*, H_{in2})` where :math:`H_{in2}=\text{in2\_features}`.
			- Output: :math:`(*, H_{out})` where :math:`H_{out}=\text{out\_features}`
			`and all but the last dimension are the same shape as the input.`

			`Attributes:`
			`weight: the learnable weights of the module of shape`
			:math:`(\text{out\_features}, \text{in1\_features}, \text{in2\_features})`.
			The values are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
			:math:`k = \frac{1}{\text{in1\_features}}`
			bias: the learnable bias of the module of shape :math:`(\text{out\_features})`.
			If :attr:`bias` is ``True``, the values are initialized from
			:math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
			:math:`k = \frac{1}{\text{in1\_features}}`

			`Examples::`

			`>>> m = nn.Bilinear(20, 30, 40)`
			`>>> input1 = torch.randn(128, 20)`
			`>>> input2 = torch.randn(128, 30)`
			`>>> output = m(input1, input2)`
			`>>> print(output.size())`
			`torch.Size([128, 40])`
			`"""`

			`__constants__ = ['in1_features', 'in2_features', 'out_features']`
			`in1_features: int`
			`in2_features: int`
			`out_features: int`
			`weight: Tensor`

			`def __init__(self, in1_features: int, in2_features: int, out_features: int, bias: bool = True,`
			`device=None, dtype=None) -> None:`
			`factory_kwargs = {'device': device, 'dtype': dtype}`
			`super().__init__()`
			`self.in1_features = in1_features`
			`self.in2_features = in2_features`
			`self.out_features = out_features`
			`self.weight = Parameter(torch.empty((out_features, in1_features, in2_features), **factory_kwargs))`

			`if bias:`
			`self.bias = Parameter(torch.empty(out_features, **factory_kwargs))`
			`else:`
			`self.register_parameter('bias', None)`
			`self.reset_parameters()`

			`def reset_parameters(self) -> None:`
			`bound = 1 / math.sqrt(self.weight.size(1))`
			`init.uniform_(self.weight, -bound, bound)`
			`if self.bias is not None:`
			`init.uniform_(self.bias, -bound, bound)`

			`def forward(self, input1: Tensor, input2: Tensor) -> Tensor:`
			`return F.bilinear(input1, input2, self.weight, self.bias)`

			`def extra_repr(self) -> str:`
			`return 'in1_features={}, in2_features={}, out_features={}, bias={}'.format(`
			`self.in1_features, self.in2_features, self.out_features, self.bias is not None`
			`)`


			`class LazyLinear(LazyModuleMixin, Linear):`
			r"""A :class:`torch.nn.Linear` module where `in_features` is inferred.

			In this module, the `weight` and `bias` are of :class:`torch.nn.UninitializedParameter`
			class. They will be initialized after the first call to ``forward`` is done and the
			module will become a regular :class:`torch.nn.Linear` module. The ``in_features`` argument
			of the :class:`Linear` is inferred from the ``input.shape[-1]``.

			Check the :class:`torch.nn.modules.lazy.LazyModuleMixin` for further documentation
			`on lazy modules and their limitations.`

			`Args:`
			`out_features: size of each output sample`
			bias: If set to ``False``, the layer will not learn an additive bias.
			Default: ``True``

			`Attributes:`
			`weight: the learnable weights of the module of shape`
			:math:`(\text{out\_features}, \text{in\_features})`. The values are
			initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
			:math:`k = \frac{1}{\text{in\_features}}`
			bias: the learnable bias of the module of shape :math:`(\text{out\_features})`.
			If :attr:`bias` is ``True``, the values are initialized from
			:math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
			:math:`k = \frac{1}{\text{in\_features}}`


			`"""`

			`cls_to_become = Linear # type: ignore[assignment]`
			`weight: UninitializedParameter`
			`bias: UninitializedParameter # type: ignore[assignment]`

			`def __init__(self, out_features: int, bias: bool = True,`
			`device=None, dtype=None) -> None:`
			`factory_kwargs = {'device': device, 'dtype': dtype}`
			`# bias is hardcoded to False to avoid creating tensor`
			`# that will soon be overwritten.`
			`super().__init__(0, 0, False)`
			`self.weight = UninitializedParameter(**factory_kwargs)`
			`self.out_features = out_features`
			`if bias:`
			`self.bias = UninitializedParameter(**factory_kwargs)`

			`def reset_parameters(self) -> None:`
			`if not self.has_uninitialized_params() and self.in_features != 0:`
			`super().reset_parameters()`

			`def initialize_parameters(self, input) -> None: # type: ignore[override]`
			`if self.has_uninitialized_params():`
			`with torch.no_grad():`
			`self.in_features = input.shape[-1]`
			`self.weight.materialize((self.out_features, self.in_features))`
			`if self.bias is not None:`
			`self.bias.materialize((self.out_features,))`
			`self.reset_parameters()`
			`# TODO: PartialLinear - maybe in sparse?`