ai-content-maker/.venv/Lib/site-packages/TTS/tts/layers/generic/pos_encoding.py

import math

import torch
from torch import nn


class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for non-recurrent neural networks.

    Implementation based on "Attention Is All You Need"::

        PE[pos, 2i]     = sin(pos / 10000^(2i / channels))
        PE[pos, 2i + 1] = cos(pos / 10000^(2i / channels))

    The table is computed once for ``max_len`` positions and stored in the
    buffer ``pe`` with shape ``[1, channels, max_len]``. The buffer is
    registered with the default persistence, so it is part of the module's
    ``state_dict`` and a loaded checkpoint restores the table it was saved with.

    Args:
       channels (int): embedding size. Must be even.
       dropout_p (float): dropout rate applied to the output.
       max_len (int): maximum sequence length.
       use_scale (bool): whether to use a learnable scaling coefficient.

    Raises:
       ValueError: if ``channels`` is odd.
    """

    def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=False):
        super().__init__()
        if channels % 2 != 0:
            raise ValueError(f"Cannot use sin/cos positional encoding with odd channels (got channels={channels:d})")
        self.use_scale = use_scale
        if use_scale:
            # Learnable multiplier applied to the encoding only, initialised to 1.
            self.scale = torch.nn.Parameter(torch.ones(1))
        pe = torch.zeros(max_len, channels)
        position = torch.arange(0, max_len).float().unsqueeze(1)  # [max_len, 1]
        # One wavelength factor 10000^(2i / channels) per sin/cos channel pair.
        div_term = torch.pow(10000, torch.arange(0, channels, 2).float() / channels)
        # The position is DIVIDED by the factor, as in the reference formula.
        # Multiplying instead pushes the angles far beyond float32 precision
        # and turns the upper channels into aliased noise.
        angles = position / div_term  # [max_len, channels // 2]
        pe[:, 0::2] = torch.sin(angles)
        pe[:, 1::2] = torch.cos(angles)
        # [max_len, channels] -> [1, channels, max_len] to match [B, C, T] inputs.
        pe = pe.unsqueeze(0).transpose(1, 2)
        self.register_buffer("pe", pe)
        # The dropout attribute only exists when dropout is enabled; forward()
        # checks for it with hasattr().
        if dropout_p > 0:
            self.dropout = nn.Dropout(p=dropout_p)
        self.channels = channels

    def forward(self, x, mask=None, first_idx=None, last_idx=None):
        """Scale ``x`` by ``sqrt(channels)`` and add the positional encoding.

        When ``first_idx`` is None the first ``T`` positions of the table are
        used and, if ``mask`` is given, the encoding is multiplied by it before
        being added. When ``first_idx`` is given, the slice
        ``pe[:, :, first_idx:last_idx]`` is added instead; in that case ``mask``
        is not applied and no length check is performed here.

        Shapes:
            x: [B, C, T]
            mask: [B, 1, T]
            first_idx: int
            last_idx: int

        Returns:
            Tensor: the encoded input, after dropout if it is enabled.

        Raises:
            RuntimeError: if ``first_idx`` is None and ``T`` exceeds the number
                of precomputed positions.
        """

        x = x * math.sqrt(self.channels)
        if first_idx is None:
            if self.pe.size(2) < x.size(2):
                raise RuntimeError(
                    f"Sequence is {x.size(2)} but PositionalEncoding is"
                    f" limited to {self.pe.size(2)}. See max_len argument."
                )
            pos_enc = self.pe[:, :, : x.size(2)]
            if mask is not None:
                pos_enc = pos_enc * mask
        else:
            pos_enc = self.pe[:, :, first_idx:last_idx]
        if self.use_scale:
            pos_enc = self.scale * pos_enc
        x = x + pos_enc
        if hasattr(self, "dropout"):
            x = self.dropout(x)
        return x
first commit 2024-05-03 04:18:51 +03:00			`import math`

			`import torch`
			`from torch import nn`


			`class PositionalEncoding(nn.Module):`
			`"""Sinusoidal positional encoding for non-recurrent neural networks.`
			`Implementation based on "Attention Is All You Need"`

			`Args:`
			`channels (int): embedding size`
			`dropout_p (float): dropout rate applied to the output.`
			`max_len (int): maximum sequence length.`
			`use_scale (bool): whether to use a learnable scaling coefficient.`
			`"""`

			`def __init__(self, channels, dropout_p=0.0, max_len=5000, use_scale=False):`
			`super().__init__()`
			`if channels % 2 != 0:`
			`raise ValueError(`
			`"Cannot use sin/cos positional encoding with " "odd channels (got channels={:d})".format(channels)`
			`)`
			`self.use_scale = use_scale`
			`if use_scale:`
			`self.scale = torch.nn.Parameter(torch.ones(1))`
			`pe = torch.zeros(max_len, channels)`
			`position = torch.arange(0, max_len).unsqueeze(1)`
			`div_term = torch.pow(10000, torch.arange(0, channels, 2).float() / channels)`
			`pe[:, 0::2] = torch.sin(position.float() * div_term)`
			`pe[:, 1::2] = torch.cos(position.float() * div_term)`
			`pe = pe.unsqueeze(0).transpose(1, 2)`
			`self.register_buffer("pe", pe)`
			`if dropout_p > 0:`
			`self.dropout = nn.Dropout(p=dropout_p)`
			`self.channels = channels`

			`def forward(self, x, mask=None, first_idx=None, last_idx=None):`
			`"""`
			`Shapes:`
			`x: [B, C, T]`
			`mask: [B, 1, T]`
			`first_idx: int`
			`last_idx: int`
			`"""`

			`x = x * math.sqrt(self.channels)`
			`if first_idx is None:`
			`if self.pe.size(2) < x.size(2):`
			`raise RuntimeError(`
			`f"Sequence is {x.size(2)} but PositionalEncoding is"`
			`f" limited to {self.pe.size(2)}. See max_len argument."`
			`)`
			`if mask is not None:`
			`pos_enc = self.pe[:, :, : x.size(2)] * mask`
			`else:`
			`pos_enc = self.pe[:, :, : x.size(2)]`
			`if self.use_scale:`
			`x = x + self.scale * pos_enc`
			`else:`
			`x = x + pos_enc`
			`else:`
			`if self.use_scale:`
			`x = x + self.scale * self.pe[:, :, first_idx:last_idx]`
			`else:`
			`x = x + self.pe[:, :, first_idx:last_idx]`
			`if hasattr(self, "dropout"):`
			`x = self.dropout(x)`
			`return x`