440 lines
16 KiB
Python
440 lines
16 KiB
Python
|
from abc import abstractmethod
|
||
|
from typing import (
|
||
|
Any,
|
||
|
Dict,
|
||
|
Generic,
|
||
|
List,
|
||
|
Optional,
|
||
|
Sequence,
|
||
|
Tuple,
|
||
|
TypeVar,
|
||
|
Union,
|
||
|
cast,
|
||
|
)
|
||
|
|
||
|
from .config import registry
|
||
|
from .types import Floats2d, Ints1d
|
||
|
from .util import get_array_module, to_categorical
|
||
|
|
||
|
# Type variables used to parameterize the generic Loss base class below.
LossT = TypeVar("LossT")  # type of the computed loss value
GradT = TypeVar("GradT")  # type of the gradient w.r.t. the guesses
GuessT = TypeVar("GuessT")  # type of the model's predictions
TruthT = TypeVar("TruthT")  # type of the gold-standard answers

# Aliases for the array / sequence forms the losses accept as truths.
IntsOrFloats = Union[Ints1d, Floats2d]
IntsOrFloatsOrStrs = Union[Ints1d, Floats2d, Sequence[int], Sequence[str]]
|
||
|
|
||
|
|
||
|
class Loss(Generic[GuessT, TruthT, GradT, LossT]):  # pragma: no cover
    """Base class for classes computing the loss / gradient. The class can
    be initialized with settings if needed. It provides get_loss and
    get_grad as separate methods to allow calculating them separately. It
    also provides a __call__ method that returns a tuple of both.
    """

    def __init__(self, **kwargs: Any) -> None:
        # Subclasses may accept configuration keyword arguments here.
        ...

    def __call__(self, guesses: GuessT, truths: TruthT) -> Tuple[GradT, LossT]:
        """Return (gradient, loss) for the given guesses and truths."""
        return self.get_grad(guesses, truths), self.get_loss(guesses, truths)

    @abstractmethod
    def get_grad(self, guesses: GuessT, truths: TruthT) -> GradT:
        """Compute the gradient of the loss with respect to the guesses."""
        ...

    @abstractmethod
    def get_loss(self, guesses: GuessT, truths: TruthT) -> LossT:
        """Compute the loss value."""
        ...
|
||
|
|
||
|
|
||
|
class CategoricalCrossentropy(Loss):
    """Categorical crossentropy loss with support for missing values,
    negative annotations (labels carrying ``neg_prefix``), and label
    smoothing.

    Truths may be a Floats2d of per-class probabilities, an Ints1d of
    class indices, a list of ints, or a list of label strings (the last
    form requires ``names``).
    """

    names: Optional[Sequence[str]]
    missing_value: Optional[Union[str, int]]
    _name_to_i: Dict[str, int]

    def __init__(
        self,
        *,
        normalize: bool = True,
        names: Optional[Sequence[str]] = None,
        missing_value: Optional[Union[str, int]] = None,
        neg_prefix: Optional[str] = None,
        label_smoothing: float = 0.0,
    ):
        self.normalize = normalize
        self.names = names
        self.missing_value = missing_value
        self.neg_prefix = neg_prefix
        self.label_smoothing = label_smoothing
        # Precompute the label-name -> column-index mapping once.
        if names is not None:
            self._name_to_i = {name: i for i, name in enumerate(names)}
        else:
            self._name_to_i = {}

    def convert_truths(self, truths, guesses: Floats2d) -> Tuple[Floats2d, Floats2d]:
        """Normalize ``truths`` into a probability array shaped like
        ``guesses`` plus a float mask that zeroes missing rows and, for
        negative annotations, every column except the negated one."""
        xp = get_array_module(guesses)
        missing = []
        negatives_mask = None
        if self.names:
            negatives_mask = xp.ones((len(truths), len(self.names)), dtype="f")
        missing_value = self.missing_value
        # Convert list of ints or list of strings
        if isinstance(truths, list):
            # Copy so the caller's list is never mutated below.
            truths = list(truths)
            if len(truths):
                if isinstance(truths[0], int):
                    for i, value in enumerate(truths):
                        if value == missing_value:
                            missing.append(i)
                else:
                    if self.names is None:
                        msg = (
                            "Cannot calculate loss from list of strings without names. "
                            "You can pass the names as a keyword argument when you "
                            "create the loss object, "
                            "e.g. CategoricalCrossentropy(names=['dog', 'cat'])"
                        )
                        raise ValueError(msg)
                    for i, value in enumerate(truths):
                        if value == missing_value:
                            # Substitute an arbitrary known label; the row
                            # is masked out via `missing` anyway.
                            truths[i] = self.names[0]
                            missing.append(i)
                        elif (
                            value
                            and self.neg_prefix
                            and value.startswith(self.neg_prefix)
                        ):
                            truths[i] = value[len(self.neg_prefix) :]
                            neg_index = self._name_to_i[truths[i]]
                            # -1 flags the negated class; 0 masks all the
                            # other classes for this example.
                            negatives_mask[i] = 0  # type: ignore
                            negatives_mask[i][neg_index] = -1  # type: ignore
                    truths = [self._name_to_i[name] for name in truths]
            truths = xp.asarray(truths, dtype="i")
            mask = _make_mask(guesses, missing)
        else:
            mask = _make_mask_by_value(truths, guesses, missing_value)
        if truths.ndim != guesses.ndim:
            # transform categorical values to one-hot encoding
            truths = to_categorical(
                cast(Ints1d, truths),
                n_classes=guesses.shape[-1],
                label_smoothing=self.label_smoothing,
            )
        else:
            if self.label_smoothing:
                raise ValueError(
                    "Label smoothing is only applied, when truths have type "
                    "List[str], List[int] or Ints1d, but it seems like Floats2d "
                    "was provided."
                )
        # Transform negative annotations to a 0 for the negated value
        # + mask all other values for that row
        if negatives_mask is not None:
            truths *= negatives_mask
            truths[truths == -1] = 0
            negatives_mask[negatives_mask == -1] = 1
            mask *= negatives_mask
        return truths, mask

    def __call__(
        self, guesses: Floats2d, truths: IntsOrFloatsOrStrs
    ) -> Tuple[Floats2d, float]:
        """Return (gradient, loss); the loss is derived from the gradient."""
        d_truth = self.get_grad(guesses, truths)
        return (d_truth, self._get_loss_from_grad(d_truth))

    def get_grad(self, guesses: Floats2d, truths: IntsOrFloatsOrStrs) -> Floats2d:
        """Gradient w.r.t. ``guesses``: (guesses - target) times the mask,
        divided by the batch size when ``normalize`` is set.

        Raises ValueError on shape mismatch or values outside [0, 1].
        """
        target, mask = self.convert_truths(truths, guesses)
        xp = get_array_module(target)
        if guesses.shape != target.shape:  # pragma: no cover
            err = f"Cannot calculate CategoricalCrossentropy loss: mismatched shapes: {guesses.shape} vs {target.shape}."
            raise ValueError(err)
        if xp.any(guesses > 1) or xp.any(guesses < 0):  # pragma: no cover
            # Plain strings here: these messages have no placeholders, so
            # the previous f-string prefixes were unnecessary (F541).
            err = "Cannot calculate CategoricalCrossentropy loss with guesses outside the [0,1] interval."
            raise ValueError(err)
        if xp.any(target > 1) or xp.any(target < 0):  # pragma: no cover
            err = "Cannot calculate CategoricalCrossentropy loss with truth values outside the [0,1] interval."
            raise ValueError(err)
        difference = guesses - target
        difference *= mask
        if self.normalize:
            difference = difference / guesses.shape[0]
        return difference

    def get_loss(self, guesses: Floats2d, truths: IntsOrFloatsOrStrs) -> float:
        """Scalar loss: the squared sum of the masked gradient."""
        d_truth = self.get_grad(guesses, truths)
        return self._get_loss_from_grad(d_truth)

    def _get_loss_from_grad(self, d_truth: Floats2d) -> float:
        # TODO: Add overload for axis=None case to sum
        return (d_truth**2).sum()  # type: ignore
|
||
|
|
||
|
|
||
|
@registry.losses("CategoricalCrossentropy.v1")
|
||
|
def configure_CategoricalCrossentropy_v1(
|
||
|
*,
|
||
|
normalize: bool = True,
|
||
|
names: Optional[Sequence[str]] = None,
|
||
|
missing_value: Optional[Union[str, int]] = None,
|
||
|
) -> CategoricalCrossentropy:
|
||
|
return CategoricalCrossentropy(
|
||
|
normalize=normalize, names=names, missing_value=missing_value
|
||
|
)
|
||
|
|
||
|
|
||
|
@registry.losses("CategoricalCrossentropy.v2")
|
||
|
def configure_CategoricalCrossentropy_v2(
|
||
|
*,
|
||
|
normalize: bool = True,
|
||
|
names: Optional[Sequence[str]] = None,
|
||
|
missing_value: Optional[Union[str, int]] = None,
|
||
|
neg_prefix: Optional[str] = None,
|
||
|
) -> CategoricalCrossentropy:
|
||
|
return CategoricalCrossentropy(
|
||
|
normalize=normalize,
|
||
|
names=names,
|
||
|
missing_value=missing_value,
|
||
|
neg_prefix=neg_prefix,
|
||
|
)
|
||
|
|
||
|
|
||
|
@registry.losses("CategoricalCrossentropy.v3")
|
||
|
def configure_CategoricalCrossentropy_v3(
|
||
|
*,
|
||
|
normalize: bool = True,
|
||
|
names: Optional[Sequence[str]] = None,
|
||
|
missing_value: Optional[Union[str, int]] = None,
|
||
|
neg_prefix: Optional[str] = None,
|
||
|
label_smoothing: float = 0.0,
|
||
|
) -> CategoricalCrossentropy:
|
||
|
return CategoricalCrossentropy(
|
||
|
normalize=normalize,
|
||
|
names=names,
|
||
|
missing_value=missing_value,
|
||
|
neg_prefix=neg_prefix,
|
||
|
label_smoothing=label_smoothing,
|
||
|
)
|
||
|
|
||
|
|
||
|
class SequenceCategoricalCrossentropy(Loss):
    """Apply CategoricalCrossentropy to each (guesses, truths) pair in a
    batch of sequences, optionally normalizing each gradient by the number
    of sequences."""

    def __init__(
        self,
        *,
        normalize: bool = True,
        names: Optional[Sequence[str]] = None,
        missing_value: Optional[Union[str, int]] = None,
        neg_prefix: Optional[str] = None,
        label_smoothing: float = 0.0,
    ):
        # The inner loss never normalizes; normalization across the batch
        # of sequences is handled by this wrapper instead.
        self.cc = CategoricalCrossentropy(
            normalize=False,
            names=names,
            missing_value=missing_value,
            neg_prefix=neg_prefix,
            label_smoothing=label_smoothing,
        )
        self.normalize = normalize

    def __call__(
        self, guesses: Sequence[Floats2d], truths: Sequence[IntsOrFloatsOrStrs]
    ) -> Tuple[List[Floats2d], float]:
        """Return (per-sequence gradients, total loss)."""
        d_scores = self.get_grad(guesses, truths)
        return d_scores, self._get_loss_from_grad(d_scores)

    def get_grad(
        self, guesses: Sequence[Floats2d], truths: Sequence[IntsOrFloatsOrStrs]
    ) -> List[Floats2d]:
        """Per-sequence gradients; each is divided by the number of
        sequences when ``normalize`` is set.

        Raises ValueError when guesses and truths differ in length.
        """
        if len(guesses) != len(truths):  # pragma: no cover
            raise ValueError(
                "Cannot calculate SequenceCategoricalCrossentropy loss: guesses and truths must be same length"
            )
        n_seqs = len(guesses)
        grads = []
        for seq_guesses, seq_truths in zip(guesses, truths):
            grad = self.cc.get_grad(seq_guesses, seq_truths)
            if self.normalize:
                grad /= n_seqs
            grads.append(grad)
        return grads

    def get_loss(
        self, guesses: Sequence[Floats2d], truths: Sequence[IntsOrFloatsOrStrs]
    ) -> float:
        """Total loss summed over all sequences."""
        return self._get_loss_from_grad(self.get_grad(guesses, truths))

    def _get_loss_from_grad(self, grads: Sequence[Floats2d]) -> float:
        # Start at 0.0 so an empty batch still yields a float.
        return sum((self.cc._get_loss_from_grad(grad) for grad in grads), 0.0)
|
||
|
|
||
|
|
||
|
@registry.losses("SequenceCategoricalCrossentropy.v1")
|
||
|
def configure_SequenceCategoricalCrossentropy_v1(
|
||
|
*, normalize: bool = True, names: Optional[Sequence[str]] = None
|
||
|
) -> SequenceCategoricalCrossentropy:
|
||
|
return SequenceCategoricalCrossentropy(normalize=normalize, names=names)
|
||
|
|
||
|
|
||
|
@registry.losses("SequenceCategoricalCrossentropy.v2")
|
||
|
def configure_SequenceCategoricalCrossentropy_v2(
|
||
|
*,
|
||
|
normalize: bool = True,
|
||
|
names: Optional[Sequence[str]] = None,
|
||
|
neg_prefix: Optional[str] = None,
|
||
|
) -> SequenceCategoricalCrossentropy:
|
||
|
return SequenceCategoricalCrossentropy(
|
||
|
normalize=normalize, names=names, neg_prefix=neg_prefix
|
||
|
)
|
||
|
|
||
|
|
||
|
@registry.losses("SequenceCategoricalCrossentropy.v3")
|
||
|
def configure_SequenceCategoricalCrossentropy_v3(
|
||
|
*,
|
||
|
normalize: bool = True,
|
||
|
names: Optional[Sequence[str]] = None,
|
||
|
missing_value: Optional[Union[str, int]] = None,
|
||
|
neg_prefix: Optional[str] = None,
|
||
|
label_smoothing: float = 0.0,
|
||
|
) -> SequenceCategoricalCrossentropy:
|
||
|
return SequenceCategoricalCrossentropy(
|
||
|
normalize=normalize,
|
||
|
names=names,
|
||
|
missing_value=missing_value,
|
||
|
neg_prefix=neg_prefix,
|
||
|
label_smoothing=label_smoothing,
|
||
|
)
|
||
|
|
||
|
|
||
|
class L2Distance(Loss):
    """Squared L2 distance loss. The gradient is (guesses - truths),
    optionally divided by the batch size; the loss is the squared sum of
    that gradient."""

    def __init__(self, *, normalize: bool = True):
        self.normalize = normalize

    def __call__(self, guesses: Floats2d, truths: Floats2d) -> Tuple[Floats2d, float]:
        """Return (gradient, loss) in a single call."""
        return self.get_grad(guesses, truths), self.get_loss(guesses, truths)

    def get_grad(self, guesses: Floats2d, truths: Floats2d) -> Floats2d:
        """Gradient of the loss w.r.t. ``guesses``.

        Raises ValueError when the two arrays differ in shape.
        """
        if guesses.shape != truths.shape:  # pragma: no cover
            err = f"Cannot calculate L2 distance: mismatched shapes: {guesses.shape} vs {truths.shape}."
            raise ValueError(err)
        difference = guesses - truths
        if self.normalize:
            difference = difference / guesses.shape[0]
        return difference

    def get_loss(self, guesses: Floats2d, truths: Floats2d) -> float:
        """Scalar loss derived from the gradient.

        The shape validation lives in get_grad; the duplicate check that
        used to sit here was redundant (same ValueError, same message) and
        has been removed.
        """
        d_truth = self.get_grad(guesses, truths)
        # TODO: Add overload for axis=None case to sum
        return (d_truth**2).sum()  # type: ignore
|
||
|
|
||
|
|
||
|
@registry.losses("L2Distance.v1")
|
||
|
def configure_L2Distance(*, normalize: bool = True) -> L2Distance:
|
||
|
return L2Distance(normalize=normalize)
|
||
|
|
||
|
|
||
|
class CosineDistance(Loss):
    """Cosine distance loss. The per-row loss is ``abs(cosine - 1)`` where
    ``cosine`` is the row-wise cosine similarity of guesses and truths;
    the gradient is the negated partial derivative of that similarity.
    """

    def __init__(self, *, normalize: bool = True, ignore_zeros: bool = False):
        # normalize: divide gradients/losses by the batch size.
        # ignore_zeros: zero the contribution of rows whose truth vector is
        # entirely zero.
        self.normalize = normalize
        self.ignore_zeros = ignore_zeros

    def __call__(self, guesses: Floats2d, truths: Floats2d) -> Tuple[Floats2d, float]:
        """Return (gradient, loss) for the given guesses/truths."""
        return self.get_grad(guesses, truths), self.get_loss(guesses, truths)

    def get_similarity(self, guesses: Floats2d, truths: Floats2d) -> Floats2d:
        """Row-wise cosine similarity, shape (n, 1).

        NOTE(review): the return annotation previously said ``float``, but
        the keepdims sums below produce a per-row 2d array — corrected to
        ``Floats2d``.
        """
        if guesses.shape != truths.shape:  # pragma: no cover
            err = f"Cannot calculate cosine similarity: mismatched shapes: {guesses.shape} vs {truths.shape}."
            raise ValueError(err)

        xp = get_array_module(guesses)
        # Add a small constant to avoid 0 vectors
        yh = guesses + 1e-8
        y = truths + 1e-8
        norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
        norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
        mul_norms = norm_yh * norm_y
        cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
        return cosine

    def get_grad(self, guesses: Floats2d, truths: Floats2d) -> Floats2d:
        """Gradient of the cosine loss with respect to ``guesses``.

        Raises ValueError when the shapes differ.
        """
        if guesses.shape != truths.shape:  # pragma: no cover
            err = f"Cannot calculate cosine similarity: mismatched shapes: {guesses.shape} vs {truths.shape}."
            raise ValueError(err)

        # Note: not using get_distance() here to avoid duplicating certain calculations
        xp = get_array_module(guesses)
        # Find the zero vectors
        if self.ignore_zeros:
            zero_indices = xp.abs(truths).sum(axis=1) == 0
        # Add a small constant to avoid 0 vectors
        yh = guesses + 1e-8
        y = truths + 1e-8
        # https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosinesimilarity
        norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
        norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
        mul_norms = norm_yh * norm_y
        cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
        # d(similarity)/d(yh) = y/(|yh||y|) - cosine * yh/|yh|^2
        d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
        if self.ignore_zeros:
            # If the target was a zero vector, don't count it in the loss.
            d_yh[zero_indices] = 0
        if self.normalize:
            d_yh = d_yh / guesses.shape[0]
        # Negated so the returned value is a gradient of the *distance*,
        # consistent with get_loss using abs(cosine - 1).
        return -d_yh

    def get_loss(self, guesses: Floats2d, truths: Floats2d) -> float:
        """Summed cosine distance ``abs(cosine - 1)`` over all rows.

        Raises ValueError when the shapes differ.
        """
        if guesses.shape != truths.shape:  # pragma: no cover
            err = f"Cannot calculate cosine similarity: mismatched shapes: {guesses.shape} vs {truths.shape}."
            raise ValueError(err)

        xp = get_array_module(guesses)
        cosine = self.get_similarity(guesses, truths)
        losses = xp.abs(cosine - 1)
        if self.ignore_zeros:
            # If the target was a zero vector, don't count it in the loss.
            zero_indices = xp.abs(truths).sum(axis=1) == 0
            losses[zero_indices] = 0
        if self.normalize:
            losses = losses / guesses.shape[0]
        loss = losses.sum()
        return loss
|
||
|
|
||
|
|
||
|
@registry.losses("CosineDistance.v1")
|
||
|
def configure_CosineDistance(
|
||
|
*, normalize: bool = True, ignore_zeros: bool = False
|
||
|
) -> CosineDistance:
|
||
|
return CosineDistance(normalize=normalize, ignore_zeros=ignore_zeros)
|
||
|
|
||
|
|
||
|
def _make_mask(guesses, missing) -> Floats2d:
    """Float mask shaped like ``guesses`` with the rows listed in
    ``missing`` zeroed out."""
    xp = get_array_module(guesses)
    result = xp.ones(guesses.shape, dtype="f")
    result[missing] = 0
    return result
|
||
|
|
||
|
|
||
|
def _make_mask_by_value(truths, guesses, missing_value) -> Floats2d:
    """Float mask shaped like ``guesses`` with rows whose label equals
    ``missing_value`` zeroed; all ones when ``missing_value`` is None."""
    xp = get_array_module(guesses)
    mask = xp.ones(guesses.shape, dtype="f")
    if missing_value is None:
        return mask
    if truths.ndim == 1:
        row_labels = truths
    else:
        # In 2D truths, labels are encoded as one-hot vectors, so argmax
        # recovers the label indices.
        row_labels = xp.argmax(truths, axis=-1)
    mask[row_labels == missing_value] = 0.0
    return mask
|
||
|
|
||
|
|
||
|
# Public API of this module.
__all__ = [
    "SequenceCategoricalCrossentropy",
    "CategoricalCrossentropy",
    "L2Distance",
    "CosineDistance",
]
|