# ai-content-maker/.venv/Lib/site-packages/thinc/loss.py

from abc import abstractmethod
from typing import (
Any,
Dict,
Generic,
List,
Optional,
Sequence,
Tuple,
TypeVar,
Union,
cast,
)
from .config import registry
from .types import Floats2d, Ints1d
from .util import get_array_module, to_categorical

LossT = TypeVar("LossT")
GradT = TypeVar("GradT")
GuessT = TypeVar("GuessT")
TruthT = TypeVar("TruthT")

IntsOrFloats = Union[Ints1d, Floats2d]
IntsOrFloatsOrStrs = Union[Ints1d, Floats2d, Sequence[int], Sequence[str]]


class Loss(Generic[GuessT, TruthT, GradT, LossT]):  # pragma: no cover
"""Base class for classes computing the loss / gradient. The class can
be initialized with settings if needed. It provides get_loss and
get_grad as separate methods to allow calculating them separately. It
also provides a __call__ method that returns a tuple of both.
"""
    def __init__(self, **kwargs: Any) -> None:
        ...

    def __call__(self, guesses: GuessT, truths: TruthT) -> Tuple[GradT, LossT]:
        return self.get_grad(guesses, truths), self.get_loss(guesses, truths)

    @abstractmethod
    def get_grad(self, guesses: GuessT, truths: TruthT) -> GradT:
        ...

    @abstractmethod
    def get_loss(self, guesses: GuessT, truths: TruthT) -> LossT:
        ...
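
# A minimal sketch (illustrative, not part of thinc) of implementing the Loss
# interface above for a hypothetical hinge-style loss on numpy arrays with
# labels in {-1, 1}. "HingeLoss" and its formulation are assumptions made for
# the example, not an API this module provides:
#
#     class HingeLoss(Loss):
#         def get_grad(self, guesses, truths):
#             # derivative of max(0, 1 - truths * guesses) w.r.t. guesses
#             return -truths * (truths * guesses < 1)
#
#         def get_loss(self, guesses, truths):
#             xp = get_array_module(guesses)
#             return float(xp.maximum(0, 1 - truths * guesses).sum())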


class CategoricalCrossentropy(Loss):
    names: Optional[Sequence[str]]
    missing_value: Optional[Union[str, int]]
    _name_to_i: Dict[str, int]

    def __init__(
self,
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
missing_value: Optional[Union[str, int]] = None,
neg_prefix: Optional[str] = None,
label_smoothing: float = 0.0,
):
self.normalize = normalize
self.names = names
self.missing_value = missing_value
self.neg_prefix = neg_prefix
self.label_smoothing = label_smoothing
if names is not None:
self._name_to_i = {name: i for i, name in enumerate(names)}
else:
self._name_to_i = {}

    def convert_truths(self, truths, guesses: Floats2d) -> Tuple[Floats2d, Floats2d]:
xp = get_array_module(guesses)
missing = []
negatives_mask = None
if self.names:
negatives_mask = xp.ones((len(truths), len(self.names)), dtype="f")
missing_value = self.missing_value
        # Convert a list of ints or a list of strings; copy it first so the
        # loop below can relabel entries without mutating the caller's list.
        if isinstance(truths, list):
            truths = list(truths)
if len(truths):
if isinstance(truths[0], int):
for i, value in enumerate(truths):
if value == missing_value:
missing.append(i)
else:
if self.names is None:
msg = (
"Cannot calculate loss from list of strings without names. "
"You can pass the names as a keyword argument when you "
"create the loss object, "
"e.g. CategoricalCrossentropy(names=['dog', 'cat'])"
)
raise ValueError(msg)
for i, value in enumerate(truths):
if value == missing_value:
truths[i] = self.names[0]
missing.append(i)
elif (
value
and self.neg_prefix
and value.startswith(self.neg_prefix)
):
truths[i] = value[len(self.neg_prefix) :]
neg_index = self._name_to_i[truths[i]]
negatives_mask[i] = 0 # type: ignore
negatives_mask[i][neg_index] = -1 # type: ignore
truths = [self._name_to_i[name] for name in truths]
truths = xp.asarray(truths, dtype="i")
mask = _make_mask(guesses, missing)
else:
mask = _make_mask_by_value(truths, guesses, missing_value)
if truths.ndim != guesses.ndim:
# transform categorical values to one-hot encoding
truths = to_categorical(
cast(Ints1d, truths),
n_classes=guesses.shape[-1],
label_smoothing=self.label_smoothing,
)
else:
if self.label_smoothing:
raise ValueError(
"Label smoothing is only applied, when truths have type "
"List[str], List[int] or Ints1d, but it seems like Floats2d "
"was provided."
)
# Transform negative annotations to a 0 for the negated value
# + mask all other values for that row
if negatives_mask is not None:
truths *= negatives_mask
truths[truths == -1] = 0
negatives_mask[negatives_mask == -1] = 1
mask *= negatives_mask
return truths, mask
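
    # Example of what convert_truths produces (illustrative, values made up):
    # with names=["cat", "dog"] and neg_prefix="!", truths ["dog", "!dog"]
    # become one-hot rows [[0, 1], [0, 0]], and the second row of the mask is
    # zero everywhere except the negated class's column.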

    def __call__(
self, guesses: Floats2d, truths: IntsOrFloatsOrStrs
) -> Tuple[Floats2d, float]:
d_truth = self.get_grad(guesses, truths)
return (d_truth, self._get_loss_from_grad(d_truth))

    def get_grad(self, guesses: Floats2d, truths: IntsOrFloatsOrStrs) -> Floats2d:
target, mask = self.convert_truths(truths, guesses)
xp = get_array_module(target)
if guesses.shape != target.shape: # pragma: no cover
err = f"Cannot calculate CategoricalCrossentropy loss: mismatched shapes: {guesses.shape} vs {target.shape}."
raise ValueError(err)
        if xp.any(guesses > 1) or xp.any(guesses < 0):  # pragma: no cover
            err = "Cannot calculate CategoricalCrossentropy loss with guesses outside the [0,1] interval."
            raise ValueError(err)
        if xp.any(target > 1) or xp.any(target < 0):  # pragma: no cover
            err = "Cannot calculate CategoricalCrossentropy loss with truth values outside the [0,1] interval."
            raise ValueError(err)
difference = guesses - target
difference *= mask
if self.normalize:
difference = difference / guesses.shape[0]
return difference

    def get_loss(self, guesses: Floats2d, truths: IntsOrFloatsOrStrs) -> float:
d_truth = self.get_grad(guesses, truths)
return self._get_loss_from_grad(d_truth)

    def _get_loss_from_grad(self, d_truth: Floats2d) -> float:
# TODO: Add overload for axis=None case to sum
return (d_truth**2).sum() # type: ignore
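
# Illustrative usage sketch (labels and values made up): gradient and loss for
# string labels against a (n_samples, n_classes) probability array:
#
#     import numpy
#     loss = CategoricalCrossentropy(names=["cat", "dog"])
#     guesses = numpy.asarray([[0.2, 0.8], [0.9, 0.1]], dtype="f")
#     grads, loss_value = loss(guesses, ["dog", "cat"])
#     # grads has the same shape as guesses; loss_value is the sum of
#     # squared, normalized differences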
@registry.losses("CategoricalCrossentropy.v1")
def configure_CategoricalCrossentropy_v1(
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
missing_value: Optional[Union[str, int]] = None,
) -> CategoricalCrossentropy:
return CategoricalCrossentropy(
normalize=normalize, names=names, missing_value=missing_value
)
@registry.losses("CategoricalCrossentropy.v2")
def configure_CategoricalCrossentropy_v2(
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
missing_value: Optional[Union[str, int]] = None,
neg_prefix: Optional[str] = None,
) -> CategoricalCrossentropy:
return CategoricalCrossentropy(
normalize=normalize,
names=names,
missing_value=missing_value,
neg_prefix=neg_prefix,
)
@registry.losses("CategoricalCrossentropy.v3")
def configure_CategoricalCrossentropy_v3(
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
missing_value: Optional[Union[str, int]] = None,
neg_prefix: Optional[str] = None,
label_smoothing: float = 0.0,
) -> CategoricalCrossentropy:
return CategoricalCrossentropy(
normalize=normalize,
names=names,
missing_value=missing_value,
neg_prefix=neg_prefix,
label_smoothing=label_smoothing,
)
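
# The registered versions above differ only in which keyword arguments they
# expose. A sketch of resolving one by name (thinc's registries are built on
# catalogue, whose registries expose .get; treated as an assumption here):
#
#     make_loss = registry.losses.get("CategoricalCrossentropy.v3")
#     loss = make_loss(names=["cat", "dog"], label_smoothing=0.1)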


class SequenceCategoricalCrossentropy(Loss):
    def __init__(
self,
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
missing_value: Optional[Union[str, int]] = None,
neg_prefix: Optional[str] = None,
label_smoothing: float = 0.0,
):
self.cc = CategoricalCrossentropy(
normalize=False,
names=names,
missing_value=missing_value,
neg_prefix=neg_prefix,
label_smoothing=label_smoothing,
)
self.normalize = normalize

    def __call__(
self, guesses: Sequence[Floats2d], truths: Sequence[IntsOrFloatsOrStrs]
) -> Tuple[List[Floats2d], float]:
grads = self.get_grad(guesses, truths)
loss = self._get_loss_from_grad(grads)
return grads, loss

    def get_grad(
self, guesses: Sequence[Floats2d], truths: Sequence[IntsOrFloatsOrStrs]
) -> List[Floats2d]:
err = "Cannot calculate SequenceCategoricalCrossentropy loss: guesses and truths must be same length"
if len(guesses) != len(truths): # pragma: no cover
raise ValueError(err)
n = len(guesses)
d_scores = []
for yh, y in zip(guesses, truths):
d_yh = self.cc.get_grad(yh, y)
if self.normalize:
d_yh /= n
d_scores.append(d_yh)
return d_scores

    def get_loss(
self, guesses: Sequence[Floats2d], truths: Sequence[IntsOrFloatsOrStrs]
) -> float:
return self._get_loss_from_grad(self.get_grad(guesses, truths))

    def _get_loss_from_grad(self, grads: Sequence[Floats2d]) -> float:
loss = 0.0
for grad in grads:
loss += self.cc._get_loss_from_grad(grad)
return loss
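
# Illustrative usage sketch (labels and arrays made up): per-sequence
# gradients for variable-length sequences:
#
#     import numpy
#     seq_loss = SequenceCategoricalCrossentropy(names=["B", "I", "O"])
#     guesses = [
#         numpy.asarray([[0.7, 0.2, 0.1], [0.1, 0.1, 0.8]], dtype="f"),
#         numpy.asarray([[0.3, 0.6, 0.1]], dtype="f"),
#     ]
#     grads, loss_value = seq_loss(guesses, [["B", "O"], ["I"]])
#     # grads is a list with one array per sequence; with normalize=True each
#     # gradient is divided by the number of sequences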
@registry.losses("SequenceCategoricalCrossentropy.v1")
def configure_SequenceCategoricalCrossentropy_v1(
*, normalize: bool = True, names: Optional[Sequence[str]] = None
) -> SequenceCategoricalCrossentropy:
return SequenceCategoricalCrossentropy(normalize=normalize, names=names)
@registry.losses("SequenceCategoricalCrossentropy.v2")
def configure_SequenceCategoricalCrossentropy_v2(
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
neg_prefix: Optional[str] = None,
) -> SequenceCategoricalCrossentropy:
return SequenceCategoricalCrossentropy(
normalize=normalize, names=names, neg_prefix=neg_prefix
)
@registry.losses("SequenceCategoricalCrossentropy.v3")
def configure_SequenceCategoricalCrossentropy_v3(
*,
normalize: bool = True,
names: Optional[Sequence[str]] = None,
missing_value: Optional[Union[str, int]] = None,
neg_prefix: Optional[str] = None,
label_smoothing: float = 0.0,
) -> SequenceCategoricalCrossentropy:
return SequenceCategoricalCrossentropy(
normalize=normalize,
names=names,
missing_value=missing_value,
neg_prefix=neg_prefix,
label_smoothing=label_smoothing,
)


class L2Distance(Loss):
    def __init__(self, *, normalize: bool = True):
        self.normalize = normalize

    def __call__(self, guesses: Floats2d, truths: Floats2d) -> Tuple[Floats2d, float]:
return self.get_grad(guesses, truths), self.get_loss(guesses, truths)

    def get_grad(self, guesses: Floats2d, truths: Floats2d) -> Floats2d:
if guesses.shape != truths.shape: # pragma: no cover
err = f"Cannot calculate L2 distance: mismatched shapes: {guesses.shape} vs {truths.shape}."
raise ValueError(err)
difference = guesses - truths
if self.normalize:
difference = difference / guesses.shape[0]
return difference

    def get_loss(self, guesses: Floats2d, truths: Floats2d) -> float:
if guesses.shape != truths.shape: # pragma: no cover
err = f"Cannot calculate L2 distance: mismatched shapes: {guesses.shape} vs {truths.shape}."
raise ValueError(err)
d_truth = self.get_grad(guesses, truths)
# TODO: Add overload for axis=None case to sum
return (d_truth**2).sum() # type: ignore
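
# Illustrative usage sketch (values made up): squared-distance gradient and
# loss between two same-shaped float arrays:
#
#     import numpy
#     l2 = L2Distance()
#     guesses = numpy.asarray([[0.5, 0.5], [0.9, 0.1]], dtype="f")
#     truths = numpy.asarray([[1.0, 0.0], [1.0, 0.0]], dtype="f")
#     grad, loss_value = l2(guesses, truths)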
@registry.losses("L2Distance.v1")
def configure_L2Distance(*, normalize: bool = True) -> L2Distance:
return L2Distance(normalize=normalize)


class CosineDistance(Loss):
    def __init__(self, *, normalize: bool = True, ignore_zeros: bool = False):
        self.normalize = normalize
        self.ignore_zeros = ignore_zeros

    def __call__(self, guesses: Floats2d, truths: Floats2d) -> Tuple[Floats2d, float]:
return self.get_grad(guesses, truths), self.get_loss(guesses, truths)

    def get_similarity(self, guesses: Floats2d, truths: Floats2d) -> Floats2d:
if guesses.shape != truths.shape: # pragma: no cover
err = f"Cannot calculate cosine similarity: mismatched shapes: {guesses.shape} vs {truths.shape}."
raise ValueError(err)
xp = get_array_module(guesses)
# Add a small constant to avoid 0 vectors
yh = guesses + 1e-8
y = truths + 1e-8
norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
mul_norms = norm_yh * norm_y
cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
return cosine

    def get_grad(self, guesses: Floats2d, truths: Floats2d) -> Floats2d:
if guesses.shape != truths.shape: # pragma: no cover
err = f"Cannot calculate cosine similarity: mismatched shapes: {guesses.shape} vs {truths.shape}."
raise ValueError(err)
        # Note: not calling get_similarity() here to avoid duplicating calculations
xp = get_array_module(guesses)
# Find the zero vectors
if self.ignore_zeros:
zero_indices = xp.abs(truths).sum(axis=1) == 0
# Add a small constant to avoid 0 vectors
yh = guesses + 1e-8
y = truths + 1e-8
# https://math.stackexchange.com/questions/1923613/partial-derivative-of-cosinesimilarity
norm_yh = xp.linalg.norm(yh, axis=1, keepdims=True)
norm_y = xp.linalg.norm(y, axis=1, keepdims=True)
mul_norms = norm_yh * norm_y
cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
d_yh = (y / mul_norms) - (cosine * (yh / norm_yh**2))
if self.ignore_zeros:
# If the target was a zero vector, don't count it in the loss.
d_yh[zero_indices] = 0
if self.normalize:
d_yh = d_yh / guesses.shape[0]
return -d_yh

    def get_loss(self, guesses: Floats2d, truths: Floats2d) -> float:
if guesses.shape != truths.shape: # pragma: no cover
err = f"Cannot calculate cosine similarity: mismatched shapes: {guesses.shape} vs {truths.shape}."
raise ValueError(err)
xp = get_array_module(guesses)
cosine = self.get_similarity(guesses, truths)
losses = xp.abs(cosine - 1)
if self.ignore_zeros:
# If the target was a zero vector, don't count it in the loss.
zero_indices = xp.abs(truths).sum(axis=1) == 0
losses[zero_indices] = 0
if self.normalize:
losses = losses / guesses.shape[0]
loss = losses.sum()
return loss
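
# Illustrative usage sketch (values made up): cosine distance where all-zero
# target rows are excluded via ignore_zeros:
#
#     import numpy
#     cos = CosineDistance(ignore_zeros=True)
#     guesses = numpy.asarray([[0.1, 0.9], [0.5, 0.5]], dtype="f")
#     truths = numpy.asarray([[0.0, 1.0], [0.0, 0.0]], dtype="f")
#     grad, loss_value = cos(guesses, truths)
#     # the second row contributes nothing: its target is a zero vector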
@registry.losses("CosineDistance.v1")
def configure_CosineDistance(
*, normalize: bool = True, ignore_zeros: bool = False
) -> CosineDistance:
return CosineDistance(normalize=normalize, ignore_zeros=ignore_zeros)


def _make_mask(guesses, missing) -> Floats2d:
xp = get_array_module(guesses)
mask = xp.ones(guesses.shape, dtype="f")
mask[missing] = 0
return mask


def _make_mask_by_value(truths, guesses, missing_value) -> Floats2d:
xp = get_array_module(guesses)
mask = xp.ones(guesses.shape, dtype="f")
if missing_value is not None:
if truths.ndim == 1:
mask[truths == missing_value] = 0.0
else:
# In 2D truths, labels are encoded as one-hot vectors, so we can get
# the label indices using argmax.
labels = xp.argmax(truths, axis=-1)
mask[labels == missing_value] = 0.0
return mask
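
# Illustrative sketch of the helper above (values made up): rows whose truth
# equals missing_value are zeroed in the mask:
#
#     import numpy
#     truths = numpy.asarray([1, 0, 2], dtype="i")
#     guesses = numpy.zeros((3, 3), dtype="f")
#     mask = _make_mask_by_value(truths, guesses, missing_value=0)
#     # row 1 of the mask is all zeros; rows 0 and 2 are all ones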


__all__ = [
"SequenceCategoricalCrossentropy",
"CategoricalCrossentropy",
"L2Distance",
"CosineDistance",
]