226 lines
7.3 KiB
Python
226 lines
7.3 KiB
Python
# coding=utf-8
|
|
# Copyright 2023 The HuggingFace Inc. team.
|
|
# Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
"""
|
|
Time series distributional output classes and utilities.
|
|
"""
|
|
from typing import Callable, Dict, Optional, Tuple
|
|
|
|
import torch
|
|
from torch import nn
|
|
from torch.distributions import (
|
|
AffineTransform,
|
|
Distribution,
|
|
Independent,
|
|
NegativeBinomial,
|
|
Normal,
|
|
StudentT,
|
|
TransformedDistribution,
|
|
)
|
|
|
|
|
|
class AffineTransformed(TransformedDistribution):
|
|
def __init__(self, base_distribution: Distribution, loc=None, scale=None, event_dim=0):
|
|
self.scale = 1.0 if scale is None else scale
|
|
self.loc = 0.0 if loc is None else loc
|
|
|
|
super().__init__(base_distribution, [AffineTransform(loc=self.loc, scale=self.scale, event_dim=event_dim)])
|
|
|
|
@property
|
|
def mean(self):
|
|
"""
|
|
Returns the mean of the distribution.
|
|
"""
|
|
return self.base_dist.mean * self.scale + self.loc
|
|
|
|
@property
|
|
def variance(self):
|
|
"""
|
|
Returns the variance of the distribution.
|
|
"""
|
|
return self.base_dist.variance * self.scale**2
|
|
|
|
@property
|
|
def stddev(self):
|
|
"""
|
|
Returns the standard deviation of the distribution.
|
|
"""
|
|
return self.variance.sqrt()
|
|
|
|
|
|
class ParameterProjection(nn.Module):
|
|
def __init__(
|
|
self, in_features: int, args_dim: Dict[str, int], domain_map: Callable[..., Tuple[torch.Tensor]], **kwargs
|
|
) -> None:
|
|
super().__init__(**kwargs)
|
|
self.args_dim = args_dim
|
|
self.proj = nn.ModuleList([nn.Linear(in_features, dim) for dim in args_dim.values()])
|
|
self.domain_map = domain_map
|
|
|
|
def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor]:
|
|
params_unbounded = [proj(x) for proj in self.proj]
|
|
|
|
return self.domain_map(*params_unbounded)
|
|
|
|
|
|
class LambdaLayer(nn.Module):
|
|
def __init__(self, function):
|
|
super().__init__()
|
|
self.function = function
|
|
|
|
def forward(self, x, *args):
|
|
return self.function(x, *args)
|
|
|
|
|
|
class DistributionOutput:
|
|
distribution_class: type
|
|
in_features: int
|
|
args_dim: Dict[str, int]
|
|
|
|
def __init__(self, dim: int = 1) -> None:
|
|
self.dim = dim
|
|
self.args_dim = {k: dim * self.args_dim[k] for k in self.args_dim}
|
|
|
|
def _base_distribution(self, distr_args):
|
|
if self.dim == 1:
|
|
return self.distribution_class(*distr_args)
|
|
else:
|
|
return Independent(self.distribution_class(*distr_args), 1)
|
|
|
|
def distribution(
|
|
self,
|
|
distr_args,
|
|
loc: Optional[torch.Tensor] = None,
|
|
scale: Optional[torch.Tensor] = None,
|
|
) -> Distribution:
|
|
distr = self._base_distribution(distr_args)
|
|
if loc is None and scale is None:
|
|
return distr
|
|
else:
|
|
return AffineTransformed(distr, loc=loc, scale=scale, event_dim=self.event_dim)
|
|
|
|
@property
|
|
def event_shape(self) -> Tuple:
|
|
r"""
|
|
Shape of each individual event contemplated by the distributions that this object constructs.
|
|
"""
|
|
return () if self.dim == 1 else (self.dim,)
|
|
|
|
@property
|
|
def event_dim(self) -> int:
|
|
r"""
|
|
Number of event dimensions, i.e., length of the `event_shape` tuple, of the distributions that this object
|
|
constructs.
|
|
"""
|
|
return len(self.event_shape)
|
|
|
|
@property
|
|
def value_in_support(self) -> float:
|
|
r"""
|
|
A float that will have a valid numeric value when computing the log-loss of the corresponding distribution. By
|
|
default 0.0. This value will be used when padding data series.
|
|
"""
|
|
return 0.0
|
|
|
|
def get_parameter_projection(self, in_features: int) -> nn.Module:
|
|
r"""
|
|
Return the parameter projection layer that maps the input to the appropriate parameters of the distribution.
|
|
"""
|
|
return ParameterProjection(
|
|
in_features=in_features,
|
|
args_dim=self.args_dim,
|
|
domain_map=LambdaLayer(self.domain_map),
|
|
)
|
|
|
|
def domain_map(self, *args: torch.Tensor):
|
|
r"""
|
|
Converts arguments to the right shape and domain. The domain depends on the type of distribution, while the
|
|
correct shape is obtained by reshaping the trailing axis in such a way that the returned tensors define a
|
|
distribution of the right event_shape.
|
|
"""
|
|
raise NotImplementedError()
|
|
|
|
@staticmethod
|
|
def squareplus(x: torch.Tensor) -> torch.Tensor:
|
|
r"""
|
|
Helper to map inputs to the positive orthant by applying the square-plus operation. Reference:
|
|
https://twitter.com/jon_barron/status/1387167648669048833
|
|
"""
|
|
return (x + torch.sqrt(torch.square(x) + 4.0)) / 2.0
|
|
|
|
|
|
class StudentTOutput(DistributionOutput):
|
|
"""
|
|
Student-T distribution output class.
|
|
"""
|
|
|
|
args_dim: Dict[str, int] = {"df": 1, "loc": 1, "scale": 1}
|
|
distribution_class: type = StudentT
|
|
|
|
@classmethod
|
|
def domain_map(cls, df: torch.Tensor, loc: torch.Tensor, scale: torch.Tensor):
|
|
scale = cls.squareplus(scale).clamp_min(torch.finfo(scale.dtype).eps)
|
|
df = 2.0 + cls.squareplus(df)
|
|
return df.squeeze(-1), loc.squeeze(-1), scale.squeeze(-1)
|
|
|
|
|
|
class NormalOutput(DistributionOutput):
|
|
"""
|
|
Normal distribution output class.
|
|
"""
|
|
|
|
args_dim: Dict[str, int] = {"loc": 1, "scale": 1}
|
|
distribution_class: type = Normal
|
|
|
|
@classmethod
|
|
def domain_map(cls, loc: torch.Tensor, scale: torch.Tensor):
|
|
scale = cls.squareplus(scale).clamp_min(torch.finfo(scale.dtype).eps)
|
|
return loc.squeeze(-1), scale.squeeze(-1)
|
|
|
|
|
|
class NegativeBinomialOutput(DistributionOutput):
|
|
"""
|
|
Negative Binomial distribution output class.
|
|
"""
|
|
|
|
args_dim: Dict[str, int] = {"total_count": 1, "logits": 1}
|
|
distribution_class: type = NegativeBinomial
|
|
|
|
@classmethod
|
|
def domain_map(cls, total_count: torch.Tensor, logits: torch.Tensor):
|
|
total_count = cls.squareplus(total_count)
|
|
return total_count.squeeze(-1), logits.squeeze(-1)
|
|
|
|
def _base_distribution(self, distr_args) -> Distribution:
|
|
total_count, logits = distr_args
|
|
if self.dim == 1:
|
|
return self.distribution_class(total_count=total_count, logits=logits)
|
|
else:
|
|
return Independent(self.distribution_class(total_count=total_count, logits=logits), 1)
|
|
|
|
# Overwrites the parent class method. We cannot scale using the affine
|
|
# transformation since negative binomial should return integers. Instead
|
|
# we scale the parameters.
|
|
def distribution(
|
|
self, distr_args, loc: Optional[torch.Tensor] = None, scale: Optional[torch.Tensor] = None
|
|
) -> Distribution:
|
|
total_count, logits = distr_args
|
|
|
|
if scale is not None:
|
|
# See scaling property of Gamma.
|
|
logits += scale.log()
|
|
|
|
return self._base_distribution((total_count, logits))
|