from typing import Tuple, cast

import numpy
import pytest
from numpy.testing import assert_allclose

from thinc.api import Model, NumpyOps, Softmax_v2
from thinc.types import Floats2d, Ints1d
from thinc.util import has_torch, torch2xp, xp2torch

OPS = NumpyOps()

inputs = OPS.xp.asarray([[4, 2, 3, 4], [1, 5, 3, 1], [9, 8, 5, 7]], dtype="f")
outputs = OPS.xp.asarray(
    [
        [0.39948627, 0.05406459, 0.14696279, 0.39948627],
        [0.01562812, 0.8532666, 0.11547707, 0.01562812],
        [0.657233, 0.24178252, 0.01203764, 0.08894681],
    ],
    dtype="f",
)


def test_unnormalized_softmax_backprop():
    model = Softmax_v2(normalize_outputs=False)
    model.initialize(inputs, outputs)
    _, backprop = model(inputs, is_train=False)
    with pytest.raises(ValueError, match="backprop is not supported"):
        backprop(OPS.xp.zeros_like(outputs))

    # Backprop should not fail when training.
    _, backprop = model(inputs, is_train=True)
    dX = backprop(OPS.xp.zeros_like(outputs))
    assert OPS.xp.all(dX == 0.0)


def torch_softmax_with_temperature(
    model: Model, X: Floats2d, targets: Ints1d
) -> Tuple[Floats2d, Floats2d]:
    import torch

    Wt = xp2torch(model.get_param("W"))
    bt = xp2torch(model.get_param("b"))
    temperature = model.attrs["softmax_temperature"]

    Xt = xp2torch(X, requires_grad=True)
    Yt_gold = xp2torch(targets).long()

    XWbt = (Xt @ Wt) + bt
    XWbt_temp = XWbt / temperature

    loss = torch.nn.CrossEntropyLoss()
    output = loss(XWbt_temp, Yt_gold)
    output.backward()

    return cast(
        Floats2d, torch2xp(torch.nn.functional.softmax(XWbt_temp, dim=-1))
    ), cast(Floats2d, torch2xp(cast(torch.Tensor, Xt.grad)))


@pytest.mark.skipif(not has_torch, reason="needs PyTorch")
@pytest.mark.parametrize("temperature", [0.5, 1.0, 2.0])
def test_softmax_temperature(temperature):
    model = Softmax_v2(
        temperature=temperature,
        init_W=lambda ops, shape: ops.xp.eye(shape[1], dtype="f"),
        init_b=lambda ops, shape: ops.xp.zeros(shape, dtype="f"),
    )
    X = OPS.xp.arange(-1, 1, 0.2, dtype="f").reshape(1, 10)
    targets = OPS.asarray1i([4])
    Y_gold = OPS.xp.eye(10, dtype="f")[targets]
    model.initialize(X, Y_gold)

    Yt, dXt = torch_softmax_with_temperature(model, X, targets)

    Y, backprop = model(X, is_train=True)
    dX = backprop(Y - Y_gold)

    assert_allclose(Y, Yt, atol=1e-4)
    assert_allclose(dX, dXt, atol=1e-4)


def test_reject_incorrect_temperature():
    with pytest.raises(ValueError, match=r"softmax temperature.*zero"):
        Softmax_v2(normalize_outputs=False, temperature=0.0)

    model = Softmax_v2(normalize_outputs=False)
    model.attrs["softmax_temperature"] = 0.0
    model.initialize(inputs, outputs)
    with pytest.raises(ValueError, match=r"softmax temperature.*zero"):
        model(inputs, is_train=False)
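

# For reference, a minimal numpy sketch of the temperature-scaled softmax that
# the tests above check against PyTorch. This is an illustrative sketch of the
# maths only, not thinc's implementation: the helper name and the (X @ W) + b
# layout are assumptions chosen to match torch_softmax_with_temperature above,
# where W is an identity matrix so transposition does not matter.
def _reference_softmax_with_temperature(X, W, b, temperature):
    logits = ((X @ W) + b) / temperature
    # Subtract the row-wise max before exponentiating for numerical stability.
    logits = logits - logits.max(axis=-1, keepdims=True)
    exp = numpy.exp(logits)
    return exp / exp.sum(axis=-1, keepdims=True)


# With cross-entropy loss, an identity W, and a single example, the gradient
# of the loss with respect to X reduces to (Y - Y_gold) / temperature. That is
# why test_softmax_temperature can feed backprop(Y - Y_gold) into the model
# and expect the result to match PyTorch's Xt.grad.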