import pytest
from thinc.api import (
    Adam,
    PyTorchWrapper,
    Relu,
    Softmax,
    TensorFlowWrapper,
    chain,
    clone,
    get_current_ops,
)
from thinc.compat import has_tensorflow, has_torch


@pytest.fixture(scope="module")
def mnist(limit=5000):
    # Module-scoped fixture: a capped slice of MNIST, skipped entirely if
    # ml_datasets is not installed.
    pytest.importorskip("ml_datasets")
    import ml_datasets

    (train_X, train_Y), (dev_X, dev_Y) = ml_datasets.mnist()
    return (train_X[:limit], train_Y[:limit]), (dev_X[:limit], dev_Y[:limit])


def create_relu_softmax(width, dropout, nI, nO):
    # Plain thinc model: two stacked Relu blocks, then a 10-class Softmax.
    return chain(clone(Relu(nO=width, dropout=dropout), 2), Softmax(10, width))


def create_wrapped_pytorch(width, dropout, nI, nO):
    # Equivalent feed-forward network implemented in PyTorch, wrapped so
    # thinc can drive it like a native model.
    import torch
    import torch.nn
    import torch.nn.functional as F

    class PyTorchModel(torch.nn.Module):
        def __init__(self, width, nO, nI, dropout):
            super(PyTorchModel, self).__init__()
            self.dropout1 = torch.nn.Dropout2d(dropout)
            self.dropout2 = torch.nn.Dropout2d(dropout)
            self.fc1 = torch.nn.Linear(nI, width)
            self.fc2 = torch.nn.Linear(width, nO)

        def forward(self, x):
            x = F.relu(x)
            x = self.dropout1(x)
            x = self.fc1(x)
            x = F.relu(x)
            x = self.dropout2(x)
            x = self.fc2(x)
            output = F.log_softmax(x, dim=1)
            return output

    return PyTorchWrapper(PyTorchModel(width, nO, nI, dropout))


def create_wrapped_tensorflow(width, dropout, nI, nO):
    # Equivalent feed-forward network implemented in Keras, wrapped for thinc.
    from tensorflow.keras.layers import Dense, Dropout
    from tensorflow.keras.models import Sequential

    tf_model = Sequential()
    tf_model.add(Dense(width, activation="relu", input_shape=(nI,)))
    tf_model.add(Dropout(dropout))
    tf_model.add(Dense(width, activation="relu"))
    tf_model.add(Dropout(dropout))
    tf_model.add(Dense(nO, activation=None))
    return TensorFlowWrapper(tf_model)


@pytest.fixture(
    # fmt: off
    params=[
        create_relu_softmax,
        pytest.param(create_wrapped_pytorch, marks=pytest.mark.skipif(not has_torch, reason="needs PyTorch")),
        pytest.param(create_wrapped_tensorflow, marks=pytest.mark.skipif(not has_tensorflow, reason="needs TensorFlow"))
    ]
    # fmt: on
)
def create_model(request):
    # Parametrized over the three model factories above, so the end-to-end
    # test runs once per available backend.
    return request.param


@pytest.mark.slow
@pytest.mark.parametrize(("width", "nb_epoch", "min_score"), [(32, 20, 0.8)])
def test_small_end_to_end(width, nb_epoch, min_score, create_model, mnist):
    batch_size = 128
    dropout = 0.2
    (train_X, train_Y), (dev_X, dev_Y) = mnist
    model = create_model(width, dropout, nI=train_X.shape[1], nO=train_Y.shape[1])
    model.initialize(X=train_X[:5], Y=train_Y[:5])
    optimizer = Adam(0.001)
    losses = []
    scores = []
    ops = get_current_ops()
    for i in range(nb_epoch):
        # Train for one epoch on shuffled minibatches.
        for X, Y in model.ops.multibatch(batch_size, train_X, train_Y, shuffle=True):
            Yh, backprop = model.begin_update(X)
            # Ensure that the tensor is type-compatible with the current backend.
            Yh = ops.asarray(Yh)
            backprop(Yh - Y)
            model.finish_update(optimizer)
            losses.append(((Yh - Y) ** 2).sum())
        # Evaluate dev-set accuracy after each epoch.
        correct = 0
        total = 0
        for X, Y in model.ops.multibatch(batch_size, dev_X, dev_Y):
            Yh = model.predict(X)
            Yh = ops.asarray(Yh)
            correct += (Yh.argmax(axis=1) == Y.argmax(axis=1)).sum()
            total += Yh.shape[0]
        score = correct / total
        scores.append(score)
    # The loss should drop, and accuracy should improve and clear the threshold.
    assert losses[-1] < losses[0], losses
    if scores[0] < 1.0:
        assert scores[-1] > scores[0], scores
    assert any([score > min_score for score in scores]), scores
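

# A minimal standalone sketch (not part of the test suite): it builds the plain
# thinc Relu/Softmax model defined above and runs a forward pass on zero-filled
# sample data. The 784-input / 10-class shapes are an assumption matching
# MNIST's flattened images and one-hot labels.
if __name__ == "__main__":
    import numpy

    demo_model = create_relu_softmax(width=32, dropout=0.2, nI=784, nO=10)
    X = numpy.zeros((8, 784), dtype="f")
    Y = numpy.zeros((8, 10), dtype="f")
    demo_model.initialize(X=X, Y=Y)  # infer unset dimensions from sample data
    print("output shape:", demo_model.predict(X).shape)  # expected: (8, 10)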