ai-content-maker/.venv/Lib/site-packages/torch/onnx/symbolic_opset14.py

"""This file exports ONNX ops for opset 14.

Note [ONNX operators that are added/updated in opset 14]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
New operators:
    HardSwish, Trilu

Updated operators:
    Reshape
    Add, Sub, Mul, Div
    GRU, LSTM, RNN
    BatchNorm, Cumsum, Relu
"""

# EDITING THIS FILE? READ THIS FIRST!
# see Note [Edit Symbolic Files] in README.md
from __future__ import annotations

import functools
from typing import Optional

import torch
from torch.onnx import _constants, _type_utils, symbolic_helper
from torch.onnx._globals import GLOBALS
from torch.onnx._internal import _beartype, jit_utils, registration

# Names of the public symbolic functions defined in this module.
__all__ = [
    "hardswish",
    "tril",
    "triu",
    "reshape",
    "batch_norm",
    "quantized_hardswish",
    "scaled_dot_product_attention",
]

# Registration decorator with ``opset=14`` pre-bound, so each symbolic function
# below only has to name the operator it handles.
_onnx_symbolic = functools.partial(registration.onnx_symbolic, opset=14)


@_onnx_symbolic("aten::hardswish")
@symbolic_helper.parse_args("v")
@_beartype.beartype
def hardswish(g: jit_utils.GraphContext, self):
    """Export ``aten::hardswish`` as a single ONNX ``HardSwish`` node.

    Args:
        g: Graph context the node is emitted into.
        self: Input value, forwarded unchanged to the ONNX operator.

    Returns:
        The output of the emitted ``HardSwish`` node.
    """
    activated = g.op("HardSwish", self)
    return activated


@_onnx_symbolic("aten::tril")
@_beartype.beartype
def tril(g: jit_utils.GraphContext, self, diagonal, out=None):
    """Export ``aten::tril`` as an ONNX ``Trilu`` node with ``upper=0``.

    Args:
        g: Graph context the node is emitted into.
        self: Input value.
        diagonal: Diagonal offset, passed through as the second ``Trilu`` input.
        out: Accepted for signature compatibility; not used by this function.

    Returns:
        The output of the emitted ``Trilu`` node.
    """
    lower_part = g.op("Trilu", self, diagonal, upper_i=0)
    return lower_part


@_onnx_symbolic("aten::triu")
@_beartype.beartype
def triu(g: jit_utils.GraphContext, self, diagonal, out=None):
    """Export ``aten::triu`` as an ONNX ``Trilu`` node with ``upper=1``.

    Args:
        g: Graph context the node is emitted into.
        self: Input value.
        diagonal: Diagonal offset, passed through as the second ``Trilu`` input.
        out: Accepted for signature compatibility; not used by this function.

    Returns:
        The output of the emitted ``Trilu`` node.
    """
    upper_part = g.op("Trilu", self, diagonal, upper_i=1)
    return upper_part


@_onnx_symbolic("aten::reshape")
@symbolic_helper.quantized_args(True)
@symbolic_helper.parse_args("v", "v")
@_beartype.beartype
def reshape(g: jit_utils.GraphContext, self, shape):
    """Export ``aten::reshape`` through the shared reshape helper.

    Args:
        g: Graph context the node is emitted into.
        self: Value to reshape.
        shape: Value describing the target shape.

    Returns:
        Whatever ``symbolic_helper._reshape_helper`` produces for these inputs.
    """
    # NOTE: Due to bug in ORT https://github.com/microsoft/onnxruntime/issues/10664
    #       Reshape export cannot utilize the new allowzero attribute introduced in opset 14,
    #       so ``allowzero`` is pinned to 0 here.
    reshaped = symbolic_helper._reshape_helper(g, self, shape, allowzero=0)
    return reshaped


@_onnx_symbolic("aten::batch_norm")
@symbolic_helper.parse_args("v", "v", "v", "v", "v", "i", "f", "f", "i")
@_beartype.beartype
def batch_norm(
    g: jit_utils.GraphContext,
    input,
    weight,
    bias,
    running_mean,
    running_var,
    training,
    momentum,
    eps,
    cudnn_enabled,
):
    """Export ``aten::batch_norm`` as an ONNX ``BatchNormalization`` node.

    Args:
        g: Graph context the node is emitted into.
        input: Value to normalize.
        weight: Scale value; may be replaced by ``_batchnorm_helper``.
        bias: Shift value; may be replaced by ``_batchnorm_helper``.
        running_mean: Running mean value; may be replaced by ``_batchnorm_helper``.
        running_var: Running variance value; may be replaced by ``_batchnorm_helper``.
        training: Integer flag; any non-zero value selects training mode.
        momentum: PyTorch-side momentum; the node receives ``1 - momentum``.
        eps: Epsilon forwarded to the node.
        cudnn_enabled: Parsed but not used by this function.

    Returns:
        The normalized output value. In training mode the node also produces the
        updated running statistics, but only the first output is returned. When
        autocast is on, the inputs disagree on dtype and the export opset is
        below 15, the result of ``_onnx_opset_unsupported_detailed`` is returned
        instead.
    """
    # The cheap autocast check comes first so the dtype comparison and opset
    # lookup only run when they can matter.
    if (
        torch.is_autocast_enabled()
        and not symbolic_helper.args_have_same_dtype(
            [input, weight, bias, running_mean, running_var]
        )
        and GLOBALS.export_onnx_opset_version < 15
    ):
        return symbolic_helper._onnx_opset_unsupported_detailed(
            "BatchNormalization",
            14,
            15,
            "All input tensors must have the same `dtype`."
            " Turn off Autocast or export using opset version 15.",
            input,
        )

    symbolic_helper.check_training_mode(training, "batch_norm")
    weight, bias, running_mean, running_var = symbolic_helper._batchnorm_helper(
        g, input, weight, bias, running_mean, running_var
    )

    # Training mode yields three outputs (result + updated statistics);
    # inference yields just the result.
    node_outputs = g.op(
        "BatchNormalization",
        input,
        weight,
        bias,
        running_mean,
        running_var,
        epsilon_f=eps,
        momentum_f=1 - momentum,
        training_mode_i=1 if training else 0,
        outputs=3 if training else 1,
    )
    if training:
        normalized, updated_mean, updated_var = node_outputs
        # Give the extra outputs the same types as the statistics they update.
        updated_mean.setType(running_mean.type())
        updated_var.setType(running_var.type())
        return normalized
    return node_outputs


@_onnx_symbolic("quantized::hardswish")
@_beartype.beartype
def quantized_hardswish(g: jit_utils.GraphContext, x, op_scale, op_zero_point):
    """Export ``quantized::hardswish`` as dequantize -> HardSwish -> quantize.

    Args:
        g: Graph context the nodes are emitted into.
        x: Quantized input value.
        op_scale: Scale forwarded to ``quantize_helper`` for the output.
        op_zero_point: Zero point forwarded to ``quantize_helper`` for the output.

    Returns:
        The result of ``symbolic_helper.quantize_helper`` applied to the
        HardSwish output.
    """
    # Only the first element of the helper's 4-tuple is needed here.
    dequantized, _, _, _ = symbolic_helper.dequantize_helper(g, x)
    activated = hardswish(g, dequantized)
    return symbolic_helper.quantize_helper(g, activated, op_scale, op_zero_point)


# Ported from
# https://github.com/microsoft/onnxscript/blob/6b1b81700b4523f31d8c6d3321e5d8ef5d42b764/onnxscript/function_libs/torch_aten/ops/nn.py#L1504
# aten_scaled_dot_product_attention
# NOTE: Need op.Trilu
@_onnx_symbolic("aten::scaled_dot_product_attention")
@symbolic_helper.parse_args("v", "v", "v", "v", "f", "b", "v")
@_beartype.beartype
def scaled_dot_product_attention(
    g: jit_utils.GraphContext,
    query: torch._C.Value,
    key: torch._C.Value,
    value: torch._C.Value,
    attn_mask: Optional[torch._C.Value] = None,
    dropout_p: float = 0.0,
    is_causal: bool = False,
    scale: Optional[torch._C.Value] = None,
):
    """Export ``aten::scaled_dot_product_attention`` as primitive ONNX ops.

    The emitted subgraph multiplies ``query`` and the transposed ``key`` each by
    ``sqrt(scale)``, MatMuls them, adds the attention mask (if any), applies
    ``Softmax`` over the last axis, optionally ``Dropout``, and finally MatMuls
    the result with ``value``.

    Args:
        g: Graph context the nodes are emitted into.
        query: Query value.
        key: Key value. Its rank must be statically known because the
            ``Transpose`` permutation is emitted as a list of ints.
        value: Value multiplied with the attention weights.
        attn_mask: Optional mask. A BOOL mask is converted into an additive
            mask (0 where true, -inf where false); FLOAT, HALF and BFLOAT16
            masks are added as-is.
        dropout_p: Dropout ratio; a ``Dropout`` node is emitted only when it is
            non-zero.
        is_causal: When true, a causal mask is built from the query/key shapes.
            Must not be combined with ``attn_mask``.
        scale: Optional scale factor; when it is none, ``_attention_scale``
            derives it from ``query``.

    Returns:
        The output of the final ``MatMul`` node.

    Raises:
        AssertionError: If ``is_causal`` is set together with ``attn_mask``.
        TypeError: If the rank of ``key`` is not statically known.
        ValueError: If ``attn_mask`` has an unsupported scalar type.
    """
    assert (not is_causal) or symbolic_helper._is_none(
        attn_mask
    ), "is_causal and attn_mask cannot be set at the same time"

    scale = symbolic_helper._maybe_get_const(scale, "f")
    if symbolic_helper._is_none(scale):
        scale = _attention_scale(g, query)

    if is_causal:
        attn_mask = _causal_attention_mask(g, query, key)

    # Swap the last two axes of key
    # NOTE: onnx-script has different logic here, because the attribute perms in
    # transpose needs list of ints
    key_rank = symbolic_helper._get_tensor_rank(key)
    if key_rank is None:
        # Without this guard ``range(None)`` raises an opaque TypeError; keep
        # the exception type but explain what is actually missing.
        raise TypeError(
            "scaled_dot_product_attention: the rank of `key` must be statically "
            "known to build the Transpose permutation for ONNX export."
        )
    key_transposed_axes = list(range(key_rank))
    key_transposed_axes[-1], key_transposed_axes[-2] = (
        key_transposed_axes[-2],
        key_transposed_axes[-1],
    )
    key_transposed = g.op("Transpose", key, perm_i=key_transposed_axes)

    # https://github.com/pytorch/pytorch/blob/12da0c70378b5be9135c6fda62a9863bce4a4818/aten/src/ATen/native/transformers/attention.cpp#L653
    # Scale q, k before matmul for stability see https://tinyurl.com/sudb9s96 for math
    query_scaled = g.op("Mul", query, g.op("Sqrt", scale))
    key_transposed_scaled = g.op("Mul", key_transposed, g.op("Sqrt", scale))
    mul_qk = g.op("MatMul", query_scaled, key_transposed_scaled)

    if symbolic_helper._is_none(attn_mask):
        mul_qk_add = mul_qk
    else:
        # Resolve the mask's scalar type once and branch on it.
        mask_type = _type_utils.JitScalarType.from_value(attn_mask)
        if mask_type == _type_utils.JitScalarType.BOOL:
            # Turn the Boolean mask to float: attn_mask.masked_fill(not attn_mask, -float('inf'))
            const_zero = g.op("Constant", value_t=torch.tensor([0.0]))
            const_neg_inf = g.op("Constant", value_t=torch.tensor([-float("inf")]))
            attn_mask = g.op("Where", attn_mask, const_zero, const_neg_inf)
            mul_qk_add = g.op("Add", mul_qk, attn_mask)
        elif mask_type in (
            _type_utils.JitScalarType.FLOAT,
            _type_utils.JitScalarType.HALF,
            _type_utils.JitScalarType.BFLOAT16,
        ):
            mul_qk_add = g.op("Add", mul_qk, attn_mask)
        else:
            raise ValueError(f"Unsupported type for attn_mask: {mask_type}")

    attn_weight = g.op("Softmax", mul_qk_add, axis_i=-1)

    if dropout_p != 0:
        attn_weight = g.op(
            "Dropout",
            attn_weight,
            g.op("Constant", value_t=torch.tensor(dropout_p, dtype=torch.float)),
        )

    return g.op("MatMul", attn_weight, value)


@_beartype.beartype
def _attention_scale(
    g: jit_utils.GraphContext, query: torch._C.Value
) -> torch._C.Value:
    """Emit the default attention scale ``1 / sqrt(query.size(-1))``.

    Args:
        g: Graph context the nodes are emitted into.
        query: Tensor of shape [..., L, E]

    Returns:
        A one-element value holding the scale factor, cast to the scalar type
        of ``query``.
    """
    shape_of_query = g.op("Shape", query)

    # Slicing the shape vector from index -1 to "infinity" keeps only its last
    # entry, i.e. the embedding dimension E.
    slice_start = g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64))
    slice_end = g.op(
        "Constant", value_t=torch.tensor([_constants.INT64_MAX], dtype=torch.int64)
    )
    last_dim = g.op("Slice", shape_of_query, slice_start, slice_end)

    query_onnx_type = _type_utils.JitScalarType.from_value(query).onnx_type()
    embedding_size = g.op("Cast", last_dim, to_i=query_onnx_type)

    one = g.op("Constant", value_t=torch.tensor([1.0], dtype=torch.float))
    root = g.op("Sqrt", embedding_size)
    inverse_root = g.op("Div", one, root)

    # Add a Cast to convert the scale back to original type
    return g.op("Cast", inverse_root, to_i=query_onnx_type)


@_beartype.beartype
def _causal_attention_mask(
    g: jit_utils.GraphContext, query: torch._C.Value, key: torch._C.Value
) -> torch._C.Value:
    """Emit an additive causal mask sized from ``query`` and ``key``.

    Equivalent to::
        mask = torch.ones(L, S, dtype=torch.bool).tril(diagonal=0)
        attn_mask = torch.zeros(L, S, dtype=torch.float)
        attn_mask = attn_mask.masked_fill(~mask, -float('inf'))

    Args:
        g: Graph context the nodes are emitted into.
        query: Tensor of shape [..., L, E]
        key: Tensor of shape [..., S, E]

    Returns:
        Tensor of shape [L, S]
    """
    shape_of_query = g.op("Shape", query)
    shape_of_key = g.op("Shape", key)

    # Slicing a shape vector over [-2, -1) extracts its second-to-last entry.
    idx_last = g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64))
    idx_second_last = g.op("Constant", value_t=torch.tensor([-2], dtype=torch.int64))
    num_targets = g.op("Slice", shape_of_query, idx_second_last, idx_last)
    num_sources = g.op("Slice", shape_of_key, idx_second_last, idx_last)

    # Build torch.ones(L, S) by expanding a single 1.0 to the [L, S] shape.
    mask_shape = g.op("Concat", num_targets, num_sources, axis_i=0)
    one = g.op("Constant", value_t=torch.tensor([1.0]))
    ones = g.op("Expand", one, mask_shape)

    lower_triangular = g.op("Trilu", ones, upper_i=0)

    # The causal mask has 0s in the lower triangle and -inf in the upper triangle.
    zero = g.op("Constant", value_t=torch.tensor([0.0]))
    neg_inf = g.op("Constant", value_t=torch.tensor([-float("inf")]))
    is_masked_out = g.op("Equal", lower_triangular, zero)
    return g.op("Where", is_masked_out, neg_inf, zero)
first commit 2024-05-03 04:18:51 +03:00			`"""This file exports ONNX ops for opset 14.`

			`Note [ONNX operators that are added/updated in opset 14]`
			`~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~`
			`New operators:`
			`HardSwish, Trilu`

			`Updated operators:`
			`Reshape`
			`Add, Sub, Mul, Div`
			`GRU, LSTM, RNN`
			`BatchNorm, Cumsum, Relu`
			`"""`

			`# EDITING THIS FILE? READ THIS FIRST!`
			`# see Note [Edit Symbolic Files] in README.md`
			`from __future__ import annotations`

			`import functools`
			`from typing import Optional`

			`import torch`
			`from torch.onnx import _constants, _type_utils, symbolic_helper`
			`from torch.onnx._globals import GLOBALS`
			`from torch.onnx._internal import _beartype, jit_utils, registration`

			`__all__ = [`
			`"hardswish",`
			`"tril",`
			`"triu",`
			`"reshape",`
			`"batch_norm",`
			`"quantized_hardswish",`
			`"scaled_dot_product_attention",`
			`]`

			`_onnx_symbolic = functools.partial(registration.onnx_symbolic, opset=14)`


			`@_onnx_symbolic("aten::hardswish")`
			`@symbolic_helper.parse_args("v")`
			`@_beartype.beartype`
			`def hardswish(g: jit_utils.GraphContext, self):`
			`return g.op("HardSwish", self)`


			`@_onnx_symbolic("aten::tril")`
			`@_beartype.beartype`
			`def tril(g: jit_utils.GraphContext, self, diagonal, out=None):`
			`return g.op("Trilu", self, diagonal, upper_i=0)`


			`@_onnx_symbolic("aten::triu")`
			`@_beartype.beartype`
			`def triu(g: jit_utils.GraphContext, self, diagonal, out=None):`
			`return g.op("Trilu", self, diagonal, upper_i=1)`


			`@_onnx_symbolic("aten::reshape")`
			`@symbolic_helper.quantized_args(True)`
			`@symbolic_helper.parse_args("v", "v")`
			`@_beartype.beartype`
			`def reshape(g: jit_utils.GraphContext, self, shape):`
			`# NOTE: Due to bug in ORT https://github.com/microsoft/onnxruntime/issues/10664`
			`# Reshape export cannot utilize the new allowzero attribute introduced in opset 14.`
			`return symbolic_helper._reshape_helper(g, self, shape, allowzero=0)`


			`@_onnx_symbolic("aten::batch_norm")`
			`@symbolic_helper.parse_args("v", "v", "v", "v", "v", "i", "f", "f", "i")`
			`@_beartype.beartype`
			`def batch_norm(`
			`g: jit_utils.GraphContext,`
			`input,`
			`weight,`
			`bias,`
			`running_mean,`
			`running_var,`
			`training,`
			`momentum,`
			`eps,`
			`cudnn_enabled,`
			`):`
			`if (`
			`torch.is_autocast_enabled()`
			`and not symbolic_helper.args_have_same_dtype(`
			`[input, weight, bias, running_mean, running_var]`
			`)`
			`and GLOBALS.export_onnx_opset_version < 15`
			`):`
			`return symbolic_helper._onnx_opset_unsupported_detailed(`
			`"BatchNormalization",`
			`14,`
			`15,`
			"All input tensors must have the same `dtype`."
			`" Turn off Autocast or export using opset version 15.",`
			`input,`
			`)`

			`symbolic_helper.check_training_mode(training, "batch_norm")`
			`weight, bias, running_mean, running_var = symbolic_helper._batchnorm_helper(`
			`g, input, weight, bias, running_mean, running_var`
			`)`
			`out = g.op(`
			`"BatchNormalization",`
			`input,`
			`weight,`
			`bias,`
			`running_mean,`
			`running_var,`
			`epsilon_f=eps,`
			`momentum_f=1 - momentum,`
			`training_mode_i=0 if not training else 1,`
			`outputs=1 if not training else 3,`
			`)`
			`if not training:`
			`return out`
			`else:`
			`res, new_running_mean, new_running_var = out`
			`new_running_mean.setType(running_mean.type())`
			`new_running_var.setType(running_var.type())`
			`return res`


			`@_onnx_symbolic("quantized::hardswish")`
			`@_beartype.beartype`
			`def quantized_hardswish(g: jit_utils.GraphContext, x, op_scale, op_zero_point):`
			`x, _, _, _ = symbolic_helper.dequantize_helper(g, x)`

			`output = hardswish(g, x)`

			`return symbolic_helper.quantize_helper(g, output, op_scale, op_zero_point)`


			`# Ported from`
			`# https://github.com/microsoft/onnxscript/blob/6b1b81700b4523f31d8c6d3321e5d8ef5d42b764/onnxscript/function_libs/torch_aten/ops/nn.py#L1504`
			`# aten_scaled_dot_product_attention`
			`# NOTE: Need op.Trilu`
			`@_onnx_symbolic("aten::scaled_dot_product_attention")`
			`@symbolic_helper.parse_args("v", "v", "v", "v", "f", "b", "v")`
			`@_beartype.beartype`
			`def scaled_dot_product_attention(`
			`g: jit_utils.GraphContext,`
			`query: torch._C.Value,`
			`key: torch._C.Value,`
			`value: torch._C.Value,`
			`attn_mask: Optional[torch._C.Value] = None,`
			`dropout_p: float = 0.0,`
			`is_causal: bool = False,`
			`scale: Optional[torch._C.Value] = None,`
			`):`
			`assert (not is_causal) or (`
			`is_causal and symbolic_helper._is_none(attn_mask)`
			`), "is_causal and attn_mask cannot be set at the same time"`

			`scale = symbolic_helper._maybe_get_const(scale, "f")`
			`if symbolic_helper._is_none(scale):`
			`scale = _attention_scale(g, query)`

			`if is_causal:`
			`attn_mask = _causal_attention_mask(g, query, key)`

			`# Swap the last two axes of key`
			`# NOTE: onnx-script has different logic here, because the attribute perms in`
			`# transpose needs list of ints`
			`key_shape_builtin = symbolic_helper._get_tensor_rank(key)`
			`key_transposed_axes = list(range(key_shape_builtin))`
			`key_transposed_axes[-1], key_transposed_axes[-2] = (`
			`key_transposed_axes[-2],`
			`key_transposed_axes[-1],`
			`)`
			`key_transposed = g.op("Transpose", key, perm_i=key_transposed_axes)`

			`# https://github.com/pytorch/pytorch/blob/12da0c70378b5be9135c6fda62a9863bce4a4818/aten/src/ATen/native/transformers/attention.cpp#L653`
			`# Scale q, k before matmul for stability see https://tinyurl.com/sudb9s96 for math`
			`query_scaled = g.op("Mul", query, g.op("Sqrt", scale))`
			`key_transposed_scaled = g.op("Mul", key_transposed, g.op("Sqrt", scale))`
			`mul_qk = g.op("MatMul", query_scaled, key_transposed_scaled)`

			`if symbolic_helper._is_none(attn_mask):`
			`mul_qk_add = mul_qk`
			`elif (`
			`_type_utils.JitScalarType.from_value(attn_mask)`
			`== _type_utils.JitScalarType.BOOL`
			`):`
			`# Turn the Boolean mask to float: attn_mask.masked_fill(not attn_mask, -float('inf'))`
			`const_zero = g.op("Constant", value_t=torch.tensor([0.0]))`
			`const_neg_inf = g.op("Constant", value_t=torch.tensor([-float("inf")]))`
			`attn_mask = g.op("Where", attn_mask, const_zero, const_neg_inf)`
			`mul_qk_add = g.op("Add", mul_qk, attn_mask)`
			`elif _type_utils.JitScalarType.from_value(attn_mask) in (`
			`_type_utils.JitScalarType.FLOAT,`
			`_type_utils.JitScalarType.HALF,`
			`_type_utils.JitScalarType.BFLOAT16,`
			`):`
			`mul_qk_add = g.op("Add", mul_qk, attn_mask)`
			`else:`
			`raise ValueError(`
			`f"Unsupported type for attn_mask: {_type_utils.JitScalarType.from_value(attn_mask)}"`
			`)`

			`attn_weight = g.op("Softmax", mul_qk_add, axis_i=-1)`

			`if dropout_p != 0:`
			`attn_weight = g.op(`
			`"Dropout",`
			`attn_weight,`
			`g.op("Constant", value_t=torch.tensor(dropout_p, dtype=torch.float)),`
			`)`

			`return g.op("MatMul", attn_weight, value)`


			`@_beartype.beartype`
			`def _attention_scale(`
			`g: jit_utils.GraphContext, query: torch._C.Value`
			`) -> torch._C.Value:`
			`"""Calculate the scale factor for the attention result.`

			`Args:`
			`query: Tensor of shape [..., L, E]`

			`Returns:`
			`Scalar scale factor := 1 / math.sqrt(query.size(-1))`
			`"""`
			`query_shape = g.op("Shape", query)`
			`query_shape_last = g.op(`
			`"Slice",`
			`query_shape,`
			`g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64)),`
			`g.op(`
			`"Constant", value_t=torch.tensor([_constants.INT64_MAX], dtype=torch.int64)`
			`),`
			`)`
			`embedding_size = g.op(`
			`"Cast",`
			`query_shape_last,`
			`to_i=_type_utils.JitScalarType.from_value(query).onnx_type(),`
			`)`
			`const_one = g.op("Constant", value_t=torch.tensor([1.0], dtype=torch.float))`
			`scale = g.op("Div", const_one, g.op("Sqrt", embedding_size))`
			`# Add a Cast to convert the scale back to original type`
			`scale = g.op(`
			`"Cast",`
			`scale,`
			`to_i=_type_utils.JitScalarType.from_value(query).onnx_type(),`
			`)`
			`return scale`


			`@_beartype.beartype`
			`def _causal_attention_mask(`
			`g: jit_utils.GraphContext, query: torch._C.Value, key: torch._C.Value`
			`) -> torch._C.Value:`
			`"""Create a causal mask for the given query and key tensors.`

			`Equivalent to::`
			`mask = torch.ones(L, S, dtype=torch.bool).tril(diagonal=0)`
			`attn_mask = torch.zeros(L, S, dtype=torch.float)`
			`attn_mask = attn_mask.masked_fill(not mask, -float('inf'))`

			`Args:`
			`query: Tensor of shape [..., L, E]`
			`key: Tensor of shape [..., S, E]`

			`Returns:`
			`Tensor of shape [L, S]`
			`"""`

			`query_shape = g.op("Shape", query)`
			`key_shape = g.op("Shape", key)`

			`last_idx = g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64))`
			`second_last_idx = g.op("Constant", value_t=torch.tensor([-2], dtype=torch.int64))`
			`target_length = g.op("Slice", query_shape, second_last_idx, last_idx)`
			`source_length = g.op("Slice", key_shape, second_last_idx, last_idx)`
			`# attn_mask = torch.ones(L, S) := {`
			`size = g.op("Concat", target_length, source_length, axis_i=0)`
			`const_one = g.op("Constant", value_t=torch.tensor([1.0]))`
			`attn_mask = g.op("Expand", const_one, size)`
			`# }`
			`attn_mask = g.op("Trilu", attn_mask, upper_i=0)`
			`# The causal mask has 0s in the lower triangle and -inf in the upper triangle.`
			`const_zero = g.op("Constant", value_t=torch.tensor([0.0]))`
			`const_neg_inf = g.op("Constant", value_t=torch.tensor([-float("inf")]))`
			`attn_mask = g.op(`
			`"Where", g.op("Equal", attn_mask, const_zero), const_neg_inf, const_zero`
			`)`
			`return attn_mask`