ai-content-maker/.venv/Lib/site-packages/torch/ao/ns/fx/utils.py

534 lines
20 KiB
Python
Raw Permalink Normal View History

2024-05-03 04:18:51 +03:00
import enum
import operator
import torch
import torch.nn as nn
import torch.ao.nn.intrinsic.quantized as nniq
import torch.ao.nn.quantized as nnq
toq = torch.ops.quantized
from typing import Tuple, Callable, Dict, Set, List, Optional, Union
from torch.fx import GraphModule
from torch.fx.graph import Node
from torch.ao.quantization import (
ObserverBase,
FakeQuantizeBase,
)
from torch.ao.quantization.utils import getattr_from_fqn
from torch.ao.quantization.observer import _is_activation_post_process
from .ns_types import NSNodeTargetType, NSResultsType
# TODO(future PR): consider deleting this enum and using the torch types
# directly.  This might be tricky because it is not a one to one mapping.
class NodeInputOrOutputType(enum.Enum):
    """
    Coarse dtype category used to describe the first input or the output
    of a graph node.
    """

    # torch.float
    FP32 = enum.auto()

    # torch.qint8 or torch.quint8
    INT8 = enum.auto()

    # torch.float16
    FP16 = enum.auto()

    # we cannot determine input/output dtype
    UNKNOWN = enum.auto()

    # either torch.float or torch.quint8 or torch.qint8
    #
    # TODO(future PR): while these functions can support multiple dtypes,
    #   for the purposes of numerical debugging we want to get the actual
    #   dtype used in the model. We will likely need some kind of dtype
    #   propagation to estimate this.
    FP32_OR_INT8 = enum.auto()

    # TODO(future PRs): dynamic quant, fake quant, etc
def get_node_first_input_and_output_type(
    node: Node,
    gm: GraphModule,
    logger_cls: Callable,
    node_type_to_io_type_map: Dict[str, Set[NSNodeTargetType]],
) -> Tuple[NodeInputOrOutputType, NodeInputOrOutputType]:
    """
    Returns a `(first_input_type, output_type)` pair for `node`.

    The classification is driven by `node_type_to_io_type_map`, which must
    contain the keys `funs_io_type_fp32`, `funs_io_type_fp16`,
    `funs_io_type_int8`, `funs_io_type_fp32_or_int8`, `mods_io_type_fp32`,
    `mods_io_type_int8`, `mods_io_type_fp32_or_int8` and
    `meths_io_type_fp32_or_int8`.

    Nodes whose dtype depends on their input (targets listed in one of the
    `*_fp32_or_int8` sets, plus instances of `logger_cls`, `ObserverBase`
    and `FakeQuantizeBase`) take the output type of the node feeding their
    first input, found by recursing up the graph. Anything which is not
    recognized is reported as `(UNKNOWN, UNKNOWN)`.
    """
    # TODO(future PR): clean this up

    # All keys are read up front, so a missing key fails immediately
    # regardless of which kind of node is being classified.
    fp32_funs = node_type_to_io_type_map["funs_io_type_fp32"]
    fp16_funs = node_type_to_io_type_map["funs_io_type_fp16"]
    int8_funs = node_type_to_io_type_map["funs_io_type_int8"]
    fp32_or_int8_funs = node_type_to_io_type_map["funs_io_type_fp32_or_int8"]
    fp32_mods = node_type_to_io_type_map["mods_io_type_fp32"]
    int8_mods = node_type_to_io_type_map["mods_io_type_int8"]
    fp32_or_int8_mods = node_type_to_io_type_map["mods_io_type_fp32_or_int8"]
    fp32_or_int8_meths = node_type_to_io_type_map["meths_io_type_fp32_or_int8"]

    io_type = NodeInputOrOutputType
    unknown = (io_type.UNKNOWN, io_type.UNKNOWN)

    def _output_type_of_first_input() -> NodeInputOrOutputType:
        # Output type of the node which produces the first input of `node`.
        producer = get_normalized_nth_input(node, gm, 0)
        assert isinstance(producer, Node)
        _producer_input_type, producer_output_type = (
            get_node_first_input_and_output_type(
                producer, gm, logger_cls, node_type_to_io_type_map
            )
        )
        return producer_output_type

    def _is_instance_of_any(obj, candidate_types) -> bool:
        return any(
            isinstance(obj, candidate)  # type: ignore[arg-type]
            for candidate in candidate_types
        )

    if node.op == "call_function":
        target = node.target
        if target in fp32_funs:
            return (io_type.FP32, io_type.FP32)
        if target in fp16_funs:
            return (io_type.FP16, io_type.FP16)
        if target in int8_funs:
            return (io_type.INT8, io_type.INT8)
        if target in fp32_or_int8_funs:
            inherited = _output_type_of_first_input()
            return (inherited, inherited)
        return unknown

    if node.op == "call_module":
        assert isinstance(node.target, str)
        mod = getattr_from_fqn(gm, node.target)
        is_dtype_passthrough_module = _is_instance_of_any(mod, fp32_or_int8_mods)
        if (
            isinstance(mod, (logger_cls, ObserverBase, FakeQuantizeBase))  # type: ignore[arg-type]
            or is_dtype_passthrough_module
        ):
            # A logger or observer's input and output type is the output
            # type of the preceding node.
            inherited = _output_type_of_first_input()
            return (inherited, inherited)

        is_fp32_module = _is_instance_of_any(mod, fp32_mods)
        is_int8_module = _is_instance_of_any(mod, int8_mods)
        if is_fp32_module:
            return (io_type.FP32, io_type.FP32)
        if is_int8_module:
            return (io_type.INT8, io_type.INT8)
        return unknown

    if node.op == "call_method":
        if node.target == "dequantize":
            # Dequantize is a special node because it allows multiple input
            # types. The input type is whatever the previous node produced.
            return (_output_type_of_first_input(), io_type.FP32)

        if node.target == "to":
            # `to` also allows multiple input types, so the input type is
            # again taken from the previous node. The output type comes from
            # the dtype passed to `to`.
            inherited = _output_type_of_first_input()
            cur_node_dtype_target = get_normalized_nth_input(node, gm, 1)
            assert (
                cur_node_dtype_target is torch.float16
            ), f"{cur_node_dtype_target} handling needs to be added"
            return (inherited, io_type.FP16)

        if node.target in fp32_or_int8_meths:
            inherited = _output_type_of_first_input()
            return (inherited, inherited)

        return unknown

    return unknown
def get_node_input_qparams(
    node: Node,
    gm: GraphModule,
    node_type_to_io_type_map: Dict[str, Set[NSNodeTargetType]],
) -> Optional[Tuple[Union[torch.Tensor, float], Union[torch.Tensor, int]]]:
    """
    Returns the qparams `(scale, zero_point)` of the first input to `node`,
    or None if they cannot be inferred from the graph.

    The qparams are read from the node which produces the first input:
      * for `torch.quantize_per_tensor` and the quantized add / mul
        functions, from the attributes referenced by its scale and
        zero_point arguments
      * for the listed quantized modules, from the module's `scale` and
        `zero_point` attributes
      * for module types in
        `node_type_to_io_type_map["mods_io_type_fp32_or_int8"]`, by
        recursing to that module's own first input
    """
    producer = get_normalized_nth_input(node, gm, 0)
    if not isinstance(producer, Node):
        return None

    passthrough_module_types = node_type_to_io_type_map["mods_io_type_fp32_or_int8"]

    def _read_qparams_from_args(fn_node, scale_arg_idx, zp_arg_idx):
        # The scale and zero_point args are expected to be nodes whose
        # targets name attributes of `gm`.
        scale_node = get_normalized_nth_input(fn_node, gm, scale_arg_idx)
        zp_node = get_normalized_nth_input(fn_node, gm, zp_arg_idx)
        assert isinstance(scale_node, Node) and isinstance(scale_node.target, str)
        assert isinstance(zp_node, Node) and isinstance(zp_node.target, str)
        scale = getattr_from_fqn(gm, scale_node.target)
        zero_point = getattr_from_fqn(gm, zp_node.target)
        return (scale, zero_point)

    if producer.op == "call_function":
        # quantize - read the args directly
        if producer.target == torch.quantize_per_tensor:
            return _read_qparams_from_args(producer, 1, 2)
        if producer.target in (
            torch.ops.quantized.add,
            torch.ops.quantized.add_relu,
            torch.ops.quantized.mul,
            torch.ops.quantized.mul_relu,
        ):
            return _read_qparams_from_args(producer, 2, 3)
        # TODO(future PR): handle more functionals
        # TODO(future PR): handle functional ops which inherit qparams from input
        return None

    if producer.op == "call_module":
        # get type of the module
        assert isinstance(producer.target, str)
        module_obj = getattr_from_fqn(gm, producer.target)

        modules_with_qparams = (
            nnq.Linear,
            nnq.Conv1d,
            nnq.Conv2d,
            nnq.Conv3d,
            nnq.ConvTranspose1d,
            nnq.ConvTranspose2d,
            nnq.BatchNorm2d,
            nnq.BatchNorm3d,
            nnq.GroupNorm,
            nnq.InstanceNorm1d,
            nnq.InstanceNorm2d,
            nnq.InstanceNorm3d,
            nnq.LayerNorm,
            nnq.ELU,
            nnq.Hardswish,
            nnq.LeakyReLU,
            nnq.ReLU6,
            nniq.BNReLU2d,
            nniq.BNReLU3d,
            nniq.ConvReLU1d,
            nniq.ConvReLU2d,
            nniq.ConvReLU3d,
            nniq.LinearReLU,
        )
        if isinstance(module_obj, modules_with_qparams):
            return (module_obj.scale, module_obj.zero_point)  # type: ignore[return-value]

        inherits_qparams_from_input = any(
            isinstance(module_obj, module_type)  # type: ignore[arg-type]
            for module_type in passthrough_module_types
        )
        if inherits_qparams_from_input:
            return get_node_input_qparams(producer, gm, node_type_to_io_type_map)

    return None
def return_first_non_observer_node(
    node: Node,
    gm: GraphModule,
) -> Node:
    """
    If node is not an observer, returns it.  If node is an observer,
    navigates up the graph and returns the first parent which is not an
    observer.  For example,

    graph: (node_non_obs), node = node_non_obs : returns node_non_obs
    graph: (node_non_obs -> obs0), node = obs0 : returns node_non_obs
    graph: (node_non_obs -> obs0 -> fq0), node = fq0 : returns node_non_obs

    A node counts as an observer when it is a `call_module` node whose
    module satisfies `_is_activation_post_process`. Every observer on the
    way up must have exactly one positional arg, and that arg must be a
    `Node`; this is enforced with assertions.
    """
    # Only `call_module` nodes can be observers. Checking `node.op` on every
    # step (and not just for the starting node) lets the walk stop cleanly
    # when the parent is a function, method, placeholder or get_attr node,
    # whose target is not the name of a submodule of `gm`.
    while node.op == "call_module":
        assert isinstance(node.target, str)
        node_obj = getattr_from_fqn(gm, node.target)
        if not _is_activation_post_process(node_obj):
            break
        assert len(node.args) == 1
        assert isinstance(node.args[0], Node)
        node = node.args[0]
    return node
def get_number_of_non_param_args(
    node: Node,
    gm: GraphModule,
) -> int:
    """
    Returns the number of non-param args expected for a node, assuming
    that all non-param args occur first.

    For example, for

      F.linear(x, weight, bias)

    the answer is 1, because x is a non-param arg and weight and bias are
    params.  For

      lstm_mod(x, hid)

    the answer is 2, because both x and hid are non-param args.

    Only `call_module` nodes which resolve to an `nn.LSTM` return 2; every
    other node returns the default of 1.
    """
    if node.op != "call_module":
        return 1
    module_obj = getattr_from_fqn(gm, node.target)  # type: ignore[arg-type]
    return 2 if isinstance(module_obj, nn.LSTM) else 1
def get_arg_indices_of_inputs_to_log(node: Node) -> List[int]:
    """
    Returns the indices of args of the node which we should attach
    loggers to, if input logging is enabled.

    For example,
    * for (x + y), returns [0, 1]
    * for (1 + y), returns [1]
    * for (x + 1), returns [0]
    * for (linear(x, w, b)) returns [0]
    * by default, returns [0]

    Only positional args are considered.  A node without positional args
    returns [], and an add / mul node with a single positional arg (the
    other operand being passed as a keyword) reports only that arg.
    """
    if not node.args:
        return []

    # TODO(future PR): use relationship map instead of hardcoding
    two_input_ops = (
        torch.add,
        torch.ops.quantized.add,
        operator.add,
        torch.mul,
        torch.ops.quantized.mul,
        operator.mul,
    )
    if node.op == "call_function" and node.target in two_input_ops:
        # Slicing (instead of indexing 0 and 1) keeps this safe when fewer
        # than two positional args are present.
        return [
            arg_idx
            for arg_idx, arg in enumerate(node.args[:2])
            if isinstance(arg, Node)
        ]
    return [0]
def get_target_type_str(node: Node, gm: GraphModule) -> str:
    """
    Returns `torch.typename` of what this node points to: the node's target
    for `call_function` and `call_method` nodes, or the module looked up on
    `gm` for `call_module` nodes.  Returns '' for other node types.
    """
    if node.op == "call_module":
        assert isinstance(node.target, str)
        return torch.typename(getattr_from_fqn(gm, node.target))
    if node.op in ("call_function", "call_method"):
        return torch.typename(node.target)
    return ""
def rekey_logger_info_on_node_name_of_model(
    results: NSResultsType,
    model_name: str,
) -> NSResultsType:
    """
    Returns a new results dictionary whose layer names are the node names
    of `model_name`.

    For example, transforms

        {'base_op_1_0': {'node_output': {'model_a':
          [{'ref_node_name': 'linear1', ...}]}}}

    into

        {'linear1': {'node_output': {'model_a':
          [{'ref_node_name': 'linear1', ...}]}}}

    The new key is the `ref_node_name` of the first result recorded for
    `model_name`; if several result types contain `model_name`, the last
    one wins.  Layers without results for `model_name` (or whose
    `ref_node_name` is None) keep their old key.  The per-layer values are
    reused as is, not copied.

    Note: we cannot use these node names directly because they are not
    guaranteed to be consistent across models. This is why we extract
    the results first and rekey afterwards.
    """
    rekeyed = {}
    for layer_name, per_result_type in results.items():
        ref_node_name = None
        for per_model in per_result_type.values():
            if model_name not in per_model:
                continue
            model_entries = per_model[model_name]
            assert len(model_entries)
            ref_node_name = model_entries[0]["ref_node_name"]
        key = layer_name if ref_node_name is None else ref_node_name
        rekeyed[key] = per_result_type
    return rekeyed
def maybe_add_missing_fqns(results: NSResultsType) -> None:
    """
    If `fqn` entries are filled in for one of the models in `results`,
    copies them over to the other models.  `results` is modified in place.

    A common use case benefitting from this is comparing a model prepared by
    quantization to a quantized model. In this case, the model prepared by
    quantization would have `fqn` entries, and the quantized model would not.

    Only the first result type of the first layer is inspected to decide
    which model provides the fqns. Once such a model is found, the `fqn` of
    every result of every other model is overwritten with the value at the
    same position in that model's results.
    """
    # Check in the first result to find any model with fqn entries defined.
    fqn_source_model = None
    first_layer = next(iter(results.values()), None)
    first_result_type = (
        next(iter(first_layer.values()), None) if first_layer is not None else None
    )
    if first_result_type is not None:
        for candidate_name, candidate_results in first_result_type.items():
            if len(candidate_results) > 0 and candidate_results[0]["fqn"] is not None:
                fqn_source_model = candidate_name
                break

    if not fqn_source_model:
        return

    for per_result_type in results.values():
        for per_model in per_result_type.values():
            source_results = per_model[fqn_source_model]
            for other_name, other_results in per_model.items():
                if other_name == fqn_source_model:
                    continue
                for position, other_result in enumerate(other_results):
                    other_result["fqn"] = source_results[position]["fqn"]
def maybe_dequantize_first_two_tensor_args_and_handle_tuples(f):
    """
    Decorator for comparison functions whose first two positional args are
    the values to compare.

    The decorated function behaves as follows:
      * if the first two args are both tuples or both lists, the function
        is applied to each pair of elements (pairing stops at the shorter
        sequence) and a list of the results is returned
      * if they are both tensors, quantized tensors are dequantized first
      * if, after that, either of them does not have dtype `torch.float`,
        None is returned and `f` is not called
      * otherwise `f` is called with the (possibly dequantized) first two
        args followed by the remaining args and kwargs

    At least two positional args are required.
    """
    # Local import, the module does not import functools at the top.
    import functools

    # Keep the name and docstring of `f` on the wrapper.
    @functools.wraps(f)
    def inner(*args, **kwargs):
        a0, a1, *a_other = args

        if (isinstance(a0, tuple) and isinstance(a1, tuple)) or (
            isinstance(a0, list) and isinstance(a1, list)
        ):
            # Recurse through `inner` so that nested sequences and
            # quantized elements are handled as well.
            return [
                inner(el0, el1, *a_other, **kwargs) for el0, el1 in zip(a0, a1)
            ]

        if isinstance(a0, torch.Tensor) and isinstance(a1, torch.Tensor):
            if a0.is_quantized:
                a0 = a0.dequantize()
            if a1.is_quantized:
                a1 = a1.dequantize()

        # for the purposes of this util, only handle floats
        if a0.dtype != torch.float or a1.dtype != torch.float:
            return None

        return f(a0, a1, *a_other, **kwargs)

    return inner
@maybe_dequantize_first_two_tensor_args_and_handle_tuples
def compute_sqnr(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Computes the SQNR between `x` and `y`, as
    `20 * log10(norm(x) / norm(x - y))`.

    Args:
      x: Tensor, or tuple / list of tensors
      y: Tensor, or tuple / list of tensors

    Return:
      a tensor holding the SQNR. Through the decorator, tuple / list inputs
      give a list with one result per pair of elements, and inputs which
      are not `torch.float` after dequantization give None.
    """
    signal_norm = torch.norm(x)
    noise_norm = torch.norm(x - y)
    signal_to_noise = signal_norm / noise_norm
    return 20 * torch.log10(signal_to_noise)
@maybe_dequantize_first_two_tensor_args_and_handle_tuples
def compute_normalized_l2_error(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Computes the normalized L2 error between `x` and `y`, as
    `sqrt(sum((x - y) ** 2) / sum(x ** 2))`.

    Args:
      x: Tensor, or tuple / list of tensors
      y: Tensor, or tuple / list of tensors

    Return:
      a tensor holding the error. Through the decorator, tuple / list
      inputs give a list with one result per pair of elements, and inputs
      which are not `torch.float` after dequantization give None.
    """
    squared_error = ((x - y) ** 2).sum()
    squared_reference = (x ** 2).sum()
    return torch.sqrt(squared_error / squared_reference)
@maybe_dequantize_first_two_tensor_args_and_handle_tuples
def compute_cosine_similarity(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    """
    Computes the cosine similarity between `x` and `y`, each treated as a
    single flat vector.

    Args:
      x: Tensor, or tuple / list of tensors
      y: Tensor, or tuple / list of tensors

    Return:
      a tensor with one element holding the similarity. Through the
      decorator, tuple / list inputs give a list with one result per pair
      of elements, and inputs which are not `torch.float` after
      dequantization give None.
    """
    # For convolutions, the shape of the quantized weight has one additional
    # dimension compared to the shape of the fp32 weight. Flatten both into
    # a single row so that they can be compared regardless of that.
    x_row = x.reshape(1, -1)
    y_row = y.reshape(1, -1)
    return torch.nn.functional.cosine_similarity(x_row, y_row)
def op_type_supports_shadowing(node: Node) -> bool:
    """
    Returns False for `call_function` nodes which target add, mul, cat or
    stack, and True for every other node.
    """
    # shadowing for ops with multiple tensor inputs is not implemented yet
    multi_tensor_input_ops = (
        torch.add,
        torch.mul,
        operator.add,
        operator.mul,
        torch.cat,
        torch.stack,
    )
    is_unsupported = (
        node.op == 'call_function' and node.target in multi_tensor_input_ops
    )
    return not is_unsupported
def get_normalized_nth_input(node: Node, gm: GraphModule, idx: int) -> Node:
    """
    Given a node, gets the n'th input to that node, normalizing
    args and kwargs to the best of its ability.

    Inputs are counted over the positional args first and then over the
    keyword args in their insertion order.  The normalized arguments of
    the node are used when `node.normalized_arguments` provides them;
    when it returns None or raises a RuntimeError, the node's own `args`
    and `kwargs` are used.

    An AssertionError is raised if the node has `idx` or fewer inputs.
    """

    def _nth_of(args, kwargs):
        assert len(args) + len(kwargs) > idx
        if idx < len(args):
            return args[idx]
        # note: in Python 3.7+ dicts are ordered.  The positional args come
        # first, so their count is subtracted to index into the kwargs.
        return list(kwargs.values())[idx - len(args)]

    try:
        norm_args_and_kwargs = node.normalized_arguments(
            gm, normalize_to_only_use_kwargs=True)
    except RuntimeError:
        # this RuntimeError happens when node argument normalization
        # requires typehints to proceed, such as for torch.add where
        # either the first, second or both arguments could be tensors
        norm_args_and_kwargs = None

    if norm_args_and_kwargs is not None:
        norm_args, norm_kwargs = norm_args_and_kwargs
        return _nth_of(norm_args, norm_kwargs)
    return _nth_of(node.args, node.kwargs)  # type: ignore[return-value]