"""
``torch.autograd`` provides classes and functions implementing automatic
differentiation of arbitrary scalar valued functions. It requires minimal
changes to the existing code - you only need to declare :class:`Tensor` s
for which gradients should be computed with the ``requires_grad=True`` keyword.
As of now, we only support autograd for floating point :class:`Tensor` types (
half, float, double and bfloat16) and complex :class:`Tensor` types (cfloat, cdouble).
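
A minimal sketch of the workflow this module enables (tensor values are
illustrative): mark leaf tensors with ``requires_grad=True``, run ops on them
to build a graph, then call :func:`backward` or :func:`grad`.

Example::

    >>> x = torch.ones(2, requires_grad=True)
    >>> y = (3 * x * x).sum()
    >>> y.backward()
    >>> x.grad
    tensor([6., 6.])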
"""

import warnings
from typing import Any, Callable, cast, List, Optional, Sequence, Tuple, Union

import torch

from torch.types import _size, _TensorOrTensors, _TensorOrTensorsOrGradEdge
from .. import _vmap_internals
from ..overrides import handle_torch_function, has_torch_function, is_tensor_like
from . import forward_ad, functional, graph
from .anomaly_mode import detect_anomaly, set_detect_anomaly
from .function import Function, NestedIOFunction
from .grad_mode import (
    _force_original_view_tracking,
    _unsafe_preserve_version_counter,
    enable_grad,
    inference_mode,
    no_grad,
    set_grad_enabled,
    set_multithreading_enabled,
)
from .gradcheck import gradcheck, gradgradcheck
from .graph import _engine_run_backward

from .variable import Variable

__all__ = ["Variable", "Function", "backward", "grad_mode"]

_OptionalTensor = Optional[torch.Tensor]
_ShapeorNestedShape = Union[_size, Sequence[_size], torch.Tensor]


def _calculate_shape(
    output: torch.Tensor, grad: torch.Tensor, is_grads_batched: bool
) -> Tuple[_ShapeorNestedShape, _ShapeorNestedShape]:
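    # Helper for the shape-mismatch error messages in _make_grads: returns the
    # "shapes" of output and grad to report. Nested tensors report their
    # per-component sizes via _nested_tensor_size(); regular tensors report
    # .shape, with grad's leading batch dimension dropped when
    # is_grads_batched=True.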
    # is_same_size ensures that both tensors are either nested or non nested
    # circular import
    from torch.nested._internal.nested_tensor import NestedTensor

    if output.is_nested and not isinstance(output, NestedTensor):
        if is_grads_batched:
            raise RuntimeError("Batched grads are not supported with Nested Tensor.")
        out_shape = output._nested_tensor_size()
        grad_shape = grad._nested_tensor_size()

        return out_shape, grad_shape

    reg_out_shape = output.shape
    reg_grad_shape = grad.shape if not is_grads_batched else grad.shape[1:]
    return reg_out_shape, reg_grad_shape


def _make_grads(
    outputs: Sequence[torch.Tensor],
    grads: Sequence[_OptionalTensor],
    is_grads_batched: bool,
) -> Tuple[_OptionalTensor, ...]:
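    # Validates user-supplied grads against their outputs (matching shape and
    # real/complex dtype) and, where a grad is None, materializes the implicit
    # gradient: ones_like(out) for scalar floating-point outputs that require
    # grad, or None otherwise.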
    new_grads: List[_OptionalTensor] = []
    for out, grad in zip(outputs, grads):
        if isinstance(grad, torch.Tensor):
            from torch.fx.experimental.symbolic_shapes import expect_true, sym_eq

            first_grad = grad if not is_grads_batched else grad[0]
            # TODO: We can remove this conditional once we uniformly use
            # singleton int to represent jagged dimension, so that size() call
            # on nested tensor works
            if out.is_nested or first_grad.is_nested:
                shape_matches = torch.is_same_size(out, first_grad)
            else:
                # We need to do a regular size check, without going through
                # the operator, to be able to handle unbacked symints
                # (expect_true ensures we can deal with unbacked)
                shape_matches = expect_true(sym_eq(out.size(), first_grad.size()))
            if not shape_matches:
                out_shape, grad_shape = _calculate_shape(
                    out, first_grad, is_grads_batched
                )
                if is_grads_batched:
                    raise RuntimeError(
                        "If `is_grads_batched=True`, we interpret the first "
                        "dimension of each grad_output as the batch dimension. "
                        "The sizes of the remaining dimensions are expected to match "
                        "the shape of corresponding output, but a mismatch "
                        "was detected: grad_output["
                        + str(grads.index(grad))
                        + "] has a shape of "
                        + str(grad_shape)
                        + " and output["
                        + str(outputs.index(out))
                        + "] has a shape of "
                        + str(out_shape)
                        + ". "
                        "If you only want some tensors in `grad_output` to be considered "
                        "batched, consider using vmap."
                    )
                else:
                    raise RuntimeError(
                        "Mismatch in shape: grad_output["
                        + str(grads.index(grad))
                        + "] has a shape of "
                        + str(grad_shape)
                        + " and output["
                        + str(outputs.index(out))
                        + "] has a shape of "
                        + str(out_shape)
                        + "."
                    )
            if out.dtype.is_complex != grad.dtype.is_complex:
                raise RuntimeError(
                    "For complex Tensors, both grad_output and output"
                    " are required to have the same dtype."
                    " Mismatch in dtype: grad_output["
                    + str(grads.index(grad))
                    + "] has a dtype of "
                    + str(grad.dtype)
                    + " and output["
                    + str(outputs.index(out))
                    + "] has a dtype of "
                    + str(out.dtype)
                    + "."
                )
            new_grads.append(grad)
        elif grad is None:
            if out.requires_grad:
                if out.numel() != 1:
                    raise RuntimeError(
                        "grad can be implicitly created only for scalar outputs"
                    )
                if not out.dtype.is_floating_point:
                    msg = (
                        "grad can be implicitly created only for real scalar outputs"
                        f" but got {out.dtype}"
                    )
                    raise RuntimeError(msg)
                new_grads.append(
                    torch.ones_like(out, memory_format=torch.preserve_format)
                )
            else:
                new_grads.append(None)
        else:
            raise TypeError(
                "gradients can be either Tensors or None, but got "
                + type(grad).__name__
            )
    return tuple(new_grads)


def _tensor_or_tensors_to_tuple(
    tensors: Optional[_TensorOrTensors], length: int
) -> Tuple[_OptionalTensor, ...]:
    if tensors is None:
        return (None,) * length
    if isinstance(tensors, torch.Tensor):
        return (tensors,)
    return tuple(tensors)


def backward(
    tensors: _TensorOrTensors,
    grad_tensors: Optional[_TensorOrTensors] = None,
    retain_graph: Optional[bool] = None,
    create_graph: bool = False,
    grad_variables: Optional[_TensorOrTensors] = None,
    inputs: Optional[_TensorOrTensorsOrGradEdge] = None,
) -> None:
    r"""Computes the sum of gradients of given tensors with respect to graph
    leaves.

    The graph is differentiated using the chain rule. If any of ``tensors``
    are non-scalar (i.e. their data has more than one element) and require
    gradient, then the Jacobian-vector product will be computed; in this
    case the function additionally requires specifying ``grad_tensors``.
    It should be a sequence of matching length that contains the "vector"
    in the Jacobian-vector product, usually the gradient of the differentiated
    function w.r.t. corresponding tensors (``None`` is an acceptable value for
    all tensors that don't need gradient tensors).

    This function accumulates gradients in the leaves - you might need to zero
    ``.grad`` attributes or set them to ``None`` before calling it.
    See :ref:`Default gradient layouts<default-grad-layouts>`
    for details on the memory layout of accumulated gradients.

    .. note::
        Using this method with ``create_graph=True`` will create a reference cycle
        between the parameter and its gradient which can cause a memory leak.
        We recommend using ``autograd.grad`` when creating the graph to avoid this.
        If you have to use this function, make sure to reset the ``.grad`` fields of your
        parameters to ``None`` after use to break the cycle and avoid the leak.

    .. note::

        If you run any forward ops, create ``grad_tensors``, and/or call ``backward``
        in a user-specified CUDA stream context, see
        :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.

    .. note::

        When ``inputs`` are provided and a given input is not a leaf,
        the current implementation will call its grad_fn (even though it is not strictly needed to get these gradients).
        It is an implementation detail on which the user should not rely.
        See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.

    Args:
        tensors (Sequence[Tensor] or Tensor): Tensors of which the derivative will be
            computed.
        grad_tensors (Sequence[Tensor or None] or Tensor, optional): The "vector" in
            the Jacobian-vector product, usually gradients w.r.t. each element of
            corresponding tensors. None values can be specified for scalar Tensors or
            ones that don't require grad. If a None value would be acceptable for all
            grad_tensors, then this argument is optional.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing to compute higher order derivative products.
            Defaults to ``False``.
        inputs (Sequence[Tensor] or Tensor or Sequence[GradientEdge], optional): Inputs w.r.t. which the gradient
            will be accumulated into ``.grad``. All other Tensors will be ignored. If
            not provided, the gradient is accumulated into all the leaf Tensors that
            were used to compute the :attr:`tensors`.
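
    A minimal usage sketch (values are illustrative); ``grad_tensors`` supplies
    the "vector" for the Jacobian-vector product of the non-scalar output:

    Example::

        >>> x = torch.randn(3, requires_grad=True)
        >>> y = x * 2
        >>> torch.autograd.backward(y, grad_tensors=torch.ones_like(y))
        >>> torch.allclose(x.grad, torch.full_like(x, 2.0))
        True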
    """
    if torch._C._are_functorch_transforms_active():
        raise RuntimeError(
            "backward() called inside a functorch transform. This is not "
            "supported, please use functorch.grad or functorch.vjp instead "
            "or call backward() outside of functorch transforms."
        )

    if grad_variables is not None:
        warnings.warn("'grad_variables' is deprecated. Use 'grad_tensors' instead.")
        if grad_tensors is None:
            grad_tensors = grad_variables
        else:
            raise RuntimeError(
                "'grad_tensors' and 'grad_variables' (deprecated) "
                "arguments both passed to backward(). Please only "
                "use 'grad_tensors'."
            )
    if inputs is not None and len(inputs) == 0:
        raise RuntimeError("'inputs' argument to backward() cannot be empty.")

    tensors = (tensors,) if isinstance(tensors, torch.Tensor) else tuple(tensors)
    inputs = (
        (inputs,)
        if isinstance(inputs, (torch.Tensor, graph.GradientEdge))
        else tuple(inputs)
        if inputs is not None
        else tuple()
    )

    grad_tensors_ = _tensor_or_tensors_to_tuple(grad_tensors, len(tensors))
    grad_tensors_ = _make_grads(tensors, grad_tensors_, is_grads_batched=False)
    if retain_graph is None:
        retain_graph = create_graph

    # The reason we repeat the same comment below is that
    # some Python versions print out the first line of a multi-line function
    # calls in the traceback and some print out the last line
    _engine_run_backward(
        tensors,
        grad_tensors_,
        retain_graph,
        create_graph,
        inputs,
        allow_unreachable=True,
        accumulate_grad=True,
    )


def grad(
    outputs: _TensorOrTensors,
    inputs: _TensorOrTensorsOrGradEdge,
    grad_outputs: Optional[_TensorOrTensors] = None,
    retain_graph: Optional[bool] = None,
    create_graph: bool = False,
    only_inputs: bool = True,
    allow_unused: Optional[bool] = None,
    is_grads_batched: bool = False,
    materialize_grads: bool = False,
) -> Tuple[torch.Tensor, ...]:
    r"""Computes and returns the sum of gradients of outputs with respect to
    the inputs.

    ``grad_outputs`` should be a sequence of length matching ``outputs``,
    containing the "vector" in the vector-Jacobian product, usually the pre-computed
    gradients w.r.t. each of the outputs. If an output doesn't require_grad,
    then the gradient can be ``None``.

    .. note::

        If you run any forward ops, create ``grad_outputs``, and/or call ``grad``
        in a user-specified CUDA stream context, see
        :ref:`Stream semantics of backward passes<bwd-cuda-stream-semantics>`.

    .. note::

        The ``only_inputs`` argument is deprecated and is ignored now (defaults to ``True``).
        To accumulate gradient for other parts of the graph, please use
        ``torch.autograd.backward``.

    Args:
        outputs (sequence of Tensor): outputs of the differentiated function.
        inputs (sequence of Tensor or GradientEdge): Inputs w.r.t. which the gradient will be
            returned (and not accumulated into ``.grad``).
        grad_outputs (sequence of Tensor): The "vector" in the vector-Jacobian product.
            Usually gradients w.r.t. each output. None values can be specified for scalar
            Tensors or ones that don't require grad. If a None value would be acceptable
            for all grad_tensors, then this argument is optional. Default: None.
        retain_graph (bool, optional): If ``False``, the graph used to compute the grad
            will be freed. Note that in nearly all cases setting this option to ``True``
            is not needed and often can be worked around in a much more efficient
            way. Defaults to the value of ``create_graph``.
        create_graph (bool, optional): If ``True``, graph of the derivative will
            be constructed, allowing to compute higher order derivative products.
            Default: ``False``.
        allow_unused (Optional[bool], optional): If ``False``, specifying inputs
            that were not used when computing outputs (and therefore their grad is
            always zero) is an error. Defaults to the value of ``materialize_grads``.
        is_grads_batched (bool, optional): If ``True``, the first dimension of each
            tensor in ``grad_outputs`` will be interpreted as the batch dimension.
            Instead of computing a single vector-Jacobian product, we compute a
            batch of vector-Jacobian products for each "vector" in the batch.
            We use the vmap prototype feature as the backend to vectorize calls
            to the autograd engine so that this computation can be performed in a
            single call. This should lead to performance improvements when compared
            to manually looping and performing backward multiple times. Note that
            due to this feature being experimental, there may be performance
            cliffs. Please use ``torch._C._debug_only_display_vmap_fallback_warnings(True)``
            to show any performance warnings and file an issue on GitHub if warnings exist
            for your use case. Defaults to ``False``.
        materialize_grads (bool, optional): If ``True``, set the gradient for unused inputs
            to zero instead of None. This is useful when computing higher-order derivatives.
            If ``materialize_grads`` is ``True`` and ``allow_unused`` is ``False``, an error
            will be raised. Defaults to ``False``.
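
    A minimal usage sketch (values are illustrative):

    Example::

        >>> x = torch.randn(2, requires_grad=True)
        >>> y = (x ** 2).sum()
        >>> (dy_dx,) = torch.autograd.grad(y, x)
        >>> torch.allclose(dy_dx, 2 * x)
        True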
    """
    if materialize_grads and allow_unused is False:
        raise ValueError(
            "Expected allow_unused to be True or not passed when materialize_grads=True, "
            "but got: allow_unused=False."
        )
    if allow_unused is None:
        allow_unused = materialize_grads
    t_outputs = cast(
        Tuple[torch.Tensor, ...],
        (outputs,) if is_tensor_like(outputs) else tuple(outputs),
    )
    if is_tensor_like(inputs) or isinstance(inputs, graph.GradientEdge):
        inputs = cast(_TensorOrTensorsOrGradEdge, (inputs,))
    else:
        inputs = tuple(inputs)
    t_inputs = tuple(i for i in inputs if is_tensor_like(i))
    overridable_args = t_outputs + t_inputs
    if has_torch_function(overridable_args):
        return handle_torch_function(
            grad,
            overridable_args,
            t_outputs,
            inputs,
            grad_outputs=grad_outputs,
            retain_graph=retain_graph,
            create_graph=create_graph,
            only_inputs=only_inputs,
            allow_unused=allow_unused,
            is_grads_batched=is_grads_batched,
            materialize_grads=materialize_grads,
        )

    if not only_inputs:
        warnings.warn(
            "only_inputs argument is deprecated and is ignored now "
            "(defaults to True). To accumulate gradient for other "
            "parts of the graph, please use torch.autograd.backward."
        )

    grad_outputs_ = _tensor_or_tensors_to_tuple(grad_outputs, len(t_outputs))
    grad_outputs_ = _make_grads(
        t_outputs, grad_outputs_, is_grads_batched=is_grads_batched
    )

    if retain_graph is None:
        retain_graph = create_graph

    # The reason we repeat the same comment several times below is because
    # some Python versions print out the first line of multi-line function
    # calls in the traceback and some print out the last line
    if is_grads_batched:

        def vjp(gO):
            return _engine_run_backward(
                t_outputs,
                gO,
                retain_graph,
                create_graph,
                inputs,
                allow_unused,
                accumulate_grad=False,
            )

        result = _vmap_internals._vmap(vjp, 0, 0, allow_none_pass_through=True)(
            grad_outputs_
        )
    else:
        result = _engine_run_backward(
            t_outputs,
            grad_outputs_,
            retain_graph,
            create_graph,
            inputs,
            allow_unused,
            accumulate_grad=False,
        )
    if materialize_grads:
        if any(
            result[i] is None and not is_tensor_like(inputs[i])
            for i in range(len(inputs))
        ):
            raise RuntimeError(
                "materialize_grads cannot be used when the given input is a GradientEdge"
            )
        result = tuple(
            output
            if output is not None
            else torch.zeros_like(input, requires_grad=True)
            for (output, input) in zip(result, inputs)
        )
    return result


# This function applies in case of gradient checkpointing for memory
# optimization. Currently, gradient checkpointing is supported only if the
# execution engine is invoked through torch.autograd.backward() and its
# inputs argument is not passed. It is not supported for torch.autograd.grad(),
# because if inputs are specified, the gradient won't be calculated for
# anything else, e.g. model parameters such as weights and biases.
#
# This function returns whether the checkpointing is valid, i.e. whether the
# engine was entered via torch.autograd.backward() (valid) rather than
# torch.autograd.grad() (invalid). The implementation works by maintaining a
# thread-local variable in torch/csrc/autograd/engine.cpp which looks at the
# NodeTask in the stack; before a NodeTask is executed in evaluate_function,
# it checks whether reentrant backwards is imperative or not.
# See https://github.com/pytorch/pytorch/pull/4594 for more discussion/context
def _is_checkpoint_valid():
    return Variable._execution_engine.is_checkpoint_valid()


def variable(*args, **kwargs):
    raise RuntimeError(
        "torch.autograd.variable(...) is deprecated, use torch.tensor(...) instead"
    )


# Monkey patching variable.Variable to fix FX codegen. FX generates a call by roughly doing
# f"{fn.__module__}.{fn.__name__}(...)". This yields torch.autograd.variable.Variable(...) in the
# output of an FX graph. Unfortunately the module name torch.autograd.variable is shadowed by the
# deprecated function - variable(...).
variable.Variable = Variable  # type: ignore[attr-defined]

if not torch._C._autograd_init():
    raise RuntimeError("autograd initialization failed")

# Import all native method/classes
from torch._C._autograd import (
    _add_metadata_json,
    _disable_profiler,
    _disable_profiler_legacy,
    _enable_profiler,
    _enable_profiler_legacy,
    _enable_record_function,
    _get_sequence_nr,
    _kineto_step,
    _KinetoEvent,
    _pop_saved_tensors_default_hooks,
    _prepare_profiler,
    _profiler_enabled,
    _ProfilerResult,
    _push_saved_tensors_default_hooks,
    _record_function_with_args_enter,
    _record_function_with_args_exit,
    _set_empty_test_observer,
    _supported_activities,
    DeviceType,
    kineto_available,
    ProfilerEvent,
    SavedTensor,
)

from torch._C._profiler import ProfilerActivity, ProfilerConfig, ProfilerState

from . import profiler


def _register_py_tensor_class_for_device(device, cls):
    if not isinstance(cls, type):
        raise RuntimeError("cls isn't a typeinfo object")
    torch._C._register_py_class_for_device(device, cls)


is_multithreading_enabled = torch._C._is_multithreading_enabled
torch._C._add_docstr(
    is_multithreading_enabled, "Returns True if multithreading is currently enabled."
)

is_view_replay_enabled = torch._C._is_view_replay_enabled
torch._C._add_docstr(
    is_view_replay_enabled, "Returns True if view-replay is currently enabled."
)