Skip to contents

This does two things:

Usage

with_detect_anomaly(code)

Arguments

code

Code that will be executed in the detect anomaly context.

Details

  • Running the forward pass with detection enabled will allow the backward pass to print the traceback of the forward operation that created the failing backward function.

  • Any backward computation that generate "nan" value will raise an error.

Warning

This mode should be enabled only for debugging as the different tests will slow down your program execution.

Examples

if (torch_is_installed()) {
x <- torch_randn(2, requires_grad = TRUE)
y <- torch_randn(1)
b <- (x^y)$sum()
y$add_(1)

try({
  b$backward()

  with_detect_anomaly({
    b$backward()
  })
})
}
#> Error in (function (self, inputs, gradient, retain_graph, create_graph)  : 
#>   one of the variables needed for gradient computation has been modified by an inplace operation: [CPUFloatType [1]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
#> Exception raised from unpack at /Users/runner/work/libtorch-mac-m1/libtorch-mac-m1/pytorch/torch/csrc/autograd/saved_variable.cpp:193 (most recent call first):
#> frame #0: std::__1::shared_ptr<c10::(anonymous namespace)::PyTorchStyleBacktrace> std::__1::make_shared[abi:ue170006]<c10::(anonymous namespace)::PyTorchStyleBacktrace, c10::SourceLocation&, void>(c10::SourceLocation&) + 121 (0x10dc95049 in libc10.dylib)
#> frame #1: c10::Error::Error(c10::SourceLocation, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>) + 54 (0x10dc95186 in libc10.dylib)
#> frame #2: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 149 (0x10dc91a45 in libc10.dylib)
#> frame #3: torch::autograd::SavedVariable::unpack(std::__1::shared_ptr<torch::autograd::Node>) const + 2149 (0x1250409e5 in libtorch_cpu.dylib)
#> frame #4: torch::autograd::generated::PowBackward1::apply(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>>&&) + 66 (0x123d6e8e2 in libtorch_cpu.dylib)
#> frame #5: torch::autograd::Node::operator()(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>>&&) + 104 (0x125006a88 in libtorch_cpu.dylib)
#> frame #6: torch::autograd::Engine::evaluate_function(std::__1::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&, std::__1::shared_ptr<torch::autograd::ReadyQueue> const&) + 2899 (0x124ffdff3 in libtorch_cpu.dylib)
#> frame #7: torch::autograd::Engine::thread_main(std::__1::shared_ptr<torch::autograd::GraphTask> const&) + 1140 (0x124ffce74 in libtorch_cpu.dylib)
#> frame #8: torch::autograd::Engine::execute_with_graph_task(std::__1::shared_ptr<torch::autograd::GraphTask> const&, std::__1::shared_ptr<torch::autograd::Node>, torch::autograd::InputBuffer&&) + 535 (0x125005a37 in libtorch_cpu.dylib)
#> frame #9: torch::autograd::Engine::execute(std::__1::vector<torch::autograd::Edge, std::__1::allocator<torch::autograd::Edge>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool, bool, std::__1::vector<torch::autograd::Edge, std::__1::allocator<torch::autograd::Edge>> const&) + 1834 (0x12500447a in libtorch_cpu.dylib)
#> frame #10: torch::autograd::run_backward(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool) + 982 (0x124fe8c06 in libtorch_cpu.dylib)
#> frame #11: torch::autograd::backward(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::optional<bool>, bool, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&) + 107 (0x124fe810b in libtorch_cpu.dylib)
#> frame #12: torch::autograd::VariableHooks::_backward(at::Tensor const&, c10::ArrayRef<at::Tensor>, std::__1::optional<at::Tensor> const&, std::__1::optional<bool>, bool) const + 296 (0x125045c08 in libtorch_cpu.dylib)
#> frame #13: at::Tensor::_backward(c10::ArrayRef<at::Tensor>, std::__1::optional<at::Tensor> const&, std::__1::optional<bool>, bool) const + 73 (0x120a862d9 in libtorch_cpu.dylib)
#> frame #14: _lantern_Tensor__backward_tensor_tensorlist_tensor_bool_bool + 211 (0x110f7cfd3 in liblantern.dylib)
#> frame #15: std::__1::__function::__func<cpp_torch_method__backward_self_Tensor_inputs_TensorList(XPtrTorchTensor, XPtrTorchTensorList, XPtrTorchOptionalTensor, XPtrTorchoptional_bool, XPtrTorchbool)::$_2, std::__1::allocator<cpp_torch_method__backward_self_Tensor_inputs_TensorList(XPtrTorchTensor, XPtrTorchTensorList, XPtrTorchOptionalTensor, XPtrTorchoptional_bool, XPtrTorchbool)::$_2>, void ()>::operator()() + 54 (0x10f796ab6 in torchpkg.so)
#> frame #16: std::__1::packaged_task<void ()>::operator()() + 72 (0x10f794b88 in torchpkg.so)
#> frame #17: EventLoop<void>::run() + 413 (0x10f7949dd in torchpkg.so)
#> frame #18: void* std::__1::__thread_proxy[abi:v160006]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, ThreadPool<void>::ThreadPool(int)::'lambda'()>>(void*) + 50 (0x10f794732 in torchpkg.so)
#> frame #19: _pthread_start + 125 (0x7ff808ead1d3 in libsystem_pthread.dylib)
#> frame #20: thread_start + 15 (0x7ff808ea8bd3 in libsystem_pthread.dylib)
#>