Context manager that enables anomaly detection for the autograd engine.
Source:R/autograd.R
with_detect_anomaly.Rd
This does two things:
Details
Running the forward pass with detection enabled will allow the backward pass to print the traceback of the forward operation that created the failing backward function.
Any backward computation that generates a "nan" value will raise an error.
Warning
This mode should be enabled only for debugging, as the additional checks will slow down your program's execution.
Examples
if (torch_is_installed()) {
x <- torch_randn(2, requires_grad = TRUE)
y <- torch_randn(1)
b <- (x^y)$sum()
y$add_(1)
try({
b$backward()
with_detect_anomaly({
b$backward()
})
})
}
#> Error in (function (self, inputs, gradient, retain_graph, create_graph) :
#> one of the variables needed for gradient computation has been modified by an inplace operation: [CPUFloatType [1]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
#> Exception raised from unpack at /Users/runner/work/libtorch-mac-m1/libtorch-mac-m1/pytorch/torch/csrc/autograd/saved_variable.cpp:187 (most recent call first):
#> frame #0: std::__1::shared_ptr<c10::(anonymous namespace)::PyTorchStyleBacktrace> std::__1::make_shared[abi:ue170006]<c10::(anonymous namespace)::PyTorchStyleBacktrace, c10::SourceLocation&, void>(c10::SourceLocation&) + 121 (0x10e1cc639 in libc10.dylib)
#> frame #1: c10::Error::Error(c10::SourceLocation, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>) + 54 (0x10e1cc776 in libc10.dylib)
#> frame #2: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 149 (0x10e1c9035 in libc10.dylib)
#> frame #3: torch::autograd::SavedVariable::unpack(std::__1::shared_ptr<torch::autograd::Node>) const + 1987 (0x12503e6f3 in libtorch_cpu.dylib)
#> frame #4: torch::autograd::generated::PowBackward1::apply(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>>&&) + 84 (0x123eb51a4 in libtorch_cpu.dylib)
#> frame #5: torch::autograd::Node::operator()(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>>&&) + 104 (0x125005108 in libtorch_cpu.dylib)
#> frame #6: torch::autograd::Engine::evaluate_function(std::__1::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&, std::__1::shared_ptr<torch::autograd::ReadyQueue> const&) + 3039 (0x124ffdbdf in libtorch_cpu.dylib)
#> frame #7: torch::autograd::Engine::thread_main(std::__1::shared_ptr<torch::autograd::GraphTask> const&) + 1140 (0x124ffc9d4 in libtorch_cpu.dylib)
#> frame #8: torch::autograd::Engine::execute_with_graph_task(std::__1::shared_ptr<torch::autograd::GraphTask> const&, std::__1::shared_ptr<torch::autograd::Node>, torch::autograd::InputBuffer&&) + 415 (0x12500455f in libtorch_cpu.dylib)
#> frame #9: torch::autograd::Engine::execute(std::__1::vector<torch::autograd::Edge, std::__1::allocator<torch::autograd::Edge>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool, bool, std::__1::vector<torch::autograd::Edge, std::__1::allocator<torch::autograd::Edge>> const&) + 1786 (0x125002dda in libtorch_cpu.dylib)
#> frame #10: torch::autograd::run_backward(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool) + 982 (0x124fea186 in libtorch_cpu.dylib)
#> frame #11: torch::autograd::backward(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::optional<bool>, bool, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&) + 107 (0x124fe968b in libtorch_cpu.dylib)
#> frame #12: torch::autograd::VariableHooks::_backward(at::Tensor const&, c10::ArrayRef<at::Tensor>, std::__1::optional<at::Tensor> const&, std::__1::optional<bool>, bool) const + 296 (0x125043918 in libtorch_cpu.dylib)
#> frame #13: at::Tensor::_backward(c10::ArrayRef<at::Tensor>, std::__1::optional<at::Tensor> const&, std::__1::optional<bool>, bool) const + 73 (0x121009419 in libtorch_cpu.dylib)
#> frame #14: _lantern_Tensor__backward_tensor_tensorlist_tensor_bool_bool + 211 (0x1120090f3 in liblantern.dylib)
#> frame #15: std::__1::__function::__func<cpp_torch_method__backward_self_Tensor_inputs_TensorList(XPtrTorchTensor, XPtrTorchTensorList, XPtrTorchOptionalTensor, XPtrTorchoptional_bool, XPtrTorchbool)::$_2, std::__1::allocator<cpp_torch_method__backward_self_Tensor_inputs_TensorList(XPtrTorchTensor, XPtrTorchTensorList, XPtrTorchOptionalTensor, XPtrTorchoptional_bool, XPtrTorchbool)::$_2>, void ()>::operator()() + 54 (0x110891d36 in torchpkg.so)
#> frame #16: std::__1::packaged_task<void ()>::operator()() + 72 (0x11088fe08 in torchpkg.so)
#> frame #17: EventLoop<void>::run() + 413 (0x11088fc5d in torchpkg.so)
#> frame #18: void* std::__1::__thread_proxy[abi:v160006]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, ThreadPool<void>::ThreadPool(int)::'lambda'()>>(void*) + 50 (0x11088f9b2 in torchpkg.so)
#> frame #19: _pthread_start + 125 (0x7ff8150b61d3 in libsystem_pthread.dylib)
#> frame #20: thread_start + 15 (0x7ff8150b1bd3 in libsystem_pthread.dylib)
#>