A context manager that enables anomaly detection for the autograd engine.
Source: R/autograd.R
This does two things:
Details
- Running the forward pass with detection enabled allows the backward pass to print the traceback of the forward operation that created the failing backward function.
- Any backward computation that generates NaN values will raise an error.
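For example, the NaN check can be triggered by ordinary tensor operations whose backward pass multiplies 0 by Inf. The snippet below is an illustrative sketch, not one of the package's shipped examples; it assumes the usual torch for R API (torch_tensor(), torch_sqrt(), $backward()).

if (torch_is_installed()) {
  # forward pass is finite: sqrt(0) = 0, then multiplied by 0
  x <- torch_tensor(0, requires_grad = TRUE)
  z <- (torch_sqrt(x) * 0)$sum()
  try({
    # the backward pass computes 0 * Inf = NaN for the sqrt node;
    # with anomaly detection enabled this raises an error instead of
    # silently propagating NaN gradients
    with_detect_anomaly({
      z$backward()
    })
  })
}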
Warning
This mode should be enabled only for debugging, as the additional checks will slow down your program's execution.
Examples
if (torch_is_installed()) {
  x <- torch_randn(2, requires_grad = TRUE)
  y <- torch_randn(1)
  b <- (x^y)$sum()
  # modifying `y` in place invalidates the value saved for the backward pass
  y$add_(1)
  try({
    b$backward()
    # retry under anomaly detection to also get the forward traceback
    with_detect_anomaly({
      b$backward()
    })
  })
}
#> Error in (function (self, inputs, gradient, retain_graph, create_graph) :
#> one of the variables needed for gradient computation has been modified by an inplace operation: [CPUFloatType [1]] is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).
#> Exception raised from unpack at /Users/runner/work/libtorch-mac-m1/libtorch-mac-m1/pytorch/torch/csrc/autograd/saved_variable.cpp:187 (most recent call first):
#> frame #0: std::__1::shared_ptr<c10::(anonymous namespace)::PyTorchStyleBacktrace> std::__1::make_shared[abi:ue170006]<c10::(anonymous namespace)::PyTorchStyleBacktrace, c10::SourceLocation&, void>(c10::SourceLocation&) + 121 (0x108ecb639 in libc10.dylib)
#> frame #1: c10::Error::Error(c10::SourceLocation, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>) + 54 (0x108ecb776 in libc10.dylib)
#> frame #2: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>> const&) + 149 (0x108ec8035 in libc10.dylib)
#> frame #3: torch::autograd::SavedVariable::unpack(std::__1::shared_ptr<torch::autograd::Node>) const + 1987 (0x11fcd26f3 in libtorch_cpu.dylib)
#> frame #4: torch::autograd::generated::PowBackward1::apply(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>>&&) + 84 (0x11eb491a4 in libtorch_cpu.dylib)
#> frame #5: torch::autograd::Node::operator()(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>>&&) + 104 (0x11fc99108 in libtorch_cpu.dylib)
#> frame #6: torch::autograd::Engine::evaluate_function(std::__1::shared_ptr<torch::autograd::GraphTask>&, torch::autograd::Node*, torch::autograd::InputBuffer&, std::__1::shared_ptr<torch::autograd::ReadyQueue> const&) + 3039 (0x11fc91bdf in libtorch_cpu.dylib)
#> frame #7: torch::autograd::Engine::thread_main(std::__1::shared_ptr<torch::autograd::GraphTask> const&) + 1140 (0x11fc909d4 in libtorch_cpu.dylib)
#> frame #8: torch::autograd::Engine::execute_with_graph_task(std::__1::shared_ptr<torch::autograd::GraphTask> const&, std::__1::shared_ptr<torch::autograd::Node>, torch::autograd::InputBuffer&&) + 415 (0x11fc9855f in libtorch_cpu.dylib)
#> frame #9: torch::autograd::Engine::execute(std::__1::vector<torch::autograd::Edge, std::__1::allocator<torch::autograd::Edge>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool, bool, std::__1::vector<torch::autograd::Edge, std::__1::allocator<torch::autograd::Edge>> const&) + 1786 (0x11fc96dda in libtorch_cpu.dylib)
#> frame #10: torch::autograd::run_backward(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, bool, bool) + 982 (0x11fc7e186 in libtorch_cpu.dylib)
#> frame #11: torch::autograd::backward(std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&, std::__1::optional<bool>, bool, std::__1::vector<at::Tensor, std::__1::allocator<at::Tensor>> const&) + 107 (0x11fc7d68b in libtorch_cpu.dylib)
#> frame #12: torch::autograd::VariableHooks::_backward(at::Tensor const&, c10::ArrayRef<at::Tensor>, std::__1::optional<at::Tensor> const&, std::__1::optional<bool>, bool) const + 296 (0x11fcd7918 in libtorch_cpu.dylib)
#> frame #13: at::Tensor::_backward(c10::ArrayRef<at::Tensor>, std::__1::optional<at::Tensor> const&, std::__1::optional<bool>, bool) const + 73 (0x11bc9d419 in libtorch_cpu.dylib)
#> frame #14: _lantern_Tensor__backward_tensor_tensorlist_tensor_bool_bool + 211 (0x10cc9e4c3 in liblantern.dylib)
#> frame #15: std::__1::__function::__func<cpp_torch_method__backward_self_Tensor_inputs_TensorList(XPtrTorchTensor, XPtrTorchTensorList, XPtrTorchOptionalTensor, XPtrTorchoptional_bool, XPtrTorchbool)::$_2, std::__1::allocator<cpp_torch_method__backward_self_Tensor_inputs_TensorList(XPtrTorchTensor, XPtrTorchTensorList, XPtrTorchOptionalTensor, XPtrTorchoptional_bool, XPtrTorchbool)::$_2>, void ()>::operator()() + 54 (0x10b527836 in torchpkg.so)
#> frame #16: std::__1::packaged_task<void ()>::operator()() + 72 (0x10b525908 in torchpkg.so)
#> frame #17: EventLoop<void>::run() + 413 (0x10b52575d in torchpkg.so)
#> frame #18: void* std::__1::__thread_proxy[abi:v160006]<std::__1::tuple<std::__1::unique_ptr<std::__1::__thread_struct, std::__1::default_delete<std::__1::__thread_struct>>, ThreadPool<void>::ThreadPool(int)::'lambda'()>>(void*) + 50 (0x10b5254b2 in torchpkg.so)
#> frame #19: _pthread_start + 125 (0x7ff80e2a71d3 in libsystem_pthread.dylib)
#> frame #20: thread_start + 15 (0x7ff80e2a2bd3 in libsystem_pthread.dylib)
#>