
Autograd and backpropagation

Any discussion of autograd has to start with the requires_grad keyword argument passed when creating a tensor.

Multivariate differentiation with scalars

import torch
t = torch.tensor([[1, 2, 3], [4, 5, 6]])
a = torch.tensor(1.0)
b = torch.tensor(2.0)
print(a.grad) # None
c = a + b
try:
    c.backward()
except RuntimeError as e:
    print(e)
# element 0 of tensors does not require grad and does not have a grad_fn
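The example below fixes this by recreating the tensors with requires_grad=True. As a side note (not part of the original example), an existing floating-point tensor can also be switched to gradient tracking in place with requires_grad_():

a.requires_grad_(True)  # in-place: a now tracks gradients
b.requires_grad_(True)
print((a + b).grad_fn)  # now an <AddBackward0 ...> instead of None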
a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(2.0, requires_grad=True)
print(a.grad, b.grad)
# None None
c = a + b
print(c)
# tensor(3., grad_fn=<AddBackward0>)
c.backward()
# 也可以写作 torch.autograd.backward(c)
c, a, b
# (tensor(3., grad_fn=<AddBackward0>),
# tensor(1., requires_grad=True),
# tensor(2., requires_grad=True))
a.grad, b.grad
# (tensor(1.), tensor(1.))
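These gradients of 1 are exactly the partial derivatives of a sum: each input feeds into c with coefficient 1, independent of its value.

\[ c = a + b, \qquad \frac{\partial c}{\partial a} = 1, \qquad \frac{\partial c}{\partial b} = 1 \]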
a = torch.tensor(42.0, requires_grad=True)
b = torch.tensor(99.0, requires_grad=True)
c = a * b
c
# tensor(4158., grad_fn=<MulBackward0>)
c.backward()
a.grad, b.grad
# (tensor(99.), tensor(42.))
print(a.grad_fn) # None
print(a.is_leaf) # True
print(c.grad_fn, c.is_leaf) # <MulBackward0 object at 0x000001E264B841F0> False
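For the product, each gradient is the value of the other factor, which is why a.grad is 99 and b.grad is 42:

\[ c = a \times b, \qquad \frac{\partial c}{\partial a} = b = 99, \qquad \frac{\partial c}{\partial b} = a = 42 \]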

A more complex example

\[ f = a \times b + \frac{c \times d^2}{e} \]
a = torch.tensor(2., requires_grad=True)
b = torch.tensor(4., requires_grad=True)
c = torch.tensor(6., requires_grad=True)
d = torch.tensor(8., requires_grad=True)
e = torch.tensor(10., requires_grad=True)
print(a.grad) # None
g = a * b
h = c*d**2/e
f = g + h
print(g.is_leaf, g.grad_fn)
# False <MulBackward0 object at 0x000001E264C1F7F0>
f.backward()
for el in [a, b, c, d, e]:
    print(el.grad)
# tensor(4.)
# tensor(2.)
# tensor(6.4000)
# tensor(9.6000)
# tensor(-3.8400)
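The printed gradients agree with the analytic partial derivatives of f evaluated at a = 2, b = 4, c = 6, d = 8, e = 10:

\[ \frac{\partial f}{\partial a} = b = 4, \quad \frac{\partial f}{\partial b} = a = 2, \quad \frac{\partial f}{\partial c} = \frac{d^2}{e} = 6.4, \quad \frac{\partial f}{\partial d} = \frac{2cd}{e} = 9.6, \quad \frac{\partial f}{\partial e} = -\frac{c d^2}{e^2} = -3.84 \]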
a.grad = None
g.grad, h.grad
d:\Users\Min\Anaconda3\envs\torch\lib\site-packages\torch\_tensor.py:1083: UserWarning: The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad attribute won't be populated during autograd.backward(). If you indeed want the .grad field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor instead. See github.com/pytorch/pytorch/pull/30531 for more informations. (Triggered internally at  C:\cb\pytorch_1000000000000\work\build\aten\src\ATen/core/TensorBody.h:482.)
      return self._grad

(None, None)
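The warning says that intermediate (non-leaf) tensors such as g and h do not get their .grad populated by default. As the message suggests, calling retain_grad() on them before backward() keeps their gradients. A minimal sketch, rebuilding the graph from the same leaves (the outputs in the comments are what f = g + h implies, not taken from the original notebook):

g = a * b
h = c * d ** 2 / e
f = g + h
g.retain_grad()  # ask autograd to keep the gradient of this non-leaf tensor
h.retain_grad()
f.backward()
print(g.grad, h.grad)
# tensor(1.) tensor(1.)  -- f = g + h, so df/dg = df/dh = 1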

Multivariate differentiation with matrices

a = torch.tensor([[2.,4.,6.],[2.,3.,4.],[9.,7.,8.]], requires_grad=True)
b = torch.tensor([[4.,9.,2.],[7.,1.,1.],[6.,7.,3.]], requires_grad=True)
c = torch.tensor([[6.,1.,3.],[2.,3.,2.],[4.,8.,6.]], requires_grad=True)
d = torch.tensor([[2.,2.,4.],[4.,4.,6.],[3.,2.,8.]], requires_grad=True)
e = torch.tensor([[1.,3.,6.],[7.,1.,4.],[1.,4.,6.]], requires_grad=True)
g = a @ b
h = c@d**2/e
f = g + h
grad_tensor = torch.ones_like(g)
g.backward(grad_tensor)
print(g, a.grad, b.grad)
tensor([[ 72.,  64.,  26.],
        [ 53.,  49.,  19.],
        [133., 144.,  49.]], grad_fn=<MmBackward0>) tensor([[15.,  9., 16.],
        [15.,  9., 16.],
        [15.,  9., 16.]]) tensor([[13., 13., 13.],
        [14., 14., 14.],
        [18., 18., 18.]])
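Because g is not a scalar, backward() needs an explicit gradient argument (grad_tensor, often written v), and autograd computes a vector-Jacobian product. For g = a @ b this works out to

\[ \frac{\partial L}{\partial a} = v\, b^{\mathsf T}, \qquad \frac{\partial L}{\partial b} = a^{\mathsf T} v \]

With v all ones, every row of v bᵀ is the vector of row sums of b ([15, 9, 16]), and aᵀv has constant rows given by the column sums of a (13, 14, 18), which is exactly what is printed above.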
g = a @ b
h = c@d**2/e
f = g + h

grad_tensor = torch.ones_like(g)
grad_tensor[1][1] = 2
g.backward(grad_tensor)
print(g, a.grad, b.grad)
tensor([[ 72.,  64.,  26.],
        [ 53.,  49.,  19.],
        [133., 144.,  49.]], grad_fn=<MmBackward0>) tensor([[30., 18., 32.],
        [39., 19., 39.],
        [30., 18., 32.]]) tensor([[26., 28., 26.],
        [28., 31., 28.],
        [36., 40., 36.]])
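Note that the a.grad and b.grad printed here are not the fresh vector-Jacobian products: .grad accumulates across backward() calls unless it is cleared, so these values include the gradients from the previous backward() (the same applies to the f.backward(torch.ones_like(f)) output further below). A small sketch of what clearing the gradients first would give, with the same a, b and grad_tensor (expected values worked out by hand, not taken from the original notebook):

a.grad = None  # drop the gradients accumulated by the previous backward()
b.grad = None
g = a @ b
grad_tensor = torch.ones_like(g)
grad_tensor[1][1] = 2
g.backward(grad_tensor)
print(a.grad)
# tensor([[15.,  9., 16.],
#         [24., 10., 23.],
#         [15.,  9., 16.]])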
f
tensor([[139.0000,  81.3333,  80.0000],
        [ 63.5714, 113.0000,  86.0000],
        [331.0000, 186.0000, 171.6667]], grad_fn=<AddBackward0>)
g = a @ b
h = c@d**2/e
f = g + h

f.backward(torch.ones_like(f))
for el in [a, b, c, d, e]:
    print(el.grad)
tensor([[45., 27., 48.],
        [54., 28., 55.],
        [45., 27., 48.]])
tensor([[39., 41., 39.],
        [42., 45., 42.],
        [54., 58., 54.]])
tensor([[ 8.0000, 27.3333, 21.0000],
        [ 8.5714, 27.2857, 21.2857],
        [ 7.6667, 26.0000, 20.6667]])
tensor([[41.1429, 20.0000, 17.3333],
        [75.4286, 42.6667, 27.0000],
        [55.7143, 18.0000, 32.0000]])
tensor([[ -67.0000,   -5.7778,   -9.0000],
        [  -1.5102,  -64.0000,  -16.7500],
        [-198.0000,  -10.5000,  -20.4444]])
x = torch.tensor([1., 2., 3.], requires_grad=True)
y = x @ x
print(y)
y.backward()
print(x.grad)
tensor(14., grad_fn=<DotBackward0>)
tensor([2., 4., 6.])
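The gradient [2., 4., 6.] is just the derivative of the squared norm:

\[ y = x^{\mathsf T} x = \sum_i x_i^2, \qquad \frac{\partial y}{\partial x_i} = 2 x_i \]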
y = x.T @ x
print(y)
y.backward()
print(x.grad)
tensor(14., grad_fn=<DotBackward0>)
tensor([ 4.,  8., 12.])

x.grad was not cleared before this second backward(), so the new gradient 2x is added on top of the previous [2., 4., 6.], which is why it now reads [4., 8., 12.] instead of [2., 4., 6.].


C:\Users\Min\AppData\Local\Temp\ipykernel_15588\3668660662.py:1: UserWarning: The use of `x.T` on tensors of dimension other than 2 to reverse their shape is deprecated and it will throw an error in a future release. Consider `x.mT` to transpose batches of matricesor `x.permute(*torch.arange(x.ndim - 1, -1, -1))` to reverse the dimensions of a tensor. (Triggered internally at  C:\cb\pytorch_1000000000000\work\aten\src\ATen\native\TensorShape.cpp:2985.)
  y = x.T @ x
import torch

x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
z = torch.matmul(x, w) + b  # logits of a single linear layer
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)  # compare logits against the expected output

loss.backward()
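After loss.backward(), the gradients live on the leaf tensors that were created with requires_grad=True, namely w and b; x and y get none. A quick check (expected shapes follow from w being 5x3 and b having length 3):

print(w.grad.shape)  # torch.Size([5, 3])
print(b.grad.shape)  # torch.Size([3])
print(x.grad, y.grad)  # None None -- x and y were created without requires_grad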