PyTorch API
torch
### Settings
is_tensor(obj)
set_default_dtype(d) # torch.float32
get_default_dtype()
set_default_tensor_type(t) # e.g. torch.FloatTensor (float32)
set_printoptions() # profile="full" to print all elements
### Creations
''' shared parameters
@ dtype=None
@ device=None
@ layout=None : memory layout, eg. strided, sparse_coo
@ requires_grad=False
'''
tensor(data) # always copies the data!
as_tensor(data) # no copy if dtype and device already match.
from_numpy(ndarray) # no copy, shares memory with the ndarray.
zeros(*size) # dtype falls back to the global default (float32)
zeros_like(input)
ones(*size)
ones_like(input)
arange(start=0, end, step=1)
linspace(start, end, steps=100)
logspace(start, end, steps=100)
eye(n)
empty(*size)
empty_like(input)
full(size, fill_value)
full_like(input, fill_value)
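''' a minimal sketch of the copy semantics above (assumes numpy is installed):
>>> import numpy as np, torch
>>> a = np.ones(3)
>>> t1 = torch.tensor(a)      # copies: later edits to `a` do not show up in t1
>>> t2 = torch.from_numpy(a)  # shares memory: a[0] = 5 also changes t2[0]
>>> torch.zeros(2, 3, dtype=torch.float64, device="cpu")  # explicit shared params
'''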
### Operations
cat(seq, dim=0) # concat in dim
stack(seq, dim=0) # stack in a new dim
chunk(tensor, chunks<int>, dim=0) # split into chunks
split(tensor, split_size, dim=0) # split by size
gather(input, dim, index<LongTensor>) # rearrange
'''
very tricky, and different from tf.gather: for dim=1, out[i][j] = input[i][index[i][j]].
>>> t = torch.tensor([[1,2],[3,4]])
>>> torch.gather(t, 1, torch.tensor([[0,0],[1,0]]))
tensor([[ 1, 1],
[ 4, 3]])
'''
index_select(input, dim, indices<LongTensor>) # select indices from input along dim.
"""
indices must be a 1-D tensor, so it can only pick whole slices along dim (less flexible than gather); see the example below.
"""
masked_select(input, mask<ByteTensor>)
nonzero(input) # indices of non-zero input
"""very important and useful!"""
reshape(x, shape)
squeeze(x, dim=None) # default: removes all size-1 dims; returns a view (shares storage), NOT in-place
unsqueeze(x, dim)
t(x) # transpose(x, 0, 1)
transpose(x, d0, d1)
permute(x, (d0, d1, ...))
take(input, indices<LongTensor>) # indices is 1d, flattened indices version of index_select
unbind(x, dim=0) # inverse of stack: returns a tuple of slices along dim (dim is removed)
where(condition<ByteTensor>, x, y) # x if True else y
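''' a small example of the masking helpers above:
>>> import torch
>>> x = torch.tensor([[1, -2], [-3, 4]])
>>> torch.nonzero(x > 0)           # index pairs where the condition holds
tensor([[0, 0],
        [1, 1]])
>>> torch.masked_select(x, x > 0)  # always returns a 1-D tensor
tensor([1, 4])
>>> torch.where(x > 0, x, torch.zeros_like(x))  # keep positives, zero the rest
tensor([[1, 0],
        [0, 4]])
'''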
### random sampling
manual_seed(seed)
bernoulli(input) # out_i ~ B(input_i)
normal(mean, std) # mean&std can be a tensor
rand(*size)
rand_like(input)
randint(low=0, high, size)
randint_like(input, low=0, high) # size comes from input
randn(*size)
randn_like(input)
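''' a short sketch of the tensor-valued mean/std case (outputs depend on the seed):
>>> import torch
>>> torch.manual_seed(0)
>>> torch.normal(mean=torch.zeros(3), std=torch.ones(3))  # one draw per element
>>> torch.randint(0, 10, (2, 3))                          # integers in [0, 10)
'''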
### Math
abs(x)
add(x, n) # tensor + number
div(x, n)
mul(x, n)
ceil(x)
floor(x)
clamp(x, min, max)
pow(x, n)
exp(x)
...
### reduction
argmax(x, dim=None, keepdim=False) # default is flatten
argmin(x, dim=None, keepdim=False)
cumprod(x, dim) # cumulative product
cumsum(x, dim)
dist(x, y, p=2) # p-norm of (x - y)
norm(x, p=2)
unique(x, sorted=False, return_inverse=False) # unique values
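''' a quick check of dim/keepdim on a small matrix:
>>> import torch
>>> x = torch.tensor([[1., 5.], [7., 3.]])
>>> torch.argmax(x)         # flattened -> tensor(2)
>>> torch.argmax(x, dim=1)  # per row -> tensor([1, 0])
>>> torch.cumsum(x, dim=0)  # running sum down the rows
tensor([[1., 5.],
        [8., 8.]])
'''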
### comparison
eq(x, y) # -> element-wise ByteTensor
ne(x, y)
equal(x, y) # -> True/False
isfinite(x)
isinf(x)
isnan(x)
sort(x, dim=None, descending=False)
# others
cross(x, y, dim=-1)
diag(x, diagonal=0)
diagonal(x) # diag(x, 0)
# BLAS & LAPACK
matmul(t1, t2) # broadcastable
''' behavior
@ [M]*[M] = scalar (dot product), [M,N]*[N,K]=[M,K]
@ [M]*[M,N] = [1,M]*[M,N] = [N]
@ [M,N]*[N] = [M]
@ [j,m,n]*[j,n,p] = [j,m,p]
@ [j,m,n]*[n] = [j,m]
@ [j,1,n,m]*[k,m,p] = [j,k,n,p]
@ [j,m,n]*[n,p] = [j,m,p]
'''
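''' a small sanity check of the broadcasting rules above:
>>> import torch
>>> a = torch.randn(10, 1, 3, 4)
>>> b = torch.randn(5, 4, 2)
>>> torch.matmul(a, b).shape  # [j,1,n,m]*[k,m,p] -> [j,k,n,p]
torch.Size([10, 5, 3, 2])
>>> torch.matmul(torch.randn(3), torch.randn(3)).shape  # 1-D * 1-D -> scalar
torch.Size([])
'''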
bmm(b1, b2) # [B,X,Y] * [B,Y,Z] = [B,X,Z]
mm(m1, m2) # [N,M] * [M,P] = [N,P]
dot(t1, t2)
eig(mat, eigenvectors=False)
inverse(x)
det(x)
torch.Tensor
the default torch.Tensor type is torch.FloatTensor (float32)
class Tensor:
shape
device
dtype
layout # strided for dense, sparse_coo for coo
item() # tensor->scalar, only for one-element tensor
tolist() # tensor->list
abs()
abs_() # inplace, faster
clone()
contiguous()
cpu()
cuda()
to()
'''
to(dtype)
to(device)
'''
repeat(*size) # np.tile
'''
>>> x = torch.tensor([1, 2, 3])
>>> x.repeat(4, 2)
tensor([[ 1, 2, 3, 1, 2, 3],
[ 1, 2, 3, 1, 2, 3],
[ 1, 2, 3, 1, 2, 3],
[ 1, 2, 3, 1, 2, 3]])
'''
size()
type()
view(*size)
view_as(other)
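''' a minimal sketch of the common conversions ("cuda" only works with a GPU):
>>> import torch
>>> x = torch.arange(6)
>>> x.view(2, 3)          # reshaped view, shares storage
>>> x.to(torch.float32)   # dtype cast
>>> x.to("cuda")          # device move, requires an available GPU
>>> x.sum().item()        # one-element tensor -> Python scalar
15
'''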
torch.nn
### parameters
Parameter(data, requires_grad=True)
'''a Tensor subclass that is registered as a module parameter when assigned as a Module attribute'''
data # the tensor
requires_grad # bool
### Containers
Module() # base class for all nn modules.
add_module(name, module) # add child module
apply(fn) # apply function fn to all submodules recursively
''' net.apply(init_weight) '''
children() # iterator on children
modules() # iterator over all modules recursively, including self
cpu(), cuda()
double(), float(), half(), ...
train() # train mode.
eval() # eval mode: disables Dropout, BatchNorm uses running statistics.
forward(*input) # define the computation
state_dict()
load_state_dict(dict)
parameters() # iterator on all Parameters
register_buffer(name, tensor) # add a persistent buffer
register_parameter(name, param) # add a parameter
type(dtype) # casts all parameters to dtype.
to() # moves/casts parameters and buffers (device or dtype); modifies the module in place
zero_grad()
Sequential(*args)
args: modules passed positionally, or a single OrderedDict.
# modules are chained: each module's output is the next one's input.
ModuleList(modules=None)
modules: list
# just a list, supporting append, extend.
ModuleDict(modules=None)
# just a dict
ParameterList(...)
ParameterDict(...)
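''' a minimal sketch of a custom Module built from the containers above:
>>> import torch, torch.nn as nn
>>> class Net(nn.Module):
...     def __init__(self):
...         super().__init__()
...         self.body = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
...         self.scale = nn.Parameter(torch.ones(2))  # registered automatically
...     def forward(self, x):
...         return self.body(x) * self.scale
>>> net = Net()
>>> sum(p.numel() for p in net.parameters())  # (4*8+8) + (8*2+2) + 2
60
'''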
### Convolutions
Conv1d(Fin, Fout, ks, stride=1, padding=0, dilation=1, groups=1, bias=True)
'''
It is actually a cross-correlation (no kernel flipping), 'valid' when padding=0.
padding: int or (int, int)
L_out = floor((L_in + 2*padding - dilation*(ks-1) - 1)/stride + 1)
'''
Conv2d(...)
"""
accepts input of shape (N, C, H, W) only.
"""
ConvTranspose1d(Fin, Fout, ks, stride=1, padding=0, output_padding=0, ...)
"""
accept (N, F_in, L_in).
L_out = (L_in - 1)*stride - 2*padding + ks + output_padding
"""
### Pooling
MaxPool1d(ks, stride=None, padding=0, dilation=1, ...)
MaxUnpool1d(ks, stride=None, padding=0) # non-maximal positions are set to 0.
AvgPool1d(...)
LPPool1d(p, ks, stride=None)
AdaptiveMaxPool1d(output_size) # [N, C, L] -> [N, C, output_size]
AdaptiveAvgPool1d(output_size)
### Padding
ReflectionPad1d(padding)
ReplicationPad1d(padding)
...
### activation
ELU(alpha=1.0, inplace=False)
SELU(inplace=False)
ReLU(inplace=False)
ReLU6(inplace=False) # min(max(0,x),6)
LeakyReLU(negative_slope=0.01, inplace=False)
PReLU(num_parameters=1, init=0.25)
"""
x = x if x>=0 else Ax; A is learnable.
num_parameters: length of A; 1 or the number of input channels (Fin).
init: initial value of A.
"""
RReLU(lower=0.125, upper=1/3, inplace=False)
"""
similar to PReLU, but A ~ U(lower, upper) is sampled randomly (not learned).
"""
Sigmoid()
Softplus(beta=1)
Softmax(dim=None) # pass dim explicitly
Tanh()
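''' a small sketch of PReLU's learnable slope (one A per channel when num_parameters=Fin):
>>> import torch, torch.nn as nn
>>> m = nn.PReLU(num_parameters=4)
>>> next(m.parameters()).shape      # the learnable A
torch.Size([4])
>>> m(torch.randn(2, 4, 10)).shape  # element-wise, shape preserved
torch.Size([2, 4, 10])
'''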
### Normalization
BatchNorm1d(num_features, eps=1e-5, momentum=0.1, affine=True, ...)
...
### RNN
### Linear
Linear(Fin, Fout, bias=True)
Bilinear(Fin1, Fin2, Fout, bias=True) # y = x_1 * A * x_2 + b
### dropout
Dropout(p=0.5, inplace=False)
Dropout2d(p=0.5, inplace=False)
### Loss
L1Loss(size_average=True, reduce=True, ...)
'''
l = mean({|x_n - y_n|})
loss(input, target), shape=(N,*)
'''
MSELoss(...)
'''
l = mean({(x_n - y_n)^2})
loss(input, target), shape=(N,*)
'''
CrossEntropyLoss()
'''
l(x, cls) = -x[cls] + log(\sum_j exp(x[j]))
loss(input<N,nCls>, target<N>)
'''
NLLLoss()
'''
negative log likelihood.
l(x, y) = (\sum_n -w_{y_n} * x_{n,y_n}) / (\sum_n w_{y_n})
log_probs = nn.LogSoftmax(dim=1)(x)
loss(log_probs<N,C>, target<N>)
NLLLoss(LogSoftmax(x), target) equals CrossEntropyLoss(x, target).
'''
BCELoss()
'''
l_n = -w_n[y_n * log x_n + (1-y_n)*log(1-x_n)]
l(input<N>,target<N>) = mean(l_n)
probs = nn.Sigmoid()(x) # expects probabilities, not raw logits
loss(probs<N>, target<N>)
BCELoss(Sigmoid(x), target) equals BCEWithLogitsLoss(x, target).
'''
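''' a sanity check of the equivalences noted above (values match up to float error):
>>> import torch, torch.nn as nn
>>> x, y = torch.randn(5, 3), torch.randint(0, 3, (5,))
>>> nn.CrossEntropyLoss()(x, y)
>>> nn.NLLLoss()(nn.LogSoftmax(dim=1)(x), y)  # same value as the line above
>>> z, t = torch.randn(5), torch.empty(5).random_(2)
>>> nn.BCEWithLogitsLoss()(z, t)
>>> nn.BCELoss()(torch.sigmoid(z), t)         # same value as the line above
'''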
torch.nn.functional
# mostly correspond to layers.
interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners=None)
torch.optim
optimizer = optim.SGD(model.parameters(), lr=..., momentum=...)
optimizer.zero_grad()
loss.backward()
optimizer.step()
...
lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=-1)
lr_scheduler.StepLR(optimizer, step_size, gamma=0.1, last_epoch=-1)
'''
>>> scheduler = StepLR(optimizer, step_size=30, gamma=0.1)
>>> for epoch in range(100):
>>> scheduler.step()
>>> train(...)
>>> validate(...)
'''
lr_scheduler.MultiStepLR(opt, milestones, gamma=0.1)
'''
scheduler = MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)
'''
lr_scheduler.ExponentialLR(optimizer, gamma)
lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, verbose=False, threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
'''
dynamically reduce lr when a metric has stopped improving.
mode: 'min' reduces the lr when the metric has stopped decreasing.
>>> optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
>>> scheduler = ReduceLROnPlateau(optimizer, 'min')
>>> for epoch in range(10):
>>> train(...)
>>> val_loss = validate(...)
>>> # Note that step should be called after validate()
>>> scheduler.step(val_loss)
'''
torch.autograd
backward(tensors)
grad(outputs, inputs)
no_grad()
'''
reduces memory consumption; only use it when you are sure you will not call backward().
1. with torch.no_grad():
2. @torch.no_grad()
'''
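''' a minimal sketch of both usages:
>>> import torch
>>> x = torch.ones(2, requires_grad=True)
>>> with torch.no_grad():
...     y = x * 2          # y.requires_grad is False inside the block
>>> @torch.no_grad()
... def infer(model, inp):
...     return model(inp)  # no graph is built, so no backward() here
'''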