- # Copyright (c) 2017-present, Facebook, Inc.
- # All rights reserved.
- #
- # This source code is licensed under the license found in the LICENSE file in
- # the root directory of this source tree. An additional grant of patent rights
- # can be found in the PATENTS file in the same directory.
-
- import math
-
- import torch
- from torch.autograd import Variable
- import torch.nn as nn
-
- from fairseq import utils
-
-
- class SinusoidalPositionalEmbedding(nn.Module):
-     """This module produces sinusoidal positional embeddings of any length.
-
-     Padding symbols are ignored, but it is necessary to specify whether padding
-     is added on the left side (left_pad=True) or right side (left_pad=False).
-     """
-
-     def __init__(self, embedding_dim, padding_idx, left_pad, init_size=1024):
-         super().__init__()
-         self.embedding_dim = embedding_dim
-         self.padding_idx = padding_idx
-         self.left_pad = left_pad
-         self.weights = SinusoidalPositionalEmbedding.get_embedding(
-             init_size,
-             embedding_dim,
-             padding_idx,
-         )
-         self.register_buffer('_float_tensor', torch.FloatTensor())
-
-     @staticmethod
-     def get_embedding(num_embeddings, embedding_dim, padding_idx=None):
-         """Build sinusoidal embeddings.
-
-         This matches the implementation in tensor2tensor, but differs slightly
-         from the description in Section 3.5 of "Attention Is All You Need".
-         """
-         half_dim = embedding_dim // 2
-         emb = math.log(10000) / (half_dim - 1)
-         emb = torch.exp(torch.arange(half_dim) * -emb)
-         emb = torch.arange(num_embeddings).unsqueeze(1) * emb.unsqueeze(0)
-         emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1).view(num_embeddings, -1)
-         if embedding_dim % 2 == 1:
-             # zero pad
-             emb = torch.cat([emb, torch.zeros(num_embeddings, 1)], dim=1)
-         if padding_idx is not None:
-             emb[padding_idx, :] = 0
-         return emb
-
-     def forward(self, input, incremental_state=None):
-         """Input is expected to be of size [bsz x seqlen]."""
-         # recompute/expand embeddings if needed
-         bsz, seq_len = input.size()
-         max_pos = self.padding_idx + 1 + seq_len
-         if max_pos > self.weights.size(0):
-             self.weights = SinusoidalPositionalEmbedding.get_embedding(
-                 max_pos,
-                 self.embedding_dim,
-                 self.padding_idx,
-             ).type_as(self.weights)
-         self.weights = self.weights.type_as(self._float_tensor)
-         weights = Variable(self.weights)
-
-         if incremental_state is not None:
-             # positions is the same for every token when decoding a single step
-             return weights[self.padding_idx + seq_len, :].expand(bsz, 1, -1)
-
-         positions = Variable(utils.make_positions(input.data, self.padding_idx, self.left_pad))
-         return weights.index_select(0, positions.view(-1)).view(bsz, seq_len, -1)
-
-     def max_positions(self):
-         """Maximum number of supported positions."""
-         return int(1e5)  # an arbitrary large number
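For context, not part of the removed file: a self-contained sketch of what get_embedding() constructs and how forward() would index it for a right-padded batch (left_pad=False). The position computation below is a hand-rolled stand-in for fairseq's utils.make_positions, and build_sinusoidal_table is a hypothetical helper mirroring the table construction above; it assumes a modern PyTorch where Variable is no longer required.

import math
import torch

def build_sinusoidal_table(num_embeddings, embedding_dim, padding_idx=None):
    # Mirrors SinusoidalPositionalEmbedding.get_embedding: sin terms for all
    # frequencies, then the matching cos terms, concatenated per position.
    half_dim = embedding_dim // 2
    scale = math.log(10000) / (half_dim - 1)
    inv_freq = torch.exp(torch.arange(half_dim, dtype=torch.float) * -scale)
    angles = torch.arange(num_embeddings, dtype=torch.float).unsqueeze(1) * inv_freq.unsqueeze(0)
    table = torch.cat([torch.sin(angles), torch.cos(angles)], dim=1)
    if embedding_dim % 2 == 1:
        # zero-pad the last column when the dimension is odd
        table = torch.cat([table, torch.zeros(num_embeddings, 1)], dim=1)
    if padding_idx is not None:
        table[padding_idx, :] = 0  # padding positions embed to all zeros
    return table

# Illustrative batch: token ids with padding_idx = 1, padded on the right.
padding_idx = 1
tokens = torch.tensor([[5, 6, 7, 1],
                       [8, 9, 1, 1]])
table = build_sinusoidal_table(
    num_embeddings=padding_idx + 1 + tokens.size(1),  # same sizing as max_pos in forward()
    embedding_dim=8,
    padding_idx=padding_idx,
)
# Real tokens get positions starting at padding_idx + 1; pads keep padding_idx,
# which is the convention utils.make_positions follows for right padding.
mask = tokens.ne(padding_idx).long()
positions = torch.cumsum(mask, dim=1) * mask + padding_idx
pos_emb = table[positions]   # shape: [bsz, seq_len, embedding_dim]
print(pos_emb.shape)         # torch.Size([2, 4, 8])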