import torch
from torch import nn
from torch.nn import functional as F

from models.transformer.attention import MultiHeadAttention
from models.transformer.utils import sinusoid_encoding_table, PositionWiseFeedForward


class EncoderLayer(nn.Module):
    """A single Transformer encoder layer: multi-head self-attention followed by a position-wise feed-forward block."""

    def __init__(self, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1, identity_map_reordering=False,
                 attention_module=None, attention_module_kwargs=None):
        super(EncoderLayer, self).__init__()
        self.identity_map_reordering = identity_map_reordering
        self.mhatt = MultiHeadAttention(d_model, d_k, d_v, h, dropout, identity_map_reordering=identity_map_reordering,
                                        attention_module=attention_module,
                                        attention_module_kwargs=attention_module_kwargs)
        self.pwff = PositionWiseFeedForward(d_model, d_ff, dropout, identity_map_reordering=identity_map_reordering)

    def forward(self, queries, keys, values, attention_mask=None, attention_weights=None):
        # Multi-head attention over the inputs, then the position-wise feed-forward transformation.
        att = self.mhatt(queries, keys, values, attention_mask, attention_weights)
        ff = self.pwff(att)
        return ff


class Encoder(nn.Module):
    """Stack of N EncoderLayer modules operating on input features of size d_in."""

    def __init__(self, N, max_len, d_in, d_model=512, d_k=64, d_v=64, h=8, d_ff=2048, dropout=.1,
                 identity_map_reordering=False, attention_module=None, attention_module_kwargs=None,
                 with_pe=False, with_mesh=False):
        super(Encoder, self).__init__()
        self.d_in = d_in
        self.d_model = d_model
        self.dropout = dropout
        self.layers = nn.ModuleList([EncoderLayer(d_model, d_k, d_v, h, d_ff, dropout,
                                                  identity_map_reordering=identity_map_reordering,
                                                  attention_module=attention_module,
                                                  attention_module_kwargs=attention_module_kwargs)
                                     for _ in range(N)])
        # Frozen sinusoidal positional encoding table; positions are indexed from 1 in forward().
        self.pos_emb = nn.Embedding.from_pretrained(sinusoid_encoding_table(max_len + 1, self.d_in, 0), freeze=True)
        self.fc = nn.Linear(d_in, self.d_model)
        self.dropout = nn.Dropout(p=self.dropout)
        self.layer_norm = nn.LayerNorm(self.d_model)
        self.with_pe = with_pe
        self.with_mesh = with_mesh

    def forward(self, input):
        # input: (b_s, seq_len, d_in)
        b_s, seq_len = input.shape[:2]
        seq = torch.arange(1, seq_len + 1, device=input.device).view(1, -1).expand(b_s, -1)  # (b_s, seq_len)

        out = input
        if self.with_pe:
            out = out + self.pos_emb(seq)
        # Project the input features to d_model, then apply dropout and layer normalization.
        out = F.relu(self.fc(out))
        out = self.dropout(out)
        out = self.layer_norm(out)
        outs = list()
        for l in self.layers:
            out = l(out, out, out)
            if self.with_mesh:
                # Collect the output of every layer when meshed outputs are requested.
                outs.append(out.unsqueeze(1))

        if self.with_mesh:
            outs = torch.cat(outs, 1)  # (b_s, N, seq_len, d_model)
            return outs, None
        return out, None
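

# --- Usage sketch (not part of the original file) ---
# A minimal smoke test showing how the Encoder might be driven, assuming this
# module sits inside the original repository so that MultiHeadAttention,
# PositionWiseFeedForward and sinusoid_encoding_table resolve, and assuming
# MultiHeadAttention falls back to standard scaled dot-product attention when
# attention_module is None. The batch size, sequence length and feature size
# below are illustrative assumptions, not values taken from the original code.
if __name__ == '__main__':
    encoder = Encoder(N=3, max_len=50, d_in=2048, with_pe=True, with_mesh=True)
    features = torch.randn(2, 50, 2048)  # (b_s, seq_len, d_in)
    outs, mask = encoder(features)
    # With with_mesh=True, one output slice per layer: (2, 3, 50, 512); mask is None here.
    print(outs.shape, mask)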