
Transformer Training Example

import math

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Define the model: a Transformer encoder followed by a linear classification head
class Transformer(nn.Module):
    def __init__(self, input_size, output_size, hidden_size=512, num_layers=6, num_heads=8):
        super().__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.pos_encoder = PositionalEncoding(hidden_size, 0.1)
        # batch_first=True so all tensors are laid out as (batch, seq_len, hidden_size)
        encoder_layer = nn.TransformerEncoderLayer(hidden_size, num_heads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, src):
        src = self.embedding(src)               # (batch, seq_len, hidden_size)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)  # (batch, seq_len, hidden_size)
        output = output.mean(dim=1)             # mean-pool over the sequence for classification
        output = self.decoder(output)           # (batch, output_size) logits
        return output
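As a quick smoke test of the shapes, here is a minimal sketch. It relies on the PositionalEncoding class defined further down, so run it after the full script; the dummy batch is made up purely for illustration:

# Hypothetical shape check: 2 sequences of 5 token ids from a vocabulary of 6
dummy = torch.randint(0, 6, (2, 5))
m = Transformer(input_size=6, output_size=4)
print(m(dummy).shape)  # expected: torch.Size([2, 4])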

# Define the dataset: a thin wrapper around (data, labels) tensors
class MyDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.labels[index]
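The class simply forwards indexing to the underlying tensors, which is all DataLoader needs to batch them. A hypothetical usage sketch with toy tensors:

# Wrap two toy examples and iterate over them as one batch
ds = MyDataset(torch.LongTensor([[1, 2], [3, 4]]), torch.LongTensor([0, 1]))
for x, y in DataLoader(ds, batch_size=2):
    print(x.shape, y.shape)  # torch.Size([2, 2]) torch.Size([2])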

# Define the positional encoding layer (sinusoidal, as in "Attention Is All You Need")
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        # 10000^(2i/d_model) denominator, computed in log space for numerical stability
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)  # (1, max_len, d_model), broadcasts over the batch dimension
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x: (batch, seq_len, d_model); add the encoding for the first seq_len positions
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)
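For reference, the buffer pe holds the standard sinusoidal encoding, where pos is the position in the sequence and i indexes pairs of embedding dimensions:

$$\mathrm{PE}_{(pos,\,2i)} = \sin\!\left(\frac{pos}{10000^{2i/d_{\mathrm{model}}}}\right), \qquad \mathrm{PE}_{(pos,\,2i+1)} = \cos\!\left(\frac{pos}{10000^{2i/d_{\mathrm{model}}}}\right)$$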

# Training loop
def train(model, train_loader, criterion, optimizer, scheduler=None, num_epochs=10):
    for epoch in range(num_epochs):
        for data, label in train_loader:
            data = data.to(device)
            label = label.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
        if scheduler is not None:
            scheduler.step()
        # note: this reports only the loss of the last batch in the epoch
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
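Since the print above only shows the last batch's loss, a variant that averages over the whole epoch may be more informative. A sketch (the name train_avg is hypothetical, not from the original post):

def train_avg(model, train_loader, criterion, optimizer, scheduler=None, num_epochs=10):
    for epoch in range(num_epochs):
        total_loss = 0.0
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            optimizer.zero_grad()
            loss = criterion(model(data), label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        if scheduler is not None:
            scheduler.step()
        # average over all batches in the epoch
        print(f'Epoch [{epoch + 1}/{num_epochs}], Avg Loss: {total_loss / len(train_loader):.4f}')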

# Toy data: four sequences of token ids, each assigned to one of four classes
data = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [2, 3, 1, 5, 4], [4, 2, 1, 3, 5]]
# CrossEntropyLoss expects class indices, not one-hot vectors
labels = [0, 1, 2, 3]

# Convert to tensors
data = torch.LongTensor(data)
labels = torch.LongTensor(labels)

# Hyperparameters
input_size = 6    # vocabulary size (token ids 0-5)
output_size = 4   # number of classes
hidden_size = 512
num_layers = 6
num_heads = 8
batch_size = 2
num_epochs = 100
learning_rate = 0.01
device = 'cpu'

# Create the model and dataset
model = Transformer(input_size, output_size, hidden_size, num_layers, num_heads).to(device)
dataset = MyDataset(data, labels)

# Data loader and loss function
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
criterion = nn.CrossEntropyLoss()

# Optimizer and learning rate scheduler (lr is multiplied by 0.95 after every epoch)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.95)

# Run the training
train(model, train_loader, criterion, optimizer, scheduler, num_epochs)
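Once training finishes, a minimal inference sketch on the same toy data, taking the argmax over the logits (this step is not in the original script):

# Predict the class of each example sequence
model.eval()
with torch.no_grad():
    logits = model(data.to(device))  # (4, output_size)
    preds = logits.argmax(dim=1)
print(preds)  # should approach tensor([0, 1, 2, 3]) after training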