Deep Learning: From Basics to Practical Applications 🧠
Deep Learning is a branch of Machine Learning that uses neural networks with many layers to learn complex representations from data. In this post, I walk from the basic concepts through to practical applications.
1. Basic Neural Networks
Perceptron (a single neuron) with PyTorch
```python
import torch
import torch.nn as nn
import torch.optim as optim

# AND-gate data
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y = torch.tensor([[0], [0], [0], [1]], dtype=torch.float32)

class Perceptron(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(2, 1)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

model = Perceptron()
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

for epoch in range(100):
    optimizer.zero_grad()
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

print("Predictions:")
with torch.no_grad():
    preds = model(X)
    print(torch.round(preds).squeeze().numpy())
```
Multi-Layer Perceptron (MLP) with PyTorch
```python
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Sample data
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10,
                           n_redundant=5, random_state=42)
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

class MLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 64), nn.ReLU(),
            nn.Linear(64, 32), nn.ReLU(),
            nn.Linear(32, 16), nn.ReLU(),
            nn.Linear(16, 1), nn.Sigmoid()
        )

    def forward(self, x):
        return self.net(x)

model = MLP(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(50):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    test_outputs = model(X_test)
    acc = ((test_outputs > 0.5) == y_test).float().mean()
    print(f"Test Accuracy: {acc:.3f}")
```
2. Convolutional Neural Networks (CNN)
CNN for Computer Vision
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Prepare the MNIST data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

class SimpleCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, 1), nn.ReLU(), nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 5 * 5, 64), nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(3):
    model.train()
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test Accuracy: {correct/total:.3f}")
```
Transfer Learning with Pre-trained Models (VGG16)
```python
import torch
from torchvision import models, transforms
from PIL import Image

# Load pre-trained VGG16
vgg16 = models.vgg16(pretrained=True)
vgg16.eval()

# Freeze the base model
for param in vgg16.parameters():
    param.requires_grad = False

# Replace the classifier head if you want to fine-tune
vgg16.classifier[6] = torch.nn.Linear(4096, 10)  # e.g. 10 classes

# Predict a single image
def predict_image(img_path):
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    img = Image.open(img_path).convert('RGB')
    img = preprocess(img).unsqueeze(0)
    with torch.no_grad():
        output = vgg16(img)
        _, pred = torch.max(output, 1)
    return pred.item()
```
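Because the frozen backbone keeps `requires_grad=False` while the new head does not, only the replaced layer gets updated during fine-tuning. A minimal sketch of that step, assuming a hypothetical `train_loader` that yields (image, label) batches for the 10 target classes:

```python
import torch.nn as nn
import torch.optim as optim

# Only the new classifier head requires gradients, so only it is optimized;
# the frozen VGG16 features stay untouched.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg16.classifier[6].parameters(), lr=1e-3)

vgg16.train()
for images, labels in train_loader:  # assumed DataLoader with 10-class labels
    optimizer.zero_grad()
    loss = criterion(vgg16(images), labels)
    loss.backward()
    optimizer.step()
```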
3. Recurrent Neural Networks (RNN)
LSTM for Text Classification
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence

# Toy data
texts = [
    "Tôi thích machine learning",
    "Deep learning rất thú vị",
    "AI là tương lai của công nghệ",
    "Thuật toán rất quan trọng",
    "Neural networks rất phức tạp"
]
labels = [1, 1, 1, 0, 0]

# Simple tokenization
vocab = {}
def tokenize(text):
    return [vocab.setdefault(word, len(vocab)) for word in text.lower().split()]

sequences = [torch.tensor(tokenize(t)) for t in texts]
padded = pad_sequence(sequences, batch_first=True)
labels = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)

class LSTMClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim=16, hidden_dim=32):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.embedding(x)
        _, (h, _) = self.lstm(x)
        out = self.fc(h[-1])
        return self.sigmoid(out)

model = LSTMClassifier(len(vocab))
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

for epoch in range(50):
    optimizer.zero_grad()
    outputs = model(padded)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

with torch.no_grad():
    preds = model(padded)
    print("Predictions:", (preds > 0.5).int().squeeze().tolist())
```
4. Generative Adversarial Networks (GAN)
Simple GAN
```python
import torch
import torch.nn as nn
import torch.optim as optim

# Generator: maps a latent vector to a flattened 28x28 image
class Generator(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 256), nn.LeakyReLU(0.2), nn.BatchNorm1d(256),
            nn.Linear(256, 512), nn.LeakyReLU(0.2), nn.BatchNorm1d(512),
            nn.Linear(512, 1024), nn.LeakyReLU(0.2), nn.BatchNorm1d(1024),
            nn.Linear(1024, 784),  # 28x28 = 784
            nn.Tanh()
        )

    def forward(self, x):
        return self.model(x)

# Discriminator: classifies a flattened image as real or fake
class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 512), nn.LeakyReLU(0.2),
            nn.Linear(512, 256), nn.LeakyReLU(0.2),
            nn.Linear(256, 128), nn.LeakyReLU(0.2),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

# Create the models
latent_dim = 100
generator = Generator(latent_dim)
discriminator = Discriminator()

# Loss and one optimizer per network
criterion = nn.BCELoss()
optimizer_g = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizer_d = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

# Sanity check: noise -> generator -> discriminator
z = torch.randn(16, latent_dim)
fake_images = generator(z)
decisions = discriminator(fake_images)

print("GAN Architecture:")
print(generator)
print(discriminator)
```
5. Autoencoders
Variational Autoencoder (VAE)
```python
import torch
import torch.nn as nn
import torch.optim as optim

# Encoder: maps a flattened image to the mean and log-variance of q(z|x)
class Encoder(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 512), nn.ReLU(),
            nn.Linear(512, 256), nn.ReLU()
        )
        self.mu = nn.Linear(256, latent_dim)
        self.log_var = nn.Linear(256, latent_dim)

    def forward(self, x):
        x = x.view(x.size(0), -1)  # flatten
        x = self.model(x)
        return self.mu(x), self.log_var(x)

# Reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
def sampling(mu, log_var):
    std = torch.exp(0.5 * log_var)
    eps = torch.randn_like(std)
    return mu + eps * std

# Decoder: maps a latent vector back to a flattened image
class Decoder(nn.Module):
    def __init__(self, latent_dim):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(latent_dim, 256), nn.ReLU(),
            nn.Linear(256, 512), nn.ReLU(),
            nn.Linear(512, 784),  # 28x28 = 784
            nn.Sigmoid()
        )

    def forward(self, z):
        return self.model(z)

# Build the VAE
latent_dim = 2
encoder = Encoder(latent_dim)
decoder = Decoder(latent_dim)

# One forward pass on an example input
inputs = torch.rand(1, 784)  # example input in [0, 1]
mu, log_var = encoder(inputs)
z = sampling(mu, log_var)
outputs = decoder(z)

# Loss = reconstruction term + KL divergence to the prior N(0, I)
reconstruction_loss = nn.MSELoss()(outputs, inputs)
kl_loss = -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp())
vae_loss = reconstruction_loss + kl_loss

print("VAE Architecture:")
print(encoder)
print(decoder)
print("Example VAE loss:", vae_loss.item())
```
6. Attention Mechanisms
Self-Attention Implementation
```python
import torch
import torch.nn as nn

def scaled_dot_product_attention(query, key, value, mask=None):
    # Attention scores
    matmul_qk = torch.matmul(query, key.transpose(-2, -1))

    # Scale by sqrt(d_k)
    dk = query.size(-1)
    scaled_attention_logits = matmul_qk / torch.sqrt(torch.tensor(dk, dtype=torch.float32))

    # Apply the mask if provided
    if mask is not None:
        scaled_attention_logits += (mask * -1e9)

    # Softmax over the key dimension
    attention_weights = torch.softmax(scaled_attention_logits, dim=-1)

    # Weighted sum of the values
    output = torch.matmul(attention_weights, value)
    return output, attention_weights

# Multi-head attention built on the function above
def multi_head_attention(d_model, num_heads):
    # One projection per role, created once and reused across calls
    w_q = nn.Linear(d_model, d_model)
    w_k = nn.Linear(d_model, d_model)
    w_v = nn.Linear(d_model, d_model)

    def attention(inputs):
        batch_size, seq_len, _ = inputs.shape

        # Linear transformations
        query = w_q(inputs)
        key = w_k(inputs)
        value = w_v(inputs)

        # Reshape for multi-head: (batch, seq, heads, d_head)
        head_dim = d_model // num_heads
        query = query.view(batch_size, seq_len, num_heads, head_dim)
        key = key.view(batch_size, seq_len, num_heads, head_dim)
        value = value.view(batch_size, seq_len, num_heads, head_dim)

        # Transpose to (batch, heads, seq, d_head) before attention
        query, key, value = (t.transpose(1, 2) for t in (query, key, value))

        # Apply scaled dot-product attention per head
        attention_output, attention_weights = scaled_dot_product_attention(query, key, value)

        # Merge the heads back to (batch, seq, d_model)
        attention_output = attention_output.transpose(1, 2).reshape(batch_size, seq_len, d_model)
        return attention_output, attention_weights

    return attention

# Test (for real models, the built-in nn.MultiheadAttention module can be used)
d_model = 512
num_heads = 8
attention_layer = multi_head_attention(d_model, num_heads)

# Input shape: (batch_size, seq_len, d_model)
test_input = torch.randn(1, 10, d_model)
output, weights = attention_layer(test_input)

print(f"Input shape: {test_input.shape}")
print(f"Output shape: {output.shape}")
print(f"Attention weights shape: {weights.shape}")
```
7. Practical Applications
Image Classification with ResNet
```python
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

# Load pre-trained ResNet50
model = models.resnet50(pretrained=True)
model.eval()

# Image prediction helper
def predict_image(img_path):
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    img = Image.open(img_path).convert('RGB')
    img = preprocess(img).unsqueeze(0)
    with torch.no_grad():
        output = model(img)
        _, pred = torch.max(output, 1)
    return pred.item()

# Example usage (requires a real image file)
# result = predict_image('path/to/image.jpg')
# print(f"Predicted class: {result}")
```
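The returned index refers to one of the 1000 ImageNet classes. With a recent torchvision (roughly 0.13+), the weight enum exposes the class names, so the index can be mapped to a human-readable label; a sketch under that assumption:

```python
from torchvision.models import ResNet50_Weights

# The weight enum's metadata includes the ImageNet class names
class_names = ResNet50_Weights.IMAGENET1K_V1.meta["categories"]

# idx = predict_image('path/to/image.jpg')   # hypothetical image path
# print("Predicted label:", class_names[idx])
```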
Text Generation, GPT-style
```python
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load the pre-trained model and tokenizer
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

def generate_text(prompt, max_length=100):
    # Encode the input prompt
    inputs = tokenizer.encode(prompt, return_tensors='pt')

    # Generate
    outputs = model.generate(inputs, max_length=max_length,
                             num_return_sequences=1,
                             temperature=0.7, do_sample=True)

    # Decode back to text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Test
prompt = "Machine learning is"
generated = generate_text(prompt)
print(generated)
```
Conclusion
Deep Learning has opened up new possibilities in AI. To succeed in this field, you need:
1. A solid foundation
- Mathematics: Linear Algebra, Calculus, Statistics
- Programming: Python, TensorFlow/PyTorch
- Fundamental ML knowledge
2. Tools and Frameworks
- TensorFlow/PyTorch: Deep learning
- Transformers: NLP
- OpenCV: Computer vision
- JAX: Research