别再手动调焦了！用Python+PyTorch实现多聚焦图像自动融合（附完整代码与数据集）

张

张建站

2026/5/28 4:13:59

10分钟阅读

别再手动调焦了！用Python+PyTorch实现多聚焦图像自动融合（附完整代码与数据集）

# 用Python+PyTorch实现多聚焦图像自动融合的实战指南

在摄影和计算机视觉领域，获取一张所有区域都清晰的全聚焦图像一直是个挑战。传统方法需要摄影师手动调整焦点拍摄多张照片，再通过后期处理软件进行合成，整个过程耗时耗力。本文将带你用Python和PyTorch构建一个端到端的深度学习解决方案，实现多聚焦图像的自动融合。

## 1. 环境准备与数据加载

多聚焦图像融合的第一步是搭建合适的开发环境。我们推荐使用Python 3.8和PyTorch 1.10的组合，它们提供了良好的兼容性和性能。

基础环境配置：

```bash
conda create -n mfif python=3.8
conda activate mfif
pip install torch torchvision torchaudio
pip install opencv-python numpy matplotlib
```

对于数据集，Lytro多聚焦数据集是个不错的起点，它包含20对真实拍摄的多聚焦图像。我们可以使用以下代码加载和处理这些数据：

```python
import cv2
import numpy as np
import os

def load_lytro_dataset(path):
    image_pairs = []
    for i in range(1, 21):
        img1 = cv2.imread(f"{path}/lytro-{i:02d}-A.jpg")
        img2 = cv2.imread(f"{path}/lytro-{i:02d}-B.jpg")
        if img1 is not None and img2 is not None:
            image_pairs.append((img1, img2))
    return image_pairs
```

> 注意：实际应用中，你可能需要对图像进行归一化处理，将像素值缩放到0-1范围，这对神经网络的训练稳定性很重要。

## 2. 模型架构设计与实现

我们将采用U-Net作为基础架构，这是一种在图像处理任务中表现出色的编码器-解码器结构。以下是我们的改进版U-Net实现：

```python
import torch
import torch.nn as nn

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)

class MFIF_UNet(nn.Module):
    def __init__(self, in_channels=6, out_channels=3):
        super().__init__()
        # 编码器部分
        self.enc1 = DoubleConv(in_channels, 64)
        self.enc2 = DoubleConv(64, 128)
        self.enc3 = DoubleConv(128, 256)
        self.enc4 = DoubleConv(256, 512)
        # 解码器部分
        self.up3 = nn.ConvTranspose2d(512, 256, 2, stride=2)
        self.dec3 = DoubleConv(512, 256)
        self.up2 = nn.ConvTranspose2d(256, 128, 2, stride=2)
        self.dec2 = DoubleConv(256, 128)
        self.up1 = nn.ConvTranspose2d(128, 64, 2, stride=2)
        self.dec1 = DoubleConv(128, 64)
        self.final = nn.Conv2d(64, out_channels, 1)

    def forward(self, x1, x2):
        # 将两幅输入图像在通道维度拼接
        x = torch.cat([x1, x2], dim=1)
        # 编码过程
        e1 = self.enc1(x)
        p1 = nn.MaxPool2d(2)(e1)
        e2 = self.enc2(p1)
        p2 = nn.MaxPool2d(2)(e2)
        e3 = self.enc3(p2)
        p3 = nn.MaxPool2d(2)(e3)
        e4 = self.enc4(p3)
        # 解码过程
        d3 = self.up3(e4)
        d3 = torch.cat([d3, e3], dim=1)
        d3 = self.dec3(d3)
        d2 = self.up2(d3)
        d2 = torch.cat([d2, e2], dim=1)
        d2 = self.dec2(d2)
        d1 = self.up1(d2)
        d1 = torch.cat([d1, e1], dim=1)
        d1 = self.dec1(d1)
        return torch.sigmoid(self.final(d1))
```

这个模型有几个关键特点：

- 双输入处理：模型接受两幅多聚焦图像作为输入，在通道维度拼接后进行处理
- 跳跃连接：保留了U-Net的经典跳跃连接结构，有助于保留空间细节
- 端到端训练：直接输出融合后的图像，无需额外的后处理步骤

## 3. 训练策略与损失函数

训练多聚焦图像融合模型需要精心设计的损失函数。我们采用组合损失函数，包含像素级损失和感知损失：

```python
class MFIF_Loss(nn.Module):
    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()
        self.vgg = self._build_vgg()

    def _build_vgg(self):
        vgg = torchvision.models.vgg16(pretrained=True).features[:16]
        for param in vgg.parameters():
            param.requires_grad = False
        return vgg

    def forward(self, output, img1, img2):
        # 像素级损失
        mse_loss = (self.mse(output, img1) + self.mse(output, img2)) / 2
        # 感知损失
        def vgg_loss(x):
            x = x.repeat(1, 3, 1, 1)  # 灰度图转三通道
            return self.vgg(x)
        percep_loss = (F.mse_loss(vgg_loss(output), vgg_loss(img1)) +
                       F.mse_loss(vgg_loss(output), vgg_loss(img2))) / 2
        return 0.7 * mse_loss + 0.3 * percep_loss
```

训练流程的实现：

```python
def train_model(model, train_loader, val_loader, epochs=50, lr=1e-4):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = MFIF_Loss().to(device)
    best_loss = float("inf")
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for img1, img2 in train_loader:
            img1, img2 = img1.to(device), img2.to(device)
            optimizer.zero_grad()
            output = model(img1, img2)
            loss = criterion(output, img1, img2)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        # 验证阶段
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for img1, img2 in val_loader:
                img1, img2 = img1.to(device), img2.to(device)
                output = model(img1, img2)
                val_loss += criterion(output, img1, img2).item()
        print(f"Epoch {epoch+1}/{epochs} - Train Loss: {train_loss/len(train_loader):.4f} - Val Loss: {val_loss/len(val_loader):.4f}")
        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), "best_model.pth")
    return model
```

> 提示：在实际训练中，可以使用学习率调度器（如ReduceLROnPlateau）来动态调整学习率，这有助于模型收敛到更好的局部最优。

## 4. 结果评估与可视化

训练完成后，我们需要评估模型性能并可视化融合结果。以下是评估和可视化的代码实现：

```python
def evaluate_model(model, test_loader):
    device = next(model.parameters()).device
    model.eval()
    ssim_scores = []
    psnr_scores = []
    with torch.no_grad():
        for img1, img2 in test_loader:
            img1, img2 = img1.to(device), img2.to(device)
            output = model(img1, img2)
            # 计算SSIM
            ssim_val = ssim(output, img1) + ssim(output, img2)
            ssim_scores.append(ssim_val.item() / 2)
            # 计算PSNR
            psnr_val = psnr(output, img1) + psnr(output, img2)
            psnr_scores.append(psnr_val.item() / 2)
    print(f"Average SSIM: {np.mean(ssim_scores):.4f}")
    print(f"Average PSNR: {np.mean(psnr_scores):.4f}")

def visualize_results(model, img1, img2):
    device = next(model.parameters()).device
    model.eval()
    with torch.no_grad():
        output = model(img1.unsqueeze(0).to(device), img2.unsqueeze(0).to(device))
    output = output.squeeze().cpu().numpy().transpose(1, 2, 0)
    plt.figure(figsize=(15, 5))
    plt.subplot(1, 3, 1)
    plt.imshow(img1.permute(1, 2, 0).numpy())
    plt.title("Image A (Focus on Foreground)")
    plt.subplot(1, 3, 2)
    plt.imshow(img2.permute(1, 2, 0).numpy())
    plt.title("Image B (Focus on Background)")
    plt.subplot(1, 3, 3)
    plt.imshow(output)
    plt.title("Fused Image")
    plt.show()
```

评估指标说明：

| 指标名称 | 计算公式 | 理想值 | 评估重点 |
| --- | --- | --- | --- |
| SSIM | 结构相似性指数 | 1.0 | 图像结构保留 |
| PSNR | 峰值信噪比 | 越高越好 | 像素级差异 |
| MSE | 均方误差 | 0.0 | 像素级精度 |

在实际项目中，我发现以下几个技巧可以显著提升融合质量：

- 数据增强：对训练图像应用随机裁剪、旋转和颜色抖动，可以提高模型的泛化能力
- 注意力机制：在U-Net中添加注意力模块，可以帮助模型更好地聚焦于重要区域
- 多尺度训练：在不同分辨率下训练模型，有助于捕捉不同尺度的细节特征

## 5. 高级技巧与优化方向

对于追求更高质量融合结果的开发者，可以考虑以下几个进阶方向：

### 1. 基于GAN的融合方法

生成对抗网络可以学习更复杂的图像分布，产生更自然的融合结果。以下是GAN架构的基本实现：

```python
class Generator(nn.Module):
    # 类似前面的U-Net结构但输出层使用tanh激活
    ...

class Discriminator(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 64, 4, stride=2, padding=1),
            nn.LeakyReLU(0.2),
            nn.Conv2d(64, 128, 4, stride=2, padding=1),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(0.2),
            nn.Conv2d(128, 256, 4, stride=2, padding=1),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(0.2),
            nn.Conv2d(256, 1, 4, stride=1, padding=0),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)
```

### 2. 多焦点图像序列处理

当有多于两幅的多聚焦图像时，可以扩展模型处理任意数量的输入：

```python
class MultiInputMFIF(nn.Module):
    def __init__(self, num_inputs=3):
        super().__init__()
        self.initial_conv = nn.Conv2d(3*num_inputs, 64, 3, padding=1)
        # 其余结构与U-Net类似
        ...

    def forward(self, *inputs):
        x = torch.cat(inputs, dim=1)
        x = self.initial_conv(x)
        # 继续U-Net处理
        ...
        return x
```

### 3. 实时融合优化

对于需要实时处理的应用，可以考虑以下优化策略：

- 使用轻量级网络架构（如MobileNetV3）作为编码器
- 应用模型量化技术减少计算量
- 使用TensorRT等推理加速框架

```python
# 模型量化示例
quantized_model = torch.quantization.quantize_dynamic(
    model, {nn.Conv2d, nn.Linear}, dtype=torch.qint8
)
```

在显微摄影项目中应用这套方案时，我们遇到了边缘区域融合不自然的问题。通过添加边缘感知损失函数，我们显著改善了这一情况：

```python
class EdgeAwareLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.sobel_x = torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], dtype=torch.float32).view(1, 1, 3, 3)
        self.sobel_y = torch.tensor([[-1, -2, -1], [0, 0, 0], [1, 2, 1]], dtype=torch.float32).view(1, 1, 3, 3)

    def forward(self, output, target):
        # 计算边缘图
        def get_edges(x):
            gx = F.conv2d(x, self.sobel_x.to(x.device), padding=1)
            gy = F.conv2d(x, self.sobel_y.to(x.device), padding=1)
            return torch.sqrt(gx**2 + gy**2 + 1e-6)
        output_edges = get_edges(output)
        target_edges = get_edges(target)
        return F.l1_loss(output_edges, target_edges)
```

这套多聚焦图像融合方案已经成功应用于多个实际项目，包括显微图像分析、工业检测和景深扩展摄影。相比传统方法，深度学习方案在保持自然过渡和细节保留方面表现更优，特别是在处理复杂纹理和边缘区域时。

Kibana 仪表板即代码：在 Elastic 9.4 中用于 Kibana 仪表板的 GitOps、漂移检测与 Terraform

作者：来自 Elastic Teresa Alvarez Soler, Omer Kushmaro 及 Devon Thomson Elastic 9.4 推出了类型化的 Dashboards API，以及原生 Terraform 资源，这使得 Kibana 仪表板首次具备漂移检测、PR 可审查差异以及基于 Git 的回滚能力。通过一个统…...

2026/5/28 4:09:14 阅读更多 →

面试官问‘加法器有几种？’：从行波进位到前缀加法器的性能演进与面试考点解析

面试官问‘加法器有几种？’：从行波进位到前缀加法器的性能演进与面试考点解析当面试官抛出"加法器有几种？"这个问题时，他们期待的绝不仅仅是一个名词列表。作为数字IC设计的核心组件，加法器的选择直接影响处…...

2026/5/28 4:09:14 阅读更多 →

E5-small常见问题解答：解决使用过程中的10个典型问题

E5-small常见问题解答：解决使用过程中的10个典型问题【免费下载链接】e5-small 项目地址: https://ai.gitcode.com/hf_mirrors/zhouhui/e5-small E5-small是一款高效的文本嵌入模型，在信息检索、语义相似性计算等任务中表现出色。本文整理了使用…...

2026/5/28 4:09:12 阅读更多 →

PS5 NOR修改器终极指南：简单三步修复你的游戏主机

PS5 NOR修改器终极指南：简单三步修复你的游戏主机【免费下载链接】PS5NorModifier The PS5 Nor Modifier is an easy to use Windows based application to rewrite your PS5 NOR file. This can be useful if your NOR is corrupt, or if you have a disc edition…...

2026/5/25 2:11:12 阅读更多 →

毕业论文，如何合规使用AI

2022年11月出现了大语言模型ChatGPT，今年是第一批使用大模型的大学生毕业的年份，如何安全、高效地使用AIGC正在成为不少人关心的重要事情。大学生或研究生毕业论文使用AIGC的核心原则是：把它当成你的“科研实习生”，而不是“代笔枪…...

2026/5/26 15:59:40 阅读更多 →

3步彻底解决RDP Wrapper [not supported]问题：实战修复指南

3步彻底解决RDP Wrapper [not supported]问题：实战修复指南【免费下载链接】rdpwrap RDP Wrapper Library 项目地址: https://gitcode.com/gh_mirrors/rd/rdpwrap RDP Wrapper是一款让Windows家庭版支持多用户远程桌面的开源工具，但许多用户在系…...

2026/5/24 0:32:45 阅读更多 →