EPNet Code Walkthrough: LI-Fusion




1. Preface

Paper source code:
Annotated version: (only the LI-Fusion part is annotated so far; I will update it when I have time)
Paper address: .08856

2. Core Code Locations

1. The config (cfg) file: EPNet-master\EPNet-master\tools\cfgs\LI_Fusion_with_attention_use_ce_loss.yaml

The LI_FUSION section of the config:

LI_FUSION:
    ENABLED: True
    ADD_Image_Attention: True
    IMG_FEATURES_CHANNEL: 128
    IMG_CHANNELS: [3, 64, 128, 256, 512]
    POINT_CHANNELS: [96, 256, 512, 1024]
    DeConv_Reduce: [16, 16, 16, 16]
    DeConv_Kernels: [2, 4, 8, 16]
    DeConv_Strides: [2, 4, 8, 16]

2. The LI-Fusion module: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py

The fully annotated code:

import torch
import torch.nn as nn
import torch.nn.functional as F
from pointnet2_lib.pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG
from lib.config import cfg
from torch.nn.functional import grid_sample

BatchNorm2d = nn.BatchNorm2d


def conv3x3(in_planes, out_planes, stride = 1):
    """3x3 convolution with padding"""
    # 3x3 convolution block: the paper uses 3x3 convolutions to downsample the image
    return nn.Conv2d(in_planes, out_planes, kernel_size = 3, stride = stride,
                     padding = 1, bias = False)


class BasicBlock(nn.Module):
    # The classic convolution block: Conv + BN + ReLU
    def __init__(self, inplanes, outplanes, stride = 1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, outplanes, stride)
        self.bn1 = BatchNorm2d(outplanes)
        self.relu = nn.ReLU(inplace = True)
        self.conv2 = conv3x3(outplanes, outplanes, 2 * stride)

    def forward(self, x):
        out = self.conv1(x)
        # BN + ReLU between conv layers to keep the mapping non-linear
        out = self.bn1(out)
        out = self.relu(out)
        # Output
        out = self.conv2(out)
        return out


class Fusion_Conv(nn.Module):
    def __init__(self, inplanes, outplanes):
        super(Fusion_Conv, self).__init__()
        self.conv1 = torch.nn.Conv1d(inplanes, outplanes, 1)
        self.bn1 = torch.nn.BatchNorm1d(outplanes)

    def forward(self, point_features, img_features):
        # print(point_features.shape, img_features.shape)
        fusion_features = torch.cat([point_features, img_features], dim = 1)
        fusion_features = F.relu(self.bn1(self.conv1(fusion_features)))
        return fusion_features


#================ addition attention (add) =======================#
class IA_Layer(nn.Module):
    def __init__(self, channels):
        print('##############ADDITION ATTENTION(ADD)#########')
        super(IA_Layer, self).__init__()
        self.ic, self.pc = channels
        rc = self.pc // 4
        self.conv1 = nn.Sequential(nn.Conv1d(self.ic, self.pc, 1),
                                   nn.BatchNorm1d(self.pc),
                                   nn.ReLU())
        self.fc1 = nn.Linear(self.ic, rc)
        self.fc2 = nn.Linear(self.pc, rc)
        self.fc3 = nn.Linear(rc, 1)

    def forward(self, img_feas, point_feas):
        batch = img_feas.size(0)
        # Flatten the image and point-cloud features
        img_feas_f = img_feas.transpose(1, 2).contiguous().view(-1, self.ic)      # BCN -> BNC -> (BN)C
        point_feas_f = point_feas.transpose(1, 2).contiguous().view(-1, self.pc)  # BCN -> BNC -> (BN)C'
        # Feed the image and point features through FC layers to map them to the
        # same dimensionality, so that they can be fused below
        ri = self.fc1(img_feas_f)
        rp = self.fc2(point_feas_f)
        # 1. Fusion: the image and point features are fused element-wise, i.e. ri + rp
        # 2. tanh maps the result into [-1, 1]
        # 3. fc3 reduces the channel dimension so the result can be multiplied with the image features
        # 4. sigmoid constrains the values to [0, 1], producing a weight map: every element
        #    gets its own weight, telling us how much each image pixel contributes
        att = F.sigmoid(self.fc3(F.tanh(ri + rp)))  # BNx1
        att = att.squeeze(1)
        att = att.view(batch, 1, -1)  # B1N
        # Transform the raw image features with a conv layer so they can be
        # multiplied with the weight map
        img_feas_new = self.conv1(img_feas)
        # Weighted image features: image x weight map = the information the image contributes
        out = img_feas_new * att
        # Return the weighted image information
        return out


class Atten_Fusion_Conv(nn.Module):
    # The LI-Fusion fusion module
    def __init__(self, inplanes_I, inplanes_P, outplanes):
        super(Atten_Fusion_Conv, self).__init__()
        # IA_Layer fuses the image and point-cloud information
        self.IA_Layer = IA_Layer(channels = [inplanes_I, inplanes_P])
        # self.conv1 = torch.nn.Conv1d(inplanes_P, outplanes, 1)
        self.conv1 = torch.nn.Conv1d(inplanes_P + inplanes_P, outplanes, 1)
        self.bn1 = torch.nn.BatchNorm1d(outplanes)

    def forward(self, point_features, img_features):
        # Obtain the weighted image features from the IA_Layer
        img_features = self.IA_Layer(img_features, point_features)
        # Concatenate the original point features with the weighted image features:
        # this keeps the re-weighted image information (unimportant image information
        # has been suppressed) while preserving the raw point-cloud information
        # fusion_features = img_features + point_features
        fusion_features = torch.cat([point_features, img_features], dim = 1)
        # Conv + BN + ReLU produces the final fusion result
        fusion_features = F.relu(self.bn1(self.conv1(fusion_features)))
        return fusion_features


def Feature_Gather(feature_map, xy):
    """
    :param xy: (B, N, 2), normalized to [-1, 1]
    :param feature_map: (B, C, H, W)
    :return: (B, C, N)
    """
    # Interpolation, done with grid_sample.
    # xy (B, N, 2) -> (B, 1, N, 2)
    xy = xy.unsqueeze(1)
    # grid_sample is PyTorch's built-in sampler:
    #   torch.nn.functional.grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corners=None)
    # where mode selects the interpolation scheme: 'bilinear', 'nearest' or 'bicubic'.
    interpolate_feature = grid_sample(feature_map, xy)  # (B, C, 1, N)
    # Return the interpolated features
    return interpolate_feature.squeeze(2)  # (B, C, N)


def get_model(input_channels = 6, use_xyz = True):
    return Pointnet2MSG(input_channels = input_channels, use_xyz = use_xyz)


class Pointnet2MSG(nn.Module):
    def __init__(self, input_channels = 6, use_xyz = True):
        super().__init__()
        # The SA (Set Abstraction) modules from PointNet++
        self.SA_modules = nn.ModuleList()
        channel_in = input_channels
        skip_channel_list = [input_channels]
        for k in range(cfg.RPN.SA_CONFIG.NPOINTS.__len__()):
            mlps = cfg.RPN.SA_CONFIG.MLPS[k].copy()
            channel_out = 0
            for idx in range(mlps.__len__()):
                mlps[idx] = [channel_in] + mlps[idx]
                channel_out += mlps[idx][-1]
            # Use the PointNet++ SA module to aggregate point features
            self.SA_modules.append(
                PointnetSAModuleMSG(
                    npoint = cfg.RPN.SA_CONFIG.NPOINTS[k],
                    radii = cfg.RPN.SA_CONFIG.RADIUS[k],
                    nsamples = cfg.RPN.SA_CONFIG.NSAMPLE[k],
                    mlps = mlps,
                    use_xyz = use_xyz,
                    bn = cfg.RPN.USE_BN))
            skip_channel_list.append(channel_out)
            channel_in = channel_out

        ##################
        # Use the LI_FUSION module only if it is enabled in the cfg file
        # cfg file: LI_FUSION.ENABLED: True
        if cfg.LI_FUSION.ENABLED:
            # Image feature-abstraction (downsampling) blocks
            self.Img_Block = nn.ModuleList()
            # Fusion convolution blocks
            self.Fusion_Conv = nn.ModuleList()
            # Deconvolution blocks
            self.DeConv = nn.ModuleList()
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Initialize the Img_Block modules:
                # 1. The image downsampling path is built from the cfg parameters.
                # 2. It stacks the BasicBlock defined above, wired as
                #    input:  LI_FUSION.IMG_CHANNELS[i]
                #    output: cfg.LI_FUSION.IMG_CHANNELS[i+1]
                # 3. With IMG_CHANNELS: [3, 64, 128, 256, 512] in the cfg file,
                #    the (input, output) pairs are (3,64), (64,128), (128,256), (256,512).
                self.Img_Block.append(BasicBlock(cfg.LI_FUSION.IMG_CHANNELS[i], cfg.LI_FUSION.IMG_CHANNELS[i + 1], stride = 1))
                # Add image attention if enabled in the cfg file
                # cfg file: ADD_Image_Attention: True
                if cfg.LI_FUSION.ADD_Image_Attention:
                    # Append the LI-Fusion module to Fusion_Conv
                    self.Fusion_Conv.append(
                        Atten_Fusion_Conv(cfg.LI_FUSION.IMG_CHANNELS[i + 1], cfg.LI_FUSION.POINT_CHANNELS[i],
                                          cfg.LI_FUSION.POINT_CHANNELS[i]))
                else:
                    self.Fusion_Conv.append(
                        Fusion_Conv(cfg.LI_FUSION.IMG_CHANNELS[i + 1] + cfg.LI_FUSION.POINT_CHANNELS[i],
                                    cfg.LI_FUSION.POINT_CHANNELS[i]))
                # DeConv: transposed convolutions (ConvTranspose2d) configured by the cfg file
                # cfg file: (1) IMG_CHANNELS: [3, 64, 128, 256, 512]  (2) DeConv_Reduce: [16, 16, 16, 16]
                self.DeConv.append(
                    nn.ConvTranspose2d(cfg.LI_FUSION.IMG_CHANNELS[i + 1], cfg.LI_FUSION.DeConv_Reduce[i],
                                       kernel_size = cfg.LI_FUSION.DeConv_Kernels[i],
                                       stride = cfg.LI_FUSION.DeConv_Kernels[i]))

            # Image feature fusion layer
            self.image_fusion_conv = nn.Conv2d(sum(cfg.LI_FUSION.DeConv_Reduce), cfg.LI_FUSION.IMG_FEATURES_CHANNEL // 4, kernel_size = 1)
            # BN
            self.image_fusion_bn = torch.nn.BatchNorm2d(cfg.LI_FUSION.IMG_FEATURES_CHANNEL // 4)

            # Use image attention for the final fusion if ADD_Image_Attention is set
            if cfg.LI_FUSION.ADD_Image_Attention:
                self.final_fusion_img_point = Atten_Fusion_Conv(cfg.LI_FUSION.IMG_FEATURES_CHANNEL // 4, cfg.LI_FUSION.IMG_FEATURES_CHANNEL, cfg.LI_FUSION.IMG_FEATURES_CHANNEL)
            else:
                self.final_fusion_img_point = Fusion_Conv(cfg.LI_FUSION.IMG_FEATURES_CHANNEL + cfg.LI_FUSION.IMG_FEATURES_CHANNEL // 4, cfg.LI_FUSION.IMG_FEATURES_CHANNEL)

        # The FP modules (inverse-distance-weighted interpolation)
        self.FP_modules = nn.ModuleList()
        # Initialize the FP modules
        for k in range(cfg.RPN.FP_MLPS.__len__()):
            pre_channel = cfg.RPN.FP_MLPS[k + 1][-1] if k + 1 < len(cfg.RPN.FP_MLPS) else channel_out
            self.FP_modules.append(
                PointnetFPModule(mlp = [pre_channel + skip_channel_list[k]] + cfg.RPN.FP_MLPS[k]))

    def _break_up_pc(self, pc):
        # _break_up_pc splits the point cloud into coordinates and features:
        # the first three values of each point are its xyz coordinates
        xyz = pc[..., 0:3].contiguous()
        features = (
            # the remaining values pc[3:] are the point features
            pc[..., 3:].transpose(1, 2).contiguous()
            if pc.size(-1) > 3 else None
        )
        return xyz, features

    def forward(self, pointcloud: torch.cuda.FloatTensor, image = None, xy = None):
        # Point coordinates xyz and point features
        xyz, features = self._break_up_pc(pointcloud)
        # Seed the per-level lists with the raw coordinates and features
        l_xyz, l_features = [xyz], [features]

        # Use LI_FUSION only if enabled in the cfg file
        # cfg file: LI_FUSION.ENABLED: True
        if cfg.LI_FUSION.ENABLED:
            #### normalize xy to [-1, 1]
            size_range = [1280.0, 384.0]
            xy[:, :, 0] = xy[:, :, 0] / (size_range[0] - 1.0) * 2.0 - 1.0
            xy[:, :, 1] = xy[:, :, 1] / (size_range[1] - 1.0) * 2.0 - 1.0  # = xy / (size_range - 1.) * 2 - 1.
            # Keep the normalized projection coordinates
            l_xy_cor = [xy]
            img = [image]

        # Iterate over the SA modules: S1, S2, S3, S4 (Fig. 2 of the paper).
        # Strictly speaking this iterates over the SA and image-convolution modules
        # together; since they have the same count, len(self.SA_modules) is used.
        for i in range(len(self.SA_modules)):
            # Extract point features with the SA module
            li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])

            # Fuse with the image only if LI_FUSION is enabled
            if cfg.LI_FUSION.ENABLED:
                li_index = li_index.long().unsqueeze(-1).repeat(1, 1, 2)
                li_xy_cor = torch.gather(l_xy_cor[i], 1, li_index)  # gathers values at the given indices along a dimension
                # Convolve the image
                image = self.Img_Block[i](img[i])
                # print(image.shape)
                # Sample (interpolate) the image features at the projected point locations
                img_gather_feature = Feature_Gather(image, li_xy_cor)  # , scale= 2**(i+1))
                # Fuse image and point-cloud information with the LI-Fusion module
                li_features = self.Fusion_Conv[i](li_features, img_gather_feature)
                # Save the sampled coordinates
                l_xy_cor.append(li_xy_cor)
                # Save the image at every scale
                img.append(image)

            # Save the point coordinates
            l_xyz.append(li_xyz)
            # Save the LI-Fusion-fused features
            l_features.append(li_features)

        # The FP modules: three of them, P1, P2, P3 (Fig. 2 of the paper)
        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](
                l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
            )

        # Deconvolution of the Image Stream
        if cfg.LI_FUSION.ENABLED:
            # for i in range(1, len(img))
            DeConv = []
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Run each image scale through its deconvolution layer.
                # Note: the DeConv list here (the upsampled results) is different from
                # self.DeConv[i] (the deconvolution layers); see their definitions.
                DeConv.append(self.DeConv[i](img[i + 1]))
            # Concatenate the deconvolution results
            de_concat = torch.cat(DeConv, dim = 1)

            # Conv + BN + ReLU over the concatenation: the FU layer in the paper
            img_fusion = F.relu(self.image_fusion_bn(self.image_fusion_conv(de_concat)))
            # The final interpolation step: P4 (Fig. 2 of the paper)
            img_fusion_gather_feature = Feature_Gather(img_fusion, xy)
            # The final LI-Fusion (Fig. 2 of the paper)
            l_features[0] = self.final_fusion_img_point(l_features[0], img_fusion_gather_feature)

        return l_xyz[0], l_features[0]


class Pointnet2MSG_returnMiddleStages(Pointnet2MSG):
    def __init__(self, input_channels = 6, use_xyz = True):
        super().__init__(input_channels, use_xyz)

    def forward(self, pointcloud: torch.cuda.FloatTensor):
        xyz, features = self._break_up_pc(pointcloud)
        l_xyz, l_features = [xyz], [features]
        idxs = []
        for i in range(len(self.SA_modules)):
            li_xyz, li_features, idx = self.SA_modules[i](l_xyz[i], l_features[i])
            l_xyz.append(li_xyz)
            l_features.append(li_features)
            idxs.append(idx)
        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i])
        return l_xyz, l_features, idxs

3. Module-by-Module Walkthrough

The Two-stream RPN

The two-stream RPN consists of a Geometric Stream (the point-cloud stream) and an Image Stream.
[They could equally be called the point-cloud branch and the image branch: each branch does its own job, while also exchanging information with the other to some degree.]

As shown in the figure (Fig. 2 of the paper), the geometric stream and the image stream produce point features and semantic image features, respectively.

EPNet uses multiple LI-Fusion modules to enhance the point features with the corresponding image semantic features at different scales, producing a more discriminative feature representation.

3.1 The Image Stream

1. Feature extraction: image encoding

The image stream takes an image as input and extracts semantic image information through a series of convolutions.

Structure: each convolution block consists of two 3×3 convolution layers, followed by a batch-normalization layer and a ReLU activation.

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py
It is defined as follows:

            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Initialize the Img_Block modules:
                # 1. The image downsampling path is built from the cfg parameters by
                #    appending convolution blocks (BasicBlock) to Img_Block.
                # 2. The stacked BasicBlocks are wired as
                #    input:  LI_FUSION.IMG_CHANNELS[i]
                #    output: cfg.LI_FUSION.IMG_CHANNELS[i+1],
                #    i.e. consecutive channel entries are paired up; see point 3 for a concrete example.
                # 3. With IMG_CHANNELS: [3, 64, 128, 256, 512] in the cfg file,
                #    the (input, output) pairs are (3,64), (64,128), (128,256), (256,512).
                self.Img_Block.append(BasicBlock(cfg.LI_FUSION.IMG_CHANNELS[i], cfg.LI_FUSION.IMG_CHANNELS[i + 1], stride = 1))

This BasicBlock is the classic convolution block: Conv + BN + ReLU.

class BasicBlock(nn.Module):
    # The classic convolution block: Conv + BN + ReLU
    def __init__(self, inplanes, outplanes, stride = 1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, outplanes, stride)
        self.bn1 = BatchNorm2d(outplanes)
        self.relu = nn.ReLU(inplace = True)
        self.conv2 = conv3x3(outplanes, outplanes, 2 * stride)

    def forward(self, x):
        out = self.conv1(x)
        # BN + ReLU between conv layers to keep the mapping non-linear
        out = self.bn1(out)
        out = self.relu(out)
        # Output
        out = self.conv2(out)
        return out
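As a quick sanity check (a standalone sketch, not part of the repository): since conv2 runs with stride 2*stride = 2, each BasicBlock halves the spatial resolution, which is exactly what produces the four image scales consumed by the fusion layers.

import torch
import torch.nn as nn

# Minimal standalone copy of conv3x3/BasicBlock, just for the shape check
def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)

class BasicBlock(nn.Module):
    def __init__(self, inplanes, outplanes, stride=1):
        super().__init__()
        self.conv1 = conv3x3(inplanes, outplanes, stride)
        self.bn1 = nn.BatchNorm2d(outplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(outplanes, outplanes, 2 * stride)  # stride 2: downsamples by 2

    def forward(self, x):
        return self.conv2(self.relu(self.bn1(self.conv1(x))))

x = torch.randn(1, 3, 384, 1280)     # a KITTI-sized image tensor (B, C, H, W)
block = BasicBlock(3, 64, stride=1)  # the first (input, output) pair from IMG_CHANNELS
print(block(x).shape)                # torch.Size([1, 64, 192, 640]): H and W halved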

2. The deconvolution module: image decoding

The paper further uses four transposed-convolution layers with different strides to recover the image resolution, producing a feature map of the same size as the original image.
(1) Parameter setup
nn.ConvTranspose2d is used, configured from the DeConv_Reduce entries of the cfg file (the output channels):

DeConv_Reduce: [16, 16, 16, 16]
DeConv_Kernels: [2, 4, 8, 16]
DeConv_Strides: [2, 4, 8, 16]

together with the IMG_CHANNELS used earlier for image feature abstraction (the input channels):

IMG_CHANNELS: [3, 64, 128, 256, 512]

Because feature maps at several different scales must all be scaled up to the same size, the input channels are given by IMG_CHANNELS: [3, 64, 128, 256, 512], while the output channels are uniformly DeConv_Reduce: [16, 16, 16, 16].
The process is shown in the figure below.

位置:EPNet-master\EPNet-master\lib\net\pointnet2_msg.py

The code:

                # DeConv: transposed convolutions (ConvTranspose2d) configured by the cfg file
                # cfg file: (1) IMG_CHANNELS: [3, 64, 128, 256, 512]  (2) DeConv_Reduce: [16, 16, 16, 16]
                self.DeConv.append(
                    nn.ConvTranspose2d(cfg.LI_FUSION.IMG_CHANNELS[i + 1], cfg.LI_FUSION.DeConv_Reduce[i],
                                       kernel_size = cfg.LI_FUSION.DeConv_Kernels[i],
                                       stride = cfg.LI_FUSION.DeConv_Kernels[i]))
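A shape check of this setup (a standalone sketch using the cfg values quoted above): because kernel_size equals stride and there is no padding, ConvTranspose2d maps an input of size n to n*stride, so the feature maps at 1/2, 1/4, 1/8 and 1/16 resolution all come back to the original image size, each with 16 channels.

import torch
import torch.nn as nn

IMG_CHANNELS = [3, 64, 128, 256, 512]  # input channels per scale (after each BasicBlock)
DECONV_REDUCE = [16, 16, 16, 16]       # output channels
DECONV_KERNELS = [2, 4, 8, 16]         # kernel == stride, so exact x2, x4, x8, x16 upsampling

H, W = 384, 1280
for i in range(4):
    deconv = nn.ConvTranspose2d(IMG_CHANNELS[i + 1], DECONV_REDUCE[i],
                                kernel_size=DECONV_KERNELS[i], stride=DECONV_KERNELS[i])
    # feature map at scale i+1: resolution divided by 2^(i+1)
    feat = torch.randn(1, IMG_CHANNELS[i + 1], H // 2 ** (i + 1), W // 2 ** (i + 1))
    print(deconv(feat).shape)  # always torch.Size([1, 16, 384, 1280])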

3.2 The Geometric Stream (point-cloud stream)

The geometric stream consists of four pairs of Set Abstraction (SA) layers and Feature Propagation (FP) layers for feature extraction.

1. The SA module: point-cloud encoding

The SA module from PointNet++ is used to aggregate point features; for background, see a summary of the key points of PointNet and PointNet++.

Note: each SA module here consumes the LI-Fusion-fused features together with the output of the previous stage, implemented as follows:

li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])
# l_xyz is the xyz output of the previous SA stage; l_features holds the LI-Fusion-fused features

See the figure in the original paper:

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py
The SA modules are defined as follows:

        # The SA (Set Abstraction) modules from PointNet++
        self.SA_modules = nn.ModuleList()
        channel_in = input_channels
        skip_channel_list = [input_channels]
        for k in range(cfg.RPN.SA_CONFIG.NPOINTS.__len__()):
            mlps = cfg.RPN.SA_CONFIG.MLPS[k].copy()
            channel_out = 0
            for idx in range(mlps.__len__()):
                mlps[idx] = [channel_in] + mlps[idx]
                channel_out += mlps[idx][-1]
            # Use the PointNet++ SA module to aggregate point-cloud information
            self.SA_modules.append(
                PointnetSAModuleMSG(
                    npoint = cfg.RPN.SA_CONFIG.NPOINTS[k],
                    radii = cfg.RPN.SA_CONFIG.RADIUS[k],
                    nsamples = cfg.RPN.SA_CONFIG.NSAMPLE[k],
                    mlps = mlps,
                    use_xyz = use_xyz,
                    bn = cfg.RPN.USE_BN))
            skip_channel_list.append(channel_out)
            channel_in = channel_out
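To make the channel bookkeeping concrete, here is a small sketch with hypothetical MLPS values (the real numbers live in the yaml file, which is not quoted here): each scale's MLP spec gets channel_in prepended, and channel_out is the sum of the last-layer widths across the grouping scales, because the multi-scale features are concatenated.

# Hypothetical cfg values for one SA level (the actual yaml may differ)
channel_in = 96
mlps = [[16, 16, 32], [32, 32, 64]]  # one MLP spec per grouping radius

channel_out = 0
for idx in range(len(mlps)):
    mlps[idx] = [channel_in] + mlps[idx]  # prepend the incoming channel count
    channel_out += mlps[idx][-1]          # accumulate the per-scale output widths

print(mlps)         # [[96, 16, 16, 32], [96, 32, 32, 64]]
print(channel_out)  # 96 = 32 + 64: the concatenated multi-scale output width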

Since the SA module comes straight from PointNet++, the code below is all PointNet++ code and is only summarized here:

(1) The SA module used is PointnetSAModuleMSG from PointNet++.
Location: \EPNet-master\EPNet-master\pointnet2_lib\pointnet2\pointnet2_modules.py

It is not discussed in detail here; see the PointNet++ source for the full picture. The PointnetSAModuleMSG code:

class PointnetSAModuleMSG(_PointnetSAModuleBase):
    """Pointnet set abstraction layer with multiscale grouping"""

    def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]],
                 bn: bool = True, use_xyz: bool = True, pool_method = 'max_pool', instance_norm = False):
        """
        :param npoint: int
        :param radii: list of float, list of radii to group with
        :param nsamples: list of int, number of samples in each ball query
        :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale
        :param bn: whether to use batchnorm
        :param use_xyz:
        :param pool_method: max_pool / avg_pool
        :param instance_norm: whether to use instance_norm
        """
        super().__init__()

        assert len(radii) == len(nsamples) == len(mlps)

        # Initialization
        self.npoint = npoint
        self.groupers = nn.ModuleList()
        self.mlps = nn.ModuleList()
        for i in range(len(radii)):
            # Ball radius
            radius = radii[i]
            # Maximum number of samples per ball
            nsample = nsamples[i]
            self.groupers.append(
                # Ball query and grouping
                pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz = use_xyz)
                if npoint is not None else pointnet2_utils.GroupAll(use_xyz))
            mlp_spec = mlps[i]
            if use_xyz:
                mlp_spec[0] += 3
            # Shared MLP
            self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn = bn, instance_norm = instance_norm))
        self.pool_method = pool_method

(2) The SA CUDA files
Note that FPS, ball query and grouping are all written in CUDA. I believe the reason is that these routines are computationally heavy, and running them in parallel on the GPU improves the model's efficiency.

I have annotated this code as well, but the amount of low-level code is large, so it is not included here; I can add it later if needed.

Location: EPNet-master\EPNet-master\pointnet2_lib\pointnet2\src

2. The Feature Propagation (FP) layer: point-cloud decoding

(1) Formula
FP uses the inverse-distance-weighted (IDW) interpolation from PointNet++; for background, see a summary of the key points of PointNet and PointNet++.

Formula (the standard PointNet++ inverse-distance weighting over the k = 3 nearest neighbours, as implemented in the code below):

f(x) = ( Σ_{i=1}^{3} w_i(x) · f_i ) / ( Σ_{i=1}^{3} w_i(x) ),   where w_i(x) = 1 / d(x, x_i)^2

Following Fig. 2 of the paper, four FP modules are used in total: the first three (P1, P2, P3) interpolate against the features extracted by the earlier SA modules, while the last one (P4) interpolates back onto the original point cloud. After interpolation, LI-Fusion is applied once more.

Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py
The code:

        # The FP modules (inverse-distance-weighted interpolation)
        self.FP_modules = nn.ModuleList()
        # Initialize the FP modules
        for k in range(cfg.RPN.FP_MLPS.__len__()):
            pre_channel = cfg.RPN.FP_MLPS[k + 1][-1] if k + 1 < len(cfg.RPN.FP_MLPS) else channel_out
            self.FP_modules.append(
                PointnetFPModule(mlp = [pre_channel + skip_channel_list[k]] + cfg.RPN.FP_MLPS[k]))

(2) Code
The PointnetFPModule is also part of the PointNet++ code.
Location: \EPNet-master\EPNet-master\pointnet2_lib\pointnet2\pointnet2_modules.py

class PointnetFPModule(nn.Module):
    r"""Propagates the features of one set to another"""

    def __init__(self, *, mlp: List[int], bn: bool = True, activation = nn.ReLU(inplace = True)):
        """
        :param mlp: list of int
        :param bn: whether to use batchnorm
        """
        super().__init__()
        self.mlp = pt_utils.SharedMLP(mlp, bn = bn, activation = activation)

    def forward(self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor,
                known_feats: torch.Tensor) -> torch.Tensor:
        """
        :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
        :param known: (B, m, 3) tensor of the xyz positions of the known features
        :param unknow_feats: (B, C1, n) tensor of the features to be propagated to
        :param known_feats: (B, C2, m) tensor of features to be propagated
        :return:
            new_features: (B, mlp[-1], n) tensor of the features of the unknown features
        """
        # known:       positions of the known points, (B, m, 3)
        # known_feats: features of the known points, (B, C2, m)
        # unknown:     positions of the points to interpolate to, (B, n, 3); generally n > m
        # interpolated_feats: the interpolated features, (B, C2, n); this is the result
        if known is not None:
            # Neighbour and distance information between unknown and known;
            # three_nn finds the three nearest known points for each target point
            dist, idx = pointnet2_utils.three_nn(unknown, known)
            # Weights are the reciprocals of the distances
            dist_recip = 1.0 / (dist + 1e-8)
            norm = torch.sum(dist_recip, dim = 2, keepdim = True)
            weight = dist_recip / norm
            # Interpolate the features directly from the neighbour indices and weights
            interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
        else:
            interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))

        if unknow_feats is not None:
            new_features = torch.cat([interpolated_feats, unknow_feats], dim = 1)  # (B, C2 + C1, n)
        else:
            new_features = interpolated_feats

        new_features = new_features.unsqueeze(-1)
        # Shared MLP to transform the channel dimension
        new_features = self.mlp(new_features)

        return new_features.squeeze(-1)
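To see what the CUDA kernels in the next subsection compute, here is a plain-PyTorch CPU re-implementation of the same inverse-distance interpolation (a sketch for understanding only; three_interpolate_cpu is a hypothetical helper, not the repository's API):

import torch

def three_interpolate_cpu(unknown, known, known_feats):
    """
    unknown:     (B, n, 3) positions to interpolate to
    known:       (B, m, 3) positions that carry features
    known_feats: (B, C, m) features to propagate
    returns:     (B, C, n) interpolated features
    """
    # Pairwise squared distances (B, n, m), like three_nn
    dist2 = torch.cdist(unknown, known) ** 2
    dist2, idx = dist2.topk(3, dim=2, largest=False)   # three nearest neighbours
    weight = 1.0 / (dist2 + 1e-8)
    weight = weight / weight.sum(dim=2, keepdim=True)  # normalized IDW weights
    # Gather the neighbour features: (B, C, n, 3)
    B, C, m = known_feats.shape
    n = unknown.shape[1]
    idx_exp = idx.unsqueeze(1).expand(B, C, n, 3)
    feats = known_feats.unsqueeze(2).expand(B, C, n, m).gather(3, idx_exp)
    # Weighted sum over the three neighbours, like three_interpolate
    return (feats * weight.unsqueeze(1)).sum(dim=3)

B, n, m, C = 2, 128, 32, 16
out = three_interpolate_cpu(torch.randn(B, n, 3), torch.randn(B, m, 3), torch.randn(B, C, m))
print(out.shape)  # torch.Size([2, 16, 128])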

(3) CUDA files
The core interpolation routines are written in CUDA so they can run in parallel and keep the model efficient.
Interpolation CUDA file: interpolate_gpu.cu (contains three_nn and three_interpolate)
Location: \EPNet-master\EPNet-master\pointnet2_lib\pointnet2\src\interpolate_gpu.cu

For details, see the blog post "Understanding the forward computation and backward gradients of three_interpolate in PointNet++".

Code excerpts:
① three_nn

__global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown,
    const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
    // unknown: (B, N, 3)
    // known: (B, M, 3)
    // output:
    //      dist2: (B, N, 3)
    //      idx: (B, N, 3)

    int bs_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (bs_idx >= b || pt_idx >= n) return;

    unknown += bs_idx * n * 3 + pt_idx * 3;
    known += bs_idx * m * 3;
    dist2 += bs_idx * n * 3 + pt_idx * 3;
    idx += bs_idx * n * 3 + pt_idx * 3;

    float ux = unknown[0];
    float uy = unknown[1];
    float uz = unknown[2];

    double best1 = 1e40, best2 = 1e40, best3 = 1e40;
    int besti1 = 0, besti2 = 0, besti3 = 0;
    for (int k = 0; k < m; ++k) {
        float x = known[k * 3 + 0];
        float y = known[k * 3 + 1];
        float z = known[k * 3 + 2];
        // Squared Euclidean distance between the two points
        float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
        // Keep track of the three nearest points
        if (d < best1) {
            best3 = best2; besti3 = besti2;
            best2 = best1; besti2 = besti1;
            best1 = d; besti1 = k;
        } else if (d < best2) {
            best3 = best2; besti3 = besti2;
            best2 = d; besti2 = k;
        } else if (d < best3) {
            best3 = d; besti3 = k;
        }
    }
    dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
    idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
}

② three_interpolate

__global__ void three_interpolate_kernel_fast(int b, int c, int m, int n,
    const float *__restrict__ points, const int *__restrict__ idx,
    const float *__restrict__ weight, float *__restrict__ out) {
    // points: (B, C, M)
    // idx: (B, N, 3)
    // weight: (B, N, 3)
    // output:
    //      out: (B, C, N)

    int bs_idx = blockIdx.z;
    int c_idx = blockIdx.y;
    int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;

    weight += bs_idx * n * 3 + pt_idx * 3;
    points += bs_idx * c * m + c_idx * m;
    idx += bs_idx * n * 3 + pt_idx * 3;
    out += bs_idx * c * n + c_idx * n;

    // Weighted sum of the three neighbour features
    out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
}

3.3 LI-Fusion

1. Diagram

LI-Fusion follows the figure below, especially its right half: the LI-Fusion layer fuses image and point-cloud features element-wise.

In the figure, the upper branch carries the LiDAR features (the point-cloud features) and the lower branch carries the point-wise image features (image features sampled at the point projections).

2. Steps

The two branches are fused through the following steps:

(1) IA_Layer: element-wise fusion of image and point-cloud features
In the code, the IA_Layer fuses the image and point-cloud information:

① Projection and fusion: both branches first pass through FC layers that map them to the same dimensionality, and only then is an element-wise addition applied (note: this is a direct addition of the two feature vectors, not a concatenation; in the code it is ri + rp, where ri is the image feature and rp is the point feature).
② tanh maps the values into [-1, 1], which both fits the data distribution and helps the model.
③ An FC layer reduces the channel dimension to one, so that a one-channel weight map can be produced and multiplied with the image features; it also further mixes the information.
④ A sigmoid activation constrains the values to [0, 1], yielding the weight map: every element gets its own weight, which tells us how much each image pixel contributes.
⑤ Finally, the image features are multiplied by the weight map, producing contribution-weighted image features.


In the paper's words:

We first feed the LiDAR features F_P and the point-wise image features F_I into a fully connected layer and map them to the same channel. We then add them together to form a compact feature representation, which is compressed into a single-channel weight map w by another fully connected layer. We use a sigmoid activation function to normalize the weight map w into the range [0, 1].


The code is as follows.
Location: EPNet-master\EPNet-master\lib\net\pointnet2_msg.py

#================addition attention (add)=======================#
class IA_Layer(nn.Module):
    def __init__(self, channels):
        print('##############ADDITION ATTENTION(ADD)#########')
        super(IA_Layer, self).__init__()
        self.ic, self.pc = channels
        rc = self.pc // 4
        self.conv1 = nn.Sequential(nn.Conv1d(self.ic, self.pc, 1),
                                   nn.BatchNorm1d(self.pc),
                                   nn.ReLU())
        self.fc1 = nn.Linear(self.ic, rc)
        self.fc2 = nn.Linear(self.pc, rc)
        self.fc3 = nn.Linear(rc, 1)

    def forward(self, img_feas, point_feas):
        batch = img_feas.size(0)
        # Flatten the image and point-cloud features
        img_feas_f = img_feas.transpose(1, 2).contiguous().view(-1, self.ic)      # BCN -> BNC -> (BN)C
        point_feas_f = point_feas.transpose(1, 2).contiguous().view(-1, self.pc)  # BCN -> BNC -> (BN)C'
        # Feed the image and point features through FC layers to map them to the
        # same dimensionality, so that they can be fused below
        ri = self.fc1(img_feas_f)
        rp = self.fc2(point_feas_f)
        # 1. Fusion: the image and point features are fused element-wise, i.e. ri + rp
        # 2. tanh maps the result into [-1, 1]
        # 3. fc3 reduces the channel dimension so the result can be multiplied with the image features
        # 4. sigmoid constrains the values to [0, 1], producing a weight map: every element
        #    gets its own weight, telling us how much each image pixel contributes
        att = F.sigmoid(self.fc3(F.tanh(ri + rp)))  # BNx1
        att = att.squeeze(1)
        att = att.view(batch, 1, -1)  # B1N
        # Transform the raw image features with a conv layer so they can be
        # multiplied with the weight map
        img_feas_new = self.conv1(img_feas)
        # Weighted image features: image x weight map = the information the image contributes
        out = img_feas_new * att
        # Return the weighted image information
        return out
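A quick shape walkthrough (a sketch that assumes the IA_Layer class above is defined in scope; the sizes are illustrative, not from the repository): with 64-channel image features and 96-channel point features at N sampled points, the attention map is (B, 1, N) and the output takes on the point-feature channel count, because conv1 maps ic to pc.

import torch

# Assumes IA_Layer from the listing above is defined in this scope
layer = IA_Layer(channels=[64, 96])    # ic = 64 image channels, pc = 96 point channels
img_feas = torch.randn(2, 64, 4096)    # (B, C_img, N): image features gathered at the points
point_feas = torch.randn(2, 96, 4096)  # (B, C_point, N)
out = layer(img_feas, point_feas)
print(out.shape)                       # torch.Size([2, 96, 4096]): conv1 maps ic -> pc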

(2) Atten_Fusion_Conv: folding the original point-cloud features back in

The IA_Layer has already produced the fused image/point-cloud feature, which is image × weight map, i.e. it carries mostly image information. The original point-cloud information therefore still needs to be added back in, as shown in the figure below:

In the code this is the Atten_Fusion_Conv class, which works as follows:
① Obtain the weighted image features through the IA_Layer (covered in the previous section).
② Concatenate the original point-cloud features (the LiDAR features) with the weighted image features img_features (note: this is a concatenation, not an element-wise addition).

The code:

      # Concatenate the original point features with the weighted image features:
      # this keeps the re-weighted image information while preserving the raw point-cloud information
      fusion_features = torch.cat([point_features, img_features], dim=1)

③ Pass the concatenated features through Conv1d + BN + ReLU to transform the feature dimension, yielding the final fusion result.


The complete implementation:

class Atten_Fusion_Conv(nn.Module):
    # The LI-Fusion fusion module
    def __init__(self, inplanes_I, inplanes_P, outplanes):
        super(Atten_Fusion_Conv, self).__init__()
        # IA_Layer fuses the image and point-cloud information
        self.IA_Layer = IA_Layer(channels = [inplanes_I, inplanes_P])
        # self.conv1 = torch.nn.Conv1d(inplanes_P, outplanes, 1)
        self.conv1 = torch.nn.Conv1d(inplanes_P + inplanes_P, outplanes, 1)
        self.bn1 = torch.nn.BatchNorm1d(outplanes)

    def forward(self, point_features, img_features):
        # Obtain the weighted image features from the IA_Layer
        img_features = self.IA_Layer(img_features, point_features)
        # Concatenate the original point features with the weighted image features:
        # this keeps the re-weighted image information (unimportant image information
        # has been suppressed) while preserving the raw point-cloud information
        # fusion_features = img_features + point_features
        fusion_features = torch.cat([point_features, img_features], dim = 1)
        # Conv + BN + ReLU produces the final fusion result
        fusion_features = F.relu(self.bn1(self.conv1(fusion_features)))
        return fusion_features
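And the corresponding usage (same caveat: a sketch assuming the classes above are defined in scope): note that the output keeps the point-feature channel count, matching the cfg pairing of IMG_CHANNELS[i+1] with POINT_CHANNELS[i].

import torch

# First fusion level per the cfg: IMG_CHANNELS[1] = 64, POINT_CHANNELS[0] = 96
fusion = Atten_Fusion_Conv(inplanes_I=64, inplanes_P=96, outplanes=96)
point_features = torch.randn(2, 96, 4096)  # (B, C_point, N) from the SA module
img_features = torch.randn(2, 64, 4096)    # (B, C_img, N) gathered by Feature_Gather
print(fusion(point_features, img_features).shape)  # torch.Size([2, 96, 4096])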

3.4 The forward function

With the individual modules covered above, the overall flow is as follows:

① Iterate over the SA modules and run the SA step (the coordinate gathering that accompanies it is sketched after the snippet):

li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])
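The SA module also returns li_index, the indices of the points kept by farthest-point sampling; in the forward pass these are used with torch.gather to keep the projected image coordinates aligned with the surviving points. A minimal sketch of that gather (hypothetical shapes):

import torch

xy = torch.randn(2, 4096, 2)                         # (B, N, 2): projected coords of all points
li_index = torch.randint(0, 4096, (2, 1024))         # (B, npoint): indices kept by the SA module
idx = li_index.long().unsqueeze(-1).repeat(1, 1, 2)  # (B, npoint, 2): one index per coord channel
li_xy_cor = torch.gather(xy, 1, idx)                 # (B, npoint, 2): coords of the sampled points
print(li_xy_cor.shape)                               # torch.Size([2, 1024, 2])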

② Convolve the image to abstract features:

image = self.Img_Block[i](img[i])

③ Sample (interpolate) the image features at the projected point locations:

img_gather_feature = Feature_Gather(image,li_xy_cor) #, scale= 2**(i+1))
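Feature_Gather is a thin wrapper over torch.nn.functional.grid_sample, with the pixel coordinates pre-normalized to [-1, 1] in the forward pass. A standalone sketch of the same sampling (hypothetical sizes):

import torch
from torch.nn.functional import grid_sample

B, C, H, W, N = 1, 64, 192, 640, 1024
feature_map = torch.randn(B, C, H, W)
xy = torch.rand(B, N, 2) * 2.0 - 1.0                 # point projections, already in [-1, 1]
sampled = grid_sample(feature_map, xy.unsqueeze(1))  # (B, C, 1, N), bilinear by default
print(sampled.squeeze(2).shape)                      # torch.Size([1, 64, 1024])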

④ Fuse the image and point-cloud information with the LI-Fusion module:

li_features = self.Fusion_Conv[i](li_features,img_gather_feature)

⑤ The FP modules: three of them, P1, P2, P3 in Fig. 2 of the paper:

    for i in range(-1, -(len(self.FP_modules) + 1), -1):
        l_features[i - 1] = self.FP_modules[i](l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i])

⑥ The Image Stream deconvolution:

        if cfg.LI_FUSION.ENABLED:
            # for i in range(1, len(img))
            DeConv = []
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Run each image scale through its deconvolution layer.
                # Note: the DeConv list here (the upsampled results) is different from
                # self.DeConv[i] (the deconvolution layers)
                DeConv.append(self.DeConv[i](img[i + 1]))
            # Concatenate the deconvolution results
            de_concat = torch.cat(DeConv, dim=1)

⑦ Pass the deconvolution output through Conv + BN + ReLU, corresponding to the FU layer in the paper:

img_fusion = F.relu(self.image_fusion_bn(self.image_fusion_conv(de_concat)))

⑧ The final interpolation step: P4 (Fig. 2 of the paper):

img_fusion_gather_feature = Feature_Gather(img_fusion, xy)

⑨ The final LI-Fusion (Fig. 2 of the paper):

l_features[0] = self.final_fusion_img_point(l_features[0], img_fusion_gather_feature)

The full implementation:

    def forward(self, pointcloud: torch.cuda.FloatTensor, image = None, xy = None):
        # Point coordinates xyz and point features
        xyz, features = self._break_up_pc(pointcloud)
        # Seed the per-level lists with the raw coordinates and features
        l_xyz, l_features = [xyz], [features]

        # Use LI_FUSION only if enabled in the cfg file
        # cfg file: LI_FUSION.ENABLED: True
        if cfg.LI_FUSION.ENABLED:
            #### normalize xy to [-1, 1]
            size_range = [1280.0, 384.0]
            xy[:, :, 0] = xy[:, :, 0] / (size_range[0] - 1.0) * 2.0 - 1.0
            xy[:, :, 1] = xy[:, :, 1] / (size_range[1] - 1.0) * 2.0 - 1.0  # = xy / (size_range - 1.) * 2 - 1.
            # Keep the normalized projection coordinates
            l_xy_cor = [xy]
            img = [image]

        # Iterate over the SA modules: S1, S2, S3, S4 (Fig. 2 of the paper).
        # Strictly speaking this iterates over the SA and image-convolution modules
        # together; since they have the same count, len(self.SA_modules) is used.
        for i in range(len(self.SA_modules)):
            # Extract point features with the SA module
            li_xyz, li_features, li_index = self.SA_modules[i](l_xyz[i], l_features[i])

            # Fuse with the image only if LI_FUSION is enabled
            if cfg.LI_FUSION.ENABLED:
                li_index = li_index.long().unsqueeze(-1).repeat(1, 1, 2)
                li_xy_cor = torch.gather(l_xy_cor[i], 1, li_index)  # gathers values at the given indices along a dimension
                # Convolve the image
                image = self.Img_Block[i](img[i])
                # print(image.shape)
                # Sample (interpolate) the image features at the projected point locations
                img_gather_feature = Feature_Gather(image, li_xy_cor)  # , scale= 2**(i+1))
                # Fuse image and point-cloud information with the LI-Fusion module
                li_features = self.Fusion_Conv[i](li_features, img_gather_feature)
                # Save the sampled coordinates
                l_xy_cor.append(li_xy_cor)
                # Save the image at every scale
                img.append(image)

            # Save the point coordinates
            l_xyz.append(li_xyz)
            # Save the LI-Fusion-fused features
            l_features.append(li_features)

        # The FP modules: three of them, P1, P2, P3 (Fig. 2 of the paper)
        for i in range(-1, -(len(self.FP_modules) + 1), -1):
            l_features[i - 1] = self.FP_modules[i](
                l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
            )

        # Deconvolution of the Image Stream
        if cfg.LI_FUSION.ENABLED:
            # for i in range(1, len(img))
            DeConv = []
            for i in range(len(cfg.LI_FUSION.IMG_CHANNELS) - 1):
                # Run each image scale through its deconvolution layer.
                # Note: the DeConv list here (the upsampled results) is different from
                # self.DeConv[i] (the deconvolution layers)
                DeConv.append(self.DeConv[i](img[i + 1]))
            # Concatenate the deconvolution results
            de_concat = torch.cat(DeConv, dim = 1)

            # Conv + BN + ReLU over the concatenation: the FU layer in the paper
            img_fusion = F.relu(self.image_fusion_bn(self.image_fusion_conv(de_concat)))
            # The final interpolation step: P4 (Fig. 2 of the paper)
            img_fusion_gather_feature = Feature_Gather(img_fusion, xy)
            # The final LI-Fusion (Fig. 2 of the paper)
            l_features[0] = self.final_fusion_img_point(l_features[0], img_fusion_gather_feature)

        return l_xyz[0], l_features[0]
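For completeness, a heavily hedged sketch of how the whole network might be driven end to end. This assumes the compiled pointnet2 CUDA ops, a CUDA device, and a PointRCNN-style cfg_from_file helper in lib.config (EPNet is built on PointRCNN, but verify against your checkout); treat it as pseudocode-with-shapes rather than a drop-in script.

import torch
from lib.config import cfg, cfg_from_file   # assumed PointRCNN-style config helpers
from lib.net.pointnet2_msg import get_model

# Assumed paths and shapes; adjust to your checkout and dataset
cfg_from_file('tools/cfgs/LI_Fusion_with_attention_use_ce_loss.yaml')

net = get_model(input_channels=0, use_xyz=True).cuda()  # xyz-only input (no intensity), an assumption
pts = torch.randn(1, 16384, 3).cuda()       # (B, N, 3): xyz only, so features start as None
img = torch.randn(1, 3, 384, 1280).cuda()   # (B, 3, H, W) camera image
# Pixel-space projections of the points; forward() normalizes them to [-1, 1] itself
xy = torch.rand(1, 16384, 2).cuda() * torch.tensor([1280.0, 384.0]).cuda()

xyz_out, feat_out = net(pts, image=img, xy=xy)
print(xyz_out.shape, feat_out.shape)        # (B, N, 3) and (B, IMG_FEATURES_CHANNEL, N)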
