CR-NeRF Code Walkthrough: eval.py
This code is the inference script for the CR-NeRF (Cross-Ray Neural Radiance Fields) model. It renders and saves images and computes image-quality metrics such as PSNR and SSIM. A detailed walkthrough of the code follows.
(1) Importing the required libraries and modules
These include PyTorch, NumPy, tqdm (for progress bars), imageio (for saving images), and a number of project-specific modules and functions.
import torch
import os
import numpy as np
from collections import defaultdict
from tqdm import tqdm
import imageio
from argparse import ArgumentParser
from models.rendering import render_rays_cross_ray
from models.nerf import *
from models.nerf_decoder_stylenerf import get_renderer
from utils import load_ckpt
import metrics
from einops import rearrange
from datasets import dataset_dict
from datasets.depth_utils import *
from models.linearStyleTransfer import encoder3, encoder_sameoutputsize
from models.networks import E_attr
from math import sqrt
import math
import json
from PIL import Image
from torchvision import transforms as T
from opt import get_opts
from train_mask_grid_sample import get_model
torch.backends.cudnn.benchmark = True
(2) Function definitions
batched_inference() performs batched inference over rays. It splits the rays into chunks and processes one chunk at a time to avoid running out of GPU memory.
def batched_inference(models, embeddings,
                      rays, ts, N_samples, N_importance, use_disp,
                      chunk,
                      white_back,
                      **kwargs):
    """Batched inference over rays.

    Args:
        models: dict of models (e.g. the coarse and fine models).
        embeddings: dict of positional and directional embeddings.
        rays: ray data of shape [B, 6], where B is the number of rays.
        ts: timestamps of shape [B].
        N_samples: number of samples per ray.
        N_importance: number of additional importance samples.
        use_disp: whether to sample in disparity space.
        chunk: size of each chunk.
        white_back: whether the background is white.
        **kwargs: extra keyword arguments.

    Returns:
        A dict with the rendering results.
    """
    B = rays.shape[0]            # total number of rays
    results = defaultdict(list)  # per-key lists of partial results
    # Process the rays chunk by chunk
    for i in range(0, B, chunk):
        rendered_ray_chunks = \
            render_rays_cross_ray(models,
                                  embeddings,
                                  rays[i:i+chunk],                            # current chunk of rays
                                  ts[i:i+chunk] if ts is not None else None,  # current chunk of timestamps
                                  N_samples,
                                  use_disp,
                                  0,
                                  0,
                                  N_importance,
                                  chunk,
                                  white_back,
                                  test_time=True,
                                  **kwargs)
        # Collect each key/value pair of this chunk's results
        for k, v in rendered_ray_chunks.items():
            results[k] += [v]
    # Concatenate the per-chunk results into one tensor per key
    for k, v in results.items():
        results[k] = torch.cat(v, 0)
    return results  # merged results
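The chunking pattern itself is generic: bound peak memory by mapping a function over fixed-size slices of the input and concatenating the outputs. A minimal, self-contained sketch of the same idea (chunked_apply and the dummy workload are illustrative, not from the repo):
import torch

def chunked_apply(f, x, chunk):
    # Apply f to x in slices of `chunk` rows; peak memory is bounded by
    # the largest intermediate f produces on `chunk` rows, not on all of x.
    outs = [f(x[i:i + chunk]) for i in range(0, x.shape[0], chunk)]
    return torch.cat(outs, 0)

rays = torch.randn(100_000, 6)                           # dummy "rays"
out = chunked_apply(lambda r: 2 * r, rays, chunk=32768)  # dummy per-ray op
assert out.shape == rays.shape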
eulerAnglesToRotationMatrix() converts a triple of Euler angles into a rotation matrix.
def eulerAnglesToRotationMatrix(theta):
    R_x = np.array([[1, 0, 0],
                    [0, math.cos(theta[0]), -math.sin(theta[0])],
                    [0, math.sin(theta[0]), math.cos(theta[0])]])
    R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
                    [0, 1, 0],
                    [-math.sin(theta[1]), 0, math.cos(theta[1])]])
    R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
                    [math.sin(theta[2]), math.cos(theta[2]), 0],
                    [0, 0, 1]])
    R = np.dot(R_z, np.dot(R_y, R_x))
    return R
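Every output of this function should be a proper rotation: orthogonal with determinant +1. A quick sanity check (not part of eval.py):
R = eulerAnglesToRotationMatrix([math.pi / 30, math.pi / 10, 0.0])
assert np.allclose(R @ R.T, np.eye(3))    # orthogonal
assert np.isclose(np.linalg.det(R), 1.0)  # determinant +1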
(3) Main program
This is the start of the main program. It parses the command-line arguments, loads the dataset, and builds the positional embeddings and the appearance encoder.
if __name__ == "__main__":
    args = get_opts()  # parse the command-line arguments into `args`
    # Dataset kwargs: root directory and dataset split
    kwargs = {'root_dir': args.root_dir, 'split': args.split}
    if args.dataset_name == 'blender':
        kwargs['img_wh'] = tuple(args.img_wh)  # image width/height for blender
    else:
        kwargs['img_downscale'] = args.img_downscale  # image downscale factor
        kwargs['use_cache'] = args.use_cache          # whether to use the cache
    dataset = dataset_dict[args.dataset_name](args=args, **kwargs)  # build the dataset by name
    scene = os.path.basename(args.root_dir.strip('/'))  # scene name = last component of root_dir

    # Positional encodings for 3D positions and view directions
    embedding_xyz = PosEmbedding(args.N_emb_xyz-1, args.N_emb_xyz)
    embedding_dir = PosEmbedding(args.N_emb_dir-1, args.N_emb_dir)
    embeddings = {'xyz': embedding_xyz, 'dir': embedding_dir}

    if args.encode_a:  # appearance encoding enabled
        # Build the appearance encoder on the GPU and restore its weights
        enc_a = encoder_sameoutputsize(out_channel=args.nerf_out_dim).cuda()
        load_ckpt(enc_a, args.ckpt_path, model_name='enc_a')
        kwargs = {}  # reset the kwargs dict
        if args.dataset_name == 'blender':
            with open(os.path.join(args.root_dir, f"transforms_train.json"), 'r') as f:
                meta_train = json.load(f)  # read the training transforms
            frame = meta_train['frames'][0]  # take the first frame
            image_path = os.path.join(args.root_dir, f"{frame['file_path']}.png")
            img = Image.open(image_path)
            img = img.resize(args.img_wh, Image.LANCZOS)  # open and resize the image
            toTensor = T.ToTensor()
            normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            img = toTensor(img)  # (4, h, w)
            img = img[:3, :, :]*img[-1:, :, :] + (1-img[-1:, :, :])  # blend A to RGB (3, h, w)
            whole_img = normalize(img).unsqueeze(0).cuda()  # normalize and move to GPU
            kwargs['a_embedded_from_img'] = enc_a(whole_img)  # appearance embedding of the reference image
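PosEmbedding is NeRF's standard frequency encoding; the call pattern PosEmbedding(N_emb-1, N_emb) suggests its arguments are the maximum log2 frequency and the number of frequency bands. A minimal sketch under that assumption (the real class lives in models/nerf.py and may differ in details):
import torch

class PosEmbeddingSketch:
    # x -> (x, sin(2^0 x), cos(2^0 x), ..., sin(2^k x), cos(2^k x))
    def __init__(self, max_logscale, N_freqs):
        self.freqs = 2 ** torch.linspace(0, max_logscale, N_freqs)

    def __call__(self, x):  # x: (..., C)
        out = [x]
        for f in self.freqs:
            out += [torch.sin(f * x), torch.cos(f * x)]
        return torch.cat(out, -1)  # (..., C * (1 + 2 * N_freqs))

emb = PosEmbeddingSketch(9, 10)      # typical xyz setting: N_emb_xyz = 10
print(emb(torch.randn(4, 3)).shape)  # torch.Size([4, 63])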
(4) Model loading and initialization
This part builds the NeRF models and the decoder via get_model() and restores their weights from the checkpoint file.
models = get_model(args)
nerf_coarse = models['coarse']
nerf_fine = models['fine']
decoder = models['decoder']
load_ckpt(nerf_coarse, args.ckpt_path, model_name='nerf_coarse')
load_ckpt(nerf_fine, args.ckpt_path, model_name='nerf_fine')
load_ckpt(decoder, args.ckpt_path, model_name='decoder')
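All three load_ckpt calls read the same checkpoint file but restore different sub-models, so the checkpoint presumably stores each module's weights under a per-module key prefix. A hypothetical sketch of such a helper (the actual utils.load_ckpt may differ):
import torch

def load_ckpt_sketch(model, ckpt_path, model_name):
    # Hypothetical: keep only the entries whose keys start with
    # `model_name.`, strip the prefix, and load them into `model`.
    ckpt = torch.load(ckpt_path, map_location='cpu')
    state = ckpt.get('state_dict', ckpt)
    prefix = model_name + '.'
    sub = {k[len(prefix):]: v for k, v in state.items() if k.startswith(prefix)}
    model.load_state_dict(sub)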
(5) Dataset preprocessing
Scene-specific preprocessing for the test split: reading and downscaling an appearance reference image, normalizing it, and generating the camera poses for the test trajectory. Each scene has its own logic so that the test data stay consistent and plausible.
# Initialize the lists that hold saved images and metric results
imgs, psnrs, ssims = [], [], []
# Set up and create the results directory
dir_name = os.path.join(args.save_dir, f'results/{args.dataset_name}/{args.scene_name}')
os.makedirs(dir_name, exist_ok=True)
# Pass args through kwargs
kwargs['args'] = args

# Special handling when the dataset is phototourism and the split is 'test'
if args.dataset_name == 'phototourism' and args.split == 'test':
    # Define the test image width and height
    dataset.test_img_w, dataset.test_img_h = args.img_wh
    # Compute the focal length and the camera intrinsics (fov = 60 degrees)
    dataset.test_focal = dataset.test_img_w / 2 / np.tan(np.pi/6)
    dataset.test_K = np.array([[dataset.test_focal, 0, dataset.test_img_w / 2],
                               [0, dataset.test_focal, dataset.test_img_h / 2],
                               [0, 0, 1]])
    # Scene-specific handling
    if scene == 'brandenburg_gate':
        # Pick a specific training image for the appearance embedding
        img = Image.open(os.path.join(args.root_dir, 'dense/images',
                                      dataset.image_paths[dataset.img_ids_train[314]])).convert('RGB')
        img_downscale = 8
        img_w, img_h = img.size
        img_w = img_w // img_downscale
        img_h = img_h // img_downscale
        img = img.resize((img_w, img_h), Image.LANCZOS)
        # Convert to a tensor and normalize
        toTensor = T.ToTensor()
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        img = toTensor(img)
        whole_img = normalize(img).unsqueeze(0).cuda()
        kwargs['a_embedded_from_img'] = enc_a(whole_img)
        dataset.test_appearance_idx = 314
        N_frames = 30 * 8
        # Camera trajectory parameters
        dx1 = np.linspace(-0.25, 0.25, N_frames // 2)
        dx2 = np.linspace(0.25, 0.38, N_frames - N_frames // 2)
        dx = np.concatenate((dx1, dx2))
        dy1 = np.linspace(0.05, -0.1, N_frames // 2)
        dy2 = np.linspace(-0.1, 0.05, N_frames - N_frames // 2)
        dy = np.concatenate((dy1, dy2))
        dz1 = np.linspace(0.1, 0.3, N_frames // 2)
        dz2 = np.linspace(0.3, 0.1, N_frames - N_frames // 2)
        dz = np.concatenate((dz1, dz2))
        theta_x1 = np.linspace(math.pi / 30, 0, N_frames // 2)
        theta_x2 = np.linspace(0, math.pi / 30, N_frames - N_frames // 2)
        theta_x = np.concatenate((theta_x1, theta_x2))
        theta_y = np.linspace(math.pi / 10, -math.pi / 10, N_frames)
        theta_z = np.linspace(0, 0, N_frames)
        # Copy the base camera pose and apply the per-frame transform
        dataset.poses_test = np.tile(dataset.poses_dict[1123], (N_frames, 1, 1))
        for i in range(N_frames):
            dataset.poses_test[i, 0, 3] += dx[i]
            dataset.poses_test[i, 1, 3] += dy[i]
            dataset.poses_test[i, 2, 3] += dz[i]
            dataset.poses_test[i, :, :3] = np.dot(eulerAnglesToRotationMatrix([theta_x[i], theta_y[i], theta_z[i]]),
                                                  dataset.poses_test[i, :, :3])
    elif scene == 'trevi_fountain':
        # Pick a specific training image for the appearance embedding
        img = Image.open(os.path.join(args.root_dir, 'dense/images',
                                      dataset.image_paths[dataset.img_ids_train[1548]])).convert('RGB')
        img_downscale = 8
        img_w, img_h = img.size
        img_w = img_w // img_downscale
        img_h = img_h // img_downscale
        img = img.resize((img_w, img_h), Image.LANCZOS)
        # Convert to a tensor and normalize
        toTensor = T.ToTensor()
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        img = toTensor(img)
        whole_img = normalize(img).unsqueeze(0).cuda()
        kwargs['a_embedded_from_img'] = enc_a(whole_img)
        dataset.test_appearance_idx = dataset.img_ids_train[1548]
        N_frames = 30 * 8
        # Camera trajectory parameters
        dx = np.linspace(-0.8, 0.7, N_frames)
        dy1 = np.linspace(-0., 0.05, N_frames // 2)
        dy2 = np.linspace(0.05, -0., N_frames - N_frames // 2)
        dy = np.concatenate((dy1, dy2))
        dz1 = np.linspace(0.4, 0.1, N_frames // 4)
        dz2 = np.linspace(0.1, 0.5, N_frames // 4)
        dz3 = np.linspace(0.5, 0.1, N_frames // 4)
        dz4 = np.linspace(0.1, 0.4, N_frames - 3 * (N_frames // 4))
        dz = np.concatenate((dz1, dz2, dz3, dz4))
        theta_x1 = np.linspace(-0, 0, N_frames // 2)
        theta_x2 = np.linspace(0, -0, N_frames - N_frames // 2)
        theta_x = np.concatenate((theta_x1, theta_x2))
        theta_y = np.linspace(math.pi / 6, -math.pi / 6, N_frames)
        theta_z = np.linspace(0, 0, N_frames)
        # Copy the base camera pose and apply the per-frame transform
        dataset.poses_test = np.tile(dataset.poses_dict[dataset.img_ids_train[1548]], (N_frames, 1, 1))
        for i in range(N_frames):
            dataset.poses_test[i, 0, 3] += dx[i]
            dataset.poses_test[i, 1, 3] += dy[i]
            dataset.poses_test[i, 2, 3] += dz[i]
            dataset.poses_test[i, :, :3] = np.dot(eulerAnglesToRotationMatrix([theta_x[i], theta_y[i], theta_z[i]]),
                                                  dataset.poses_test[i, :, :3])
    elif scene == 'sacre_coeur':
        # Pick a specific training image for the appearance embedding
        img = Image.open(os.path.join(args.root_dir, 'dense/images',
                                      dataset.image_paths[dataset.img_ids_train[58]])).convert('RGB')
        img_downscale = 8
        img_w, img_h = img.size
        img_w = img_w // img_downscale
        img_h = img_h // img_downscale
        img = img.resize((img_w, img_h), Image.LANCZOS)
        # Convert to a tensor and normalize
        toTensor = T.ToTensor()
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        img = toTensor(img)
        whole_img = normalize(img).unsqueeze(0).cuda()
        kwargs['a_embedded_from_img'] = enc_a(whole_img)
        dataset.test_appearance_idx = dataset.img_ids_train[58]
        N_frames = 30 * 8
        # Camera trajectory parameters
        dx = np.linspace(-2, 2, N_frames)
        dy1 = np.linspace(-0., 2, N_frames // 2)
        dy2 = np.linspace(2, -0., N_frames - N_frames // 2)
        dy = np.concatenate((dy1, dy2))
        dz1 = np.linspace(0, -3, N_frames // 2)
        dz2 = np.linspace(-3, 0, N_frames - N_frames // 2)
        dz = np.concatenate((dz1, dz2))
        theta_x1 = np.linspace(-0, 0, N_frames // 2)
        theta_x2 = np.linspace(0, -0, N_frames - N_frames // 2)
        theta_x = np.concatenate((theta_x1, theta_x2))
        # ... (the remainder of this branch follows the same pattern as the scenes above)
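Every branch follows the same recipe: linearly interpolate per-axis translation offsets and Euler angles over N_frames, then perturb a base 3x4 camera-to-world pose frame by frame. A condensed, self-contained version of that pattern (make_trajectory and its ranges are illustrative, not from the repo; it reuses eulerAnglesToRotationMatrix from above):
def make_trajectory(base_pose, N_frames, dx_range, theta_y_range):
    # Perturb a 3x4 camera-to-world pose: translate along x, rotate around y
    dx = np.linspace(*dx_range, N_frames)
    theta_y = np.linspace(*theta_y_range, N_frames)
    poses = np.tile(base_pose, (N_frames, 1, 1))
    for i in range(N_frames):
        poses[i, 0, 3] += dx[i]  # shift the camera center along x
        poses[i, :, :3] = eulerAnglesToRotationMatrix([0.0, theta_y[i], 0.0]) @ poses[i, :, :3]
    return poses

base = np.hstack([np.eye(3), np.zeros((3, 1))])  # dummy identity pose
poses = make_trajectory(base, N_frames=240, dx_range=(-0.25, 0.25),
                        theta_y_range=(math.pi / 10, -math.pi / 10))
print(poses.shape)  # (240, 3, 4)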
(6) Rendering and saving images
Iterate over every sample in the dataset, render it with the NeRF model, and save the result as an image file.
# Iterate over the dataset with a tqdm progress bar
for i in tqdm(range(len(dataset))):
    # Fetch the current sample's rays and timestamps
    sample = dataset[i]
    rays = sample['rays']
    ts = sample['ts']

    # On the test_test split with appearance encoding enabled,
    # embed the whole ground-truth image to get the appearance code
    if args.split == 'test_test' and args.encode_a:
        whole_img = sample['whole_img'].unsqueeze(0).cuda()
        whole_img = (whole_img + 1) / 2
        kwargs['a_embedded_from_img'] = enc_a(whole_img)

    # Batched inference: render all rays of this sample
    results = batched_inference(models, embeddings, rays.cuda(), ts.cuda(),
                                args.N_samples, args.N_importance, args.use_disp,
                                args.chunk,
                                dataset.white_back,
                                **kwargs)

    # Determine the image size for this dataset type
    if args.dataset_name == 'blender':
        w, h = args.img_wh
    else:
        w, h = sample['img_wh']

    # Take the fine feature map and rearrange it into the decoder's input format
    feature = results['feature_fine']  # torch.Size([699008, 4])
    print("using fine feature")
    lastdim = feature.size(-1)
    feature = rearrange(feature, 'n1 n3 -> n3 n1', n3=lastdim)
    feature = rearrange(feature, 'n3 (h w) -> 1 n3 h w', h=int(h), w=int(w), n3=lastdim)  # torch.Size([1, 64, 340, 514])

    # Decode the feature map together with the appearance embedding into RGB
    rgbs_pred = models['decoder'](feature, kwargs['a_embedded_from_img'])
    rgbs_pred = rearrange(rgbs_pred, '1 n1 h w -> (h w) n1', h=int(h), w=int(w), n1=3)
    results['rgb_fine'] = rgbs_pred.cpu()

    # Convert the rendered RGB image to a NumPy array, save it as a PNG,
    # and keep it in `imgs` for the video
    img_pred = np.clip(results['rgb_fine'].view(h, w, 3).detach().numpy(), 0, 1)
    img_pred_ = (img_pred * 255).astype(np.uint8)
    imgs += [img_pred_]
    imageio.imwrite(os.path.join(dir_name, f'{i:03d}.png'), img_pred_)
    print("image saving path", os.path.join(dir_name, f'{i:03d}.png'))

# For blender, or for phototourism test trajectories, assemble the frames into a video
if args.dataset_name == 'blender' or \
   (args.dataset_name == 'phototourism' and args.split == 'test'):
    imageio.mimsave(os.path.join(dir_name, f'{args.scene_name}.{args.video_format}'),
                    imgs, fps=30)
print('Done')
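The psnrs and ssims lists initialized in step (5) are presumably filled in a part of the script not shown in this excerpt, using the imported metrics module. For reference, PSNR on images scaled to [0, 1] reduces to a one-liner; a minimal sketch that assumes the standard definition rather than the repo's exact metrics API:
import torch

def psnr_sketch(pred, gt):
    # PSNR for images in [0, 1]: -10 * log10(MSE)
    mse = torch.mean((pred - gt) ** 2)
    return -10 * torch.log10(mse)

gt = torch.rand(340, 514, 3)
print(psnr_sketch(gt + 0.01, gt))  # uniform 0.01 error -> exactly 40 dB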



