CR-NeRF Code Walkthrough: eval.py
This code is the inference script for the CR-NeRF (Cross-Ray Neural Radiance Fields) model. It renders and saves images and computes image-quality metrics such as PSNR and SSIM. A detailed walkthrough of the code follows.
(1) Importing the required libraries and modules
These include PyTorch, NumPy, tqdm (for progress bars), imageio (for saving images), and a number of project-specific modules and functions.
import torch
import os
import numpy as np
from collections import defaultdict
from tqdm import tqdm
import imageio
from argparse import ArgumentParser
from models.rendering import render_rays_cross_ray
from models.nerf import *
from models.nerf_decoder_stylenerf import get_renderer
from utils import load_ckpt
import metrics
from einops import rearrange
from datasets import dataset_dict
from datasets.depth_utils import *
from models.linearStyleTransfer import encoder3, encoder_sameoutputsize
from models.networks import E_attr
from math import sqrt
import math
import json
from PIL import Image
from torchvision import transforms as T
from opt import get_opts
from train_mask_grid_sample import get_model
torch.backends.cudnn.benchmark = True
(2) Function definitions
batched_inference() performs batched inference over rays. It splits the rays into chunks and processes one chunk at a time to avoid running out of GPU memory.
def batched_inference(models, embeddings,
                      rays, ts, N_samples, N_importance, use_disp,
                      chunk,
                      white_back,
                      **kwargs):
    """Batched inference over rays.

    Args:
        models: dict of models (e.g. the coarse and fine models).
        embeddings: dict of positional and directional embeddings.
        rays: ray data of shape [B, 6], where B is the number of rays.
        ts: timestamps of shape [B].
        N_samples: number of samples per ray.
        N_importance: number of additional importance samples.
        use_disp: whether to sample in disparity space.
        chunk: size of each chunk.
        white_back: whether the background is white.
        **kwargs: extra keyword arguments.

    Returns:
        A dict with the rendering results.
    """
    B = rays.shape[0]            # total number of rays
    results = defaultdict(list)  # per-key lists of partial results
    # Process the rays chunk by chunk
    for i in range(0, B, chunk):
        rendered_ray_chunks = \
            render_rays_cross_ray(models,
                                  embeddings,
                                  rays[i:i+chunk],                            # current chunk of rays
                                  ts[i:i+chunk] if ts is not None else None,  # current chunk of timestamps
                                  N_samples,
                                  use_disp,
                                  0,
                                  0,
                                  N_importance,
                                  chunk,
                                  white_back,
                                  test_time=True,
                                  **kwargs)
        # Collect each key/value pair of this chunk's results
        for k, v in rendered_ray_chunks.items():
            results[k] += [v]
    # Concatenate the per-chunk results into one tensor per key
    for k, v in results.items():
        results[k] = torch.cat(v, 0)
    return results  # merged results
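The chunking pattern itself is generic: bound peak memory by mapping a function over fixed-size slices of the input and concatenating the outputs. A minimal, self-contained sketch of the same idea (chunked_apply and the dummy workload are illustrative, not from the repo):
import torch

def chunked_apply(f, x, chunk):
    # Apply f to x in slices of `chunk` rows; peak memory is bounded by
    # the largest intermediate f produces on `chunk` rows, not on all of x.
    outs = [f(x[i:i + chunk]) for i in range(0, x.shape[0], chunk)]
    return torch.cat(outs, 0)

rays = torch.randn(100_000, 6)                           # dummy "rays"
out = chunked_apply(lambda r: 2 * r, rays, chunk=32768)  # dummy per-ray op
assert out.shape == rays.shape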
eulerAnglesToRotationMatrix() converts a triple of Euler angles into a rotation matrix.
def eulerAnglesToRotationMatrix(theta):
    R_x = np.array([[1, 0, 0],
                    [0, math.cos(theta[0]), -math.sin(theta[0])],
                    [0, math.sin(theta[0]), math.cos(theta[0])]])
    R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1])],
                    [0, 1, 0],
                    [-math.sin(theta[1]), 0, math.cos(theta[1])]])
    R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0],
                    [math.sin(theta[2]), math.cos(theta[2]), 0],
                    [0, 0, 1]])
    R = np.dot(R_z, np.dot(R_y, R_x))
    return R
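Every output of this function should be a proper rotation: orthogonal with determinant +1. A quick sanity check (not part of eval.py):
R = eulerAnglesToRotationMatrix([math.pi / 30, math.pi / 10, 0.0])
assert np.allclose(R @ R.T, np.eye(3))    # orthogonal
assert np.isclose(np.linalg.det(R), 1.0)  # determinant +1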
(3) Main program
This is the start of the main program. It parses the command-line arguments, loads the dataset, and builds the positional embeddings and the appearance encoder.
if __name__ == "__main__":
    args = get_opts()  # parse the command-line arguments into `args`
    # Dataset kwargs: root directory and dataset split
    kwargs = {'root_dir': args.root_dir, 'split': args.split}
    if args.dataset_name == 'blender':
        kwargs['img_wh'] = tuple(args.img_wh)  # image width/height for blender
    else:
        kwargs['img_downscale'] = args.img_downscale  # image downscale factor
        kwargs['use_cache'] = args.use_cache          # whether to use the cache
    dataset = dataset_dict[args.dataset_name](args=args, **kwargs)  # build the dataset by name
    scene = os.path.basename(args.root_dir.strip('/'))  # scene name = last component of root_dir

    # Positional encodings for 3D positions and view directions
    embedding_xyz = PosEmbedding(args.N_emb_xyz-1, args.N_emb_xyz)
    embedding_dir = PosEmbedding(args.N_emb_dir-1, args.N_emb_dir)
    embeddings = {'xyz': embedding_xyz, 'dir': embedding_dir}

    if args.encode_a:  # appearance encoding enabled
        # Build the appearance encoder on the GPU and restore its weights
        enc_a = encoder_sameoutputsize(out_channel=args.nerf_out_dim).cuda()
        load_ckpt(enc_a, args.ckpt_path, model_name='enc_a')
        kwargs = {}  # reset the kwargs dict
        if args.dataset_name == 'blender':
            with open(os.path.join(args.root_dir, f"transforms_train.json"), 'r') as f:
                meta_train = json.load(f)  # read the training transforms
            frame = meta_train['frames'][0]  # take the first frame
            image_path = os.path.join(args.root_dir, f"{frame['file_path']}.png")
            img = Image.open(image_path)
            img = img.resize(args.img_wh, Image.LANCZOS)  # open and resize the image
            toTensor = T.ToTensor()
            normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
            img = toTensor(img)  # (4, h, w)
            img = img[:3, :, :]*img[-1:, :, :] + (1-img[-1:, :, :])  # blend A to RGB (3, h, w)
            whole_img = normalize(img).unsqueeze(0).cuda()  # normalize and move to GPU
            kwargs['a_embedded_from_img'] = enc_a(whole_img)  # appearance embedding of the reference image
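PosEmbedding is NeRF's standard frequency encoding; the call pattern PosEmbedding(N_emb-1, N_emb) suggests its arguments are the maximum log2 frequency and the number of frequency bands. A minimal sketch under that assumption (the real class lives in models/nerf.py and may differ in details):
import torch

class PosEmbeddingSketch:
    # x -> (x, sin(2^0 x), cos(2^0 x), ..., sin(2^k x), cos(2^k x))
    def __init__(self, max_logscale, N_freqs):
        self.freqs = 2 ** torch.linspace(0, max_logscale, N_freqs)

    def __call__(self, x):  # x: (..., C)
        out = [x]
        for f in self.freqs:
            out += [torch.sin(f * x), torch.cos(f * x)]
        return torch.cat(out, -1)  # (..., C * (1 + 2 * N_freqs))

emb = PosEmbeddingSketch(9, 10)      # typical xyz setting: N_emb_xyz = 10
print(emb(torch.randn(4, 3)).shape)  # torch.Size([4, 63])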
(4) Model loading and initialization
This part builds the NeRF models and the decoder via get_model() and restores their weights from the checkpoint file.
models = get_model(args)
nerf_coarse = models['coarse']
nerf_fine = models['fine']
decoder = models['decoder']
load_ckpt(nerf_coarse, args.ckpt_path, model_name='nerf_coarse')
load_ckpt(nerf_fine, args.ckpt_path, model_name='nerf_fine')
load_ckpt(decoder, args.ckpt_path, model_name='decoder')
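All three load_ckpt calls read the same checkpoint file but restore different sub-models, so the checkpoint presumably stores each module's weights under a per-module key prefix. A hypothetical sketch of such a helper (the actual utils.load_ckpt may differ):
import torch

def load_ckpt_sketch(model, ckpt_path, model_name):
    # Hypothetical: keep only the entries whose keys start with
    # `model_name.`, strip the prefix, and load them into `model`.
    ckpt = torch.load(ckpt_path, map_location='cpu')
    state = ckpt.get('state_dict', ckpt)
    prefix = model_name + '.'
    sub = {k[len(prefix):]: v for k, v in state.items() if k.startswith(prefix)}
    model.load_state_dict(sub)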
(5) Dataset preprocessing
Scene-specific preprocessing for the test split: reading and downscaling an appearance reference image, normalizing it, and generating the camera poses for the test trajectory. Each scene has its own logic so that the test data stay consistent and plausible.
# Initialize the lists that hold saved images and metric results
imgs, psnrs, ssims = [], [], []
# Set up and create the results directory
dir_name = os.path.join(args.save_dir, f'results/{args.dataset_name}/{args.scene_name}')
os.makedirs(dir_name, exist_ok=True)
# Pass args through kwargs
kwargs['args'] = args

# Special handling when the dataset is phototourism and the split is 'test'
if args.dataset_name == 'phototourism' and args.split == 'test':
    # Define the test image width and height
    dataset.test_img_w, dataset.test_img_h = args.img_wh
    # Compute the focal length and the camera intrinsics (fov = 60 degrees)
    dataset.test_focal = dataset.test_img_w / 2 / np.tan(np.pi/6)
    dataset.test_K = np.array([[dataset.test_focal, 0, dataset.test_img_w / 2],
                               [0, dataset.test_focal, dataset.test_img_h / 2],
                               [0, 0, 1]])
    # Scene-specific handling
    if scene == 'brandenburg_gate':
        # Pick a specific training image for the appearance embedding
        img = Image.open(os.path.join(args.root_dir, 'dense/images',
                                      dataset.image_paths[dataset.img_ids_train[314]])).convert('RGB')
        img_downscale = 8
        img_w, img_h = img.size
        img_w = img_w // img_downscale
        img_h = img_h // img_downscale
        img = img.resize((img_w, img_h), Image.LANCZOS)
        # Convert to a tensor and normalize
        toTensor = T.ToTensor()
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        img = toTensor(img)
        whole_img = normalize(img).unsqueeze(0).cuda()
        kwargs['a_embedded_from_img'] = enc_a(whole_img)
        dataset.test_appearance_idx = 314
        N_frames = 30 * 8
        # Camera trajectory parameters
        dx1 = np.linspace(-0.25, 0.25, N_frames // 2)
        dx2 = np.linspace(0.25, 0.38, N_frames - N_frames // 2)
        dx = np.concatenate((dx1, dx2))
        dy1 = np.linspace(0.05, -0.1, N_frames // 2)
        dy2 = np.linspace(-0.1, 0.05, N_frames - N_frames // 2)
        dy = np.concatenate((dy1, dy2))
        dz1 = np.linspace(0.1, 0.3, N_frames // 2)
        dz2 = np.linspace(0.3, 0.1, N_frames - N_frames // 2)
        dz = np.concatenate((dz1, dz2))
        theta_x1 = np.linspace(math.pi / 30, 0, N_frames // 2)
        theta_x2 = np.linspace(0, math.pi / 30, N_frames - N_frames // 2)
        theta_x = np.concatenate((theta_x1, theta_x2))
        theta_y = np.linspace(math.pi / 10, -math.pi / 10, N_frames)
        theta_z = np.linspace(0, 0, N_frames)
        # Copy the base camera pose and apply the per-frame transform
        dataset.poses_test = np.tile(dataset.poses_dict[1123], (N_frames, 1, 1))
        for i in range(N_frames):
            dataset.poses_test[i, 0, 3] += dx[i]
            dataset.poses_test[i, 1, 3] += dy[i]
            dataset.poses_test[i, 2, 3] += dz[i]
            dataset.poses_test[i, :, :3] = np.dot(eulerAnglesToRotationMatrix([theta_x[i], theta_y[i], theta_z[i]]),
                                                  dataset.poses_test[i, :, :3])
    elif scene == 'trevi_fountain':
        # Pick a specific training image for the appearance embedding
        img = Image.open(os.path.join(args.root_dir, 'dense/images',
                                      dataset.image_paths[dataset.img_ids_train[1548]])).convert('RGB')
        img_downscale = 8
        img_w, img_h = img.size
        img_w = img_w // img_downscale
        img_h = img_h // img_downscale
        img = img.resize((img_w, img_h), Image.LANCZOS)
        # Convert to a tensor and normalize
        toTensor = T.ToTensor()
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        img = toTensor(img)
        whole_img = normalize(img).unsqueeze(0).cuda()
        kwargs['a_embedded_from_img'] = enc_a(whole_img)
        dataset.test_appearance_idx = dataset.img_ids_train[1548]
        N_frames = 30 * 8
        # Camera trajectory parameters
        dx = np.linspace(-0.8, 0.7, N_frames)
        dy1 = np.linspace(-0., 0.05, N_frames // 2)
        dy2 = np.linspace(0.05, -0., N_frames - N_frames // 2)
        dy = np.concatenate((dy1, dy2))
        dz1 = np.linspace(0.4, 0.1, N_frames // 4)
        dz2 = np.linspace(0.1, 0.5, N_frames // 4)
        dz3 = np.linspace(0.5, 0.1, N_frames // 4)
        dz4 = np.linspace(0.1, 0.4, N_frames - 3 * (N_frames // 4))
        dz = np.concatenate((dz1, dz2, dz3, dz4))
        theta_x1 = np.linspace(-0, 0, N_frames // 2)
        theta_x2 = np.linspace(0, -0, N_frames - N_frames // 2)
        theta_x = np.concatenate((theta_x1, theta_x2))
        theta_y = np.linspace(math.pi / 6, -math.pi / 6, N_frames)
        theta_z = np.linspace(0, 0, N_frames)
        # Copy the base camera pose and apply the per-frame transform
        dataset.poses_test = np.tile(dataset.poses_dict[dataset.img_ids_train[1548]], (N_frames, 1, 1))
        for i in range(N_frames):
            dataset.poses_test[i, 0, 3] += dx[i]
            dataset.poses_test[i, 1, 3] += dy[i]
            dataset.poses_test[i, 2, 3] += dz[i]
            dataset.poses_test[i, :, :3] = np.dot(eulerAnglesToRotationMatrix([theta_x[i], theta_y[i], theta_z[i]]),
                                                  dataset.poses_test[i, :, :3])
    elif scene == 'sacre_coeur':
        # Pick a specific training image for the appearance embedding
        img = Image.open(os.path.join(args.root_dir, 'dense/images',
                                      dataset.image_paths[dataset.img_ids_train[58]])).convert('RGB')
        img_downscale = 8
        img_w, img_h = img.size
        img_w = img_w // img_downscale
        img_h = img_h // img_downscale
        img = img.resize((img_w, img_h), Image.LANCZOS)
        # Convert to a tensor and normalize
        toTensor = T.ToTensor()
        normalize = T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
        img = toTensor(img)
        whole_img = normalize(img).unsqueeze(0).cuda()
        kwargs['a_embedded_from_img'] = enc_a(whole_img)
        dataset.test_appearance_idx = dataset.img_ids_train[58]
        N_frames = 30 * 8
        # Camera trajectory parameters
        dx = np.linspace(-2, 2, N_frames)
        dy1 = np.linspace(-0., 2, N_frames // 2)
        dy2 = np.linspace(2, -0., N_frames - N_frames // 2)
        dy = np.concatenate((dy1, dy2))
        dz1 = np.linspace(0, -3, N_frames // 2)
        dz2 = np.linspace(-3, 0, N_frames - N_frames // 2)
        dz = np.concatenate((dz1, dz2))
        theta_x1 = np.linspace(-0, 0, N_frames // 2)
        theta_x2 = np.linspace(0, -0, N_frames - N_frames // 2)
        theta_x = np.concatenate((theta_x1, theta_x2))
        # ... (the remainder of this branch follows the same pattern as the scenes above)
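Every branch follows the same recipe: linearly interpolate per-axis translation offsets and Euler angles over N_frames, then perturb a base 3x4 camera-to-world pose frame by frame. A condensed, self-contained version of that pattern (make_trajectory and its ranges are illustrative, not from the repo; it reuses eulerAnglesToRotationMatrix from above):
def make_trajectory(base_pose, N_frames, dx_range, theta_y_range):
    # Perturb a 3x4 camera-to-world pose: translate along x, rotate around y
    dx = np.linspace(*dx_range, N_frames)
    theta_y = np.linspace(*theta_y_range, N_frames)
    poses = np.tile(base_pose, (N_frames, 1, 1))
    for i in range(N_frames):
        poses[i, 0, 3] += dx[i]  # shift the camera center along x
        poses[i, :, :3] = eulerAnglesToRotationMatrix([0.0, theta_y[i], 0.0]) @ poses[i, :, :3]
    return poses

base = np.hstack([np.eye(3), np.zeros((3, 1))])  # dummy identity pose
poses = make_trajectory(base, N_frames=240, dx_range=(-0.25, 0.25),
                        theta_y_range=(math.pi / 10, -math.pi / 10))
print(poses.shape)  # (240, 3, 4)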
(6) Rendering and saving images
Iterate over every sample in the dataset, render it with the NeRF model, and save the result as an image file.
# Iterate over the dataset with a tqdm progress bar
for i in tqdm(range(len(dataset))):
    # Fetch the current sample's rays and timestamps
    sample = dataset[i]
    rays = sample['rays']
    ts = sample['ts']

    # On the test_test split with appearance encoding enabled,
    # embed the whole ground-truth image to get the appearance code
    if args.split == 'test_test' and args.encode_a:
        whole_img = sample['whole_img'].unsqueeze(0).cuda()
        whole_img = (whole_img + 1) / 2
        kwargs['a_embedded_from_img'] = enc_a(whole_img)

    # Batched inference: render all rays of this sample
    results = batched_inference(models, embeddings, rays.cuda(), ts.cuda(),
                                args.N_samples, args.N_importance, args.use_disp,
                                args.chunk,
                                dataset.white_back,
                                **kwargs)

    # Determine the image size for this dataset type
    if args.dataset_name == 'blender':
        w, h = args.img_wh
    else:
        w, h = sample['img_wh']

    # Take the fine feature map and rearrange it into the decoder's input format
    feature = results['feature_fine']  # torch.Size([699008, 4])
    print("using fine feature")
    lastdim = feature.size(-1)
    feature = rearrange(feature, 'n1 n3 -> n3 n1', n3=lastdim)
    feature = rearrange(feature, 'n3 (h w) -> 1 n3 h w', h=int(h), w=int(w), n3=lastdim)  # torch.Size([1, 64, 340, 514])

    # Decode the feature map together with the appearance embedding into RGB
    rgbs_pred = models['decoder'](feature, kwargs['a_embedded_from_img'])
    rgbs_pred = rearrange(rgbs_pred, '1 n1 h w -> (h w) n1', h=int(h), w=int(w), n1=3)
    results['rgb_fine'] = rgbs_pred.cpu()

    # Convert the rendered RGB image to a NumPy array, save it as a PNG,
    # and keep it in `imgs` for the video
    img_pred = np.clip(results['rgb_fine'].view(h, w, 3).detach().numpy(), 0, 1)
    img_pred_ = (img_pred * 255).astype(np.uint8)
    imgs += [img_pred_]
    imageio.imwrite(os.path.join(dir_name, f'{i:03d}.png'), img_pred_)
    print("image saving path", os.path.join(dir_name, f'{i:03d}.png'))

# For blender, or for phototourism test trajectories, assemble the frames into a video
if args.dataset_name == 'blender' or \
   (args.dataset_name == 'phototourism' and args.split == 'test'):
    imageio.mimsave(os.path.join(dir_name, f'{args.scene_name}.{args.video_format}'),
                    imgs, fps=30)
print('Done')
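The psnrs and ssims lists initialized in step (5) are presumably filled in a part of the script not shown in this excerpt, using the imported metrics module. For reference, PSNR on images scaled to [0, 1] reduces to a one-liner; a minimal sketch that assumes the standard definition rather than the repo's exact metrics API:
import torch

def psnr_sketch(pred, gt):
    # PSNR for images in [0, 1]: -10 * log10(MSE)
    mse = torch.mean((pred - gt) ** 2)
    return -10 * torch.log10(mse)

gt = torch.rand(340, 514, 3)
print(psnr_sketch(gt + 0.01, gt))  # uniform 0.01 error -> exactly 40 dB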



