西安网站建设公司 云阔厦门哪家做网站好

当前位置: 首页 > news >正文

西安网站建设公司 云阔,厦门哪家做网站好,王也踏青,手机网站报价表【请尊重原创#xff01;转载和引用文章内容务必注明出处#xff01;未经许可上传到某文库或其他收费阅读/下载网站赚钱的必追究责任#xff01;】 无论是mmdetection3D还是OpenPCDet都只有使用数据集(使用哪个数据集由配置文件里指定)训练和测试的代码#xff0c;没有使用…【请尊重原创转载和引用文章内容务必注明出处未经许可上传到某文库或其他收费阅读/下载网站赚钱的必追究责任】 无论是mmdetection3D还是OpenPCDet都只有使用数据集(使用哪个数据集由配置文件里指定)训练和测试的代码没有使用某种数据集格式的单帧数据进行推理的代码(也就是一些2D目标检测框里里提供的推理demo代码)而OpenPCDet尤其是mmdetection里面为了支持多种不同的数据集和多种不同模型的训练和测试把很多实现步骤高度配置化这是好事也是坏事如果你只是使用这些框架(尤其是使用它们已支持的公开数据集)进行训练和测试那么相对简单如果是要使用这些框架对单帧数据(可以是他们支持的公开数据集里的数据或者遵循这些数据集的格式自己制作的数据集的数据)进行推理就比较复杂了框架没有提供工具代码只能自己去写写之前当然得把框架的相关代码翻看几遍把支持某个数据集和模型的配置文件和分散在多处的代码的逻辑理顺连贯起来不然没法下手因为框架高度配置化的话相关的代码就会非常分散在连贯性可读性方面很差需要花时间先理清楚然后把那些配置化的相关零散代码都整理并修改和串联起来完成数据的多步预处理、调用模型推理、对推理结果的解析和后处理的全部流程的实现。 用nuScense格式数据调用detr3d模型的一个最烦的地方就是你在调用模型推理时非得提供对应的img_metas数据模型的这个设计我觉得不是很合理起码不算友好对于硬件和相关标定参数定了后这种参数完全可以通过配置隐式提供给模型为何每次调用都得提供这些meta参数呢反正我写下面的推理代码之前是浪费了不少时间在厘清img_metas这个参数的数据该怎么准备上 此处已DETR3D最初的官方实现代码GitHub - WangYueFt/detr3d基础给出了相关数据预处理和模型调用及推理结果后处理等代码另外这些代码是用于在ROS2节点中运行解析出的推理结果还按项目实际要求转换成autoware的ObjectDetection数据格式后发布所以还包含了ROS2和autoware相关的代码这些代码不是重点仅供做代码逻辑完整性展示。当然如果你项目中有类似需要的话可以直接拷贝使用这些代码。 import rclpy from rclpy.node import Node from rclpy.executors import MultiThreadedExecutor from rclpy.callback_groups import MutuallyExclusiveCallbackGroup, ReentrantCallbackGroupfrom cv_bridge import CvBridge from sensor_msgs.msg import CompressedImage, Image from std_msgs.msg import String from autoware_auto_perception_msgs.msg import DetectedObjects from autoware_auto_perception_msgs.msg import DetectedObject from autoware_auto_perception_msgs.msg import ObjectClassification from autoware_auto_perception_msgs.msg import DetectedObjectKinematics from autoware_auto_perception_msgs.msg import Shapeimport numpy as np import mmcv import sys, os import torch import warnings from mmcv import Config, DictAction from mmcv.cnn import fuse_conv_bn from mmcv.parallel import MMDataParallel, MMDistributedDataParallel from mmcv.runner import (get_dist_info, init_dist, load_checkpoint,wrap_fp16_model)from mmdet3d.models import build_model from mmdet.apis import multi_gpu_test, set_random_seed from mmdet.datasets import replace_ImageToTensor from mmdet3d.core.bbox.structures.box_3d_mode import (Box3DMode, CameraInstance3DBoxes,DepthInstance3DBoxes, LiDARInstance3DBoxes)from nuscenes import NuScenes from pyquaternion import Quaternion from geometry_msgs.msg import Point from geometry_msgs.msg import Vector3 from trimesh.transformations import quaternion_from_euler from geometry_msgs.msg import Quaternion as GeoQuaternion from geometry_msgs.msg import Twist import math import timeclass DetectionPublisher(Node):def init(self):super().init(DetectionPublisherpython)self.publisher self.create_publisher(String, example_topic, 10)timer_period 0.5 # secondsself.timer self.create_timer(timer_period, self.timer_callback)self.i 0def timercallback(self):msg String()msg.data Hello World: %d % self.iself.publisher.publish(msg)self.get_logger().info(Publishing: %s % msg.data)self.i 1class PadMultiViewImage(object):Pad the multi-view image.There are two padding modes: (1) pad to a fixed size and (2) pad to theminimum size that is divisible by some number.Added keys are pad_shape, pad_fixed_size, pad_size_divisor,Args:size (tuple, optional): Fixed padding size.size_divisor (int, optional): The divisor of padded size.pad_val (float, optional): Padding value, 0 by default.def init(self, sizeNone, size_divisor32, pad_val0):self.size sizeself.size_divisor size_divisorself.pad_val pad_val# only one of size and size_divisor should be validassert size is not None or size_divisor is not Noneassert size is None or size_divisor is Nonedef _pad_img(self, results):Pad images according to self.size.if self.size is not None:padded_img [mmcv.impad(img, shapeself.size, pad_valself.pad_val) for img in results[img]]elif self.size_divisor is not None:padded_img [mmcv.impad_to_multiple(img, self.size_divisor, pad_valself.pad_val) for img in results[img]]results[img] padded_imgresults[img_shape] [img.shape for img in padded_img]results[pad_shape] [img.shape for img in padded_img]results[pad_fixed_size] self.sizeresults[pad_size_divisor] self.size_divisordef call(self, results):Call function to pad images, masks, semantic segmentation maps.Args:results (dict): Result dict from loading pipeline.Returns:dict: Updated result dict.self._pad_img(results)return resultsdef repr(self):repr_str self.class.namerepr_str f(size{self.size}, repr_str fsize_divisor{self.size_divisor}, repr_str fpad_val{self.pad_val})return repr_strclass NormalizeMultiviewImage(object):Normalize the image.Added key is img_norm_cfg.Args:mean (sequence): Mean values of 3 channels.std (sequence): Std values of 3 channels.to_rgb (bool): Whether to convert the image from BGR to RGB,default is true.def init(self, mean[103.530, 116.280, 123.675], std[1.0, 1.0, 1.0], to_rgbTrue):self.mean np.array(mean, dtypenp.float32)self.std np.array(std, dtypenp.float32)self.to_rgb to_rgbdef call(self, results):Call function to normalize images.Args:results (dict): Result dict from loading pipeline.Returns:dict: Normalized results, img_norm_cfg key is added intoresult dict.results[img] [mmcv.imnormalize(img, self.mean, self.std, self.to_rgb) for img in results[img]]results[img_norm_cfg] dict(meanself.mean, stdself.std, to_rgbself.to_rgb)return resultsdef repr(self):repr_str self.class.namerepr_str f(mean{self.mean}, std{self.std}, to_rgb{self.to_rgb})return repr_strclass LoadSingleViewImageFromFiles(object):Load multi channel images from a list of separate channel files.Expects results[img_filename] to be a list of filenames.Args:to_float32 (bool): Whether to convert the img to float32.Defaults to False.color_type (str): Color type of the file. Defaults to unchanged.def init(self, to_float32True, color_typeunchanged):self.to_float32 to_float32self.color_type color_typedef call(self, results):Call function to load multi-view image from files.Args:results (dict): Result dict containing multi-view image filenames.Returns:dict: The result dict containing the multi-view image data. \Added keys and values are described below.- filename (str): Multi-view image filenames.- img (np.ndarray): Multi-view image arrays.- img_shape (tuple[int]): Shape of multi-view image arrays.- ori_shape (tuple[int]): Shape of original image arrays.- pad_shape (tuple[int]): Shape of padded image arrays.- scale_factor (float): Scale factor.- img_norm_cfg (dict): Normalization configuration of images.results[filename] sample.jpg# h,w,c h, w, c, nv img np.stack([results[img]], axis-1)if self.to_float32:img img.astype(np.float32)results[img] [img[…, i] for i in range(img.shape[-1])]results[img_shape] img.shaperesults[ori_shape] img.shape# Set initial values for default meta_keysresults[pad_shape] img.shaperesults[scale_factor] 1.0num_channels 1 if len(img.shape) 3 else img.shape[2]results[img_norm_cfg] dict(meannp.zeros(num_channels, dtypenp.float32),stdnp.ones(num_channels, dtypenp.float32),to_rgbFalse)return resultsdef repr(self):str: Return a string that describes the module.repr_str self.class.namerepr_str f(to_float32{self.to_float32}, repr_str fcolor_type{self.color_type})return repr_strdef obtain_sensor2top(nusc,sensor_token,l2e_t,l2e_r_mat,e2g_t,e2g_r_mat,sensor_typelidar):Obtain the info with RT matric from general sensor to Top LiDAR.Args:nusc (class): Dataset class in the nuScenes dataset.sensor_token (str): Sample data token corresponding to thespecific sensor type.l2e_t (np.ndarray): Translation from lidar to ego in shape (1, 3).l2e_r_mat (np.ndarray): Rotation matrix from lidar to egoin shape (3, 3).e2g_t (np.ndarray): Translation from ego to global in shape (1, 3).e2g_r_mat (np.ndarray): Rotation matrix from ego to globalin shape (3, 3).sensor_type (str, optional): Sensor to calibrate. Default: lidar.Returns:sweep (dict): Sweep information after transformation.sd_rec nusc.get(sample_data, sensor_token)cs_record nusc.get(calibrated_sensor,sd_rec[calibrated_sensor_token])pose_record nusc.get(ego_pose, sd_rec[ego_pose_token])data_path str(nusc.get_sample_data_path(sd_rec[token]))if os.getcwd() in data_path: # path from lyftdataset is absolute pathdata_path data_path.split(f{os.getcwd()}/)[-1] # relative pathsweep {data_path: data_path,type: sensor_type,sample_data_token: sd_rec[token],sensor2ego_translation: cs_record[translation],sensor2ego_rotation: cs_record[rotation],ego2global_translation: pose_record[translation],ego2global_rotation: pose_record[rotation],timestamp: sd_rec[timestamp]}l2e_r_s sweep[sensor2ego_rotation]l2e_t_s sweep[sensor2ego_translation]e2g_r_s sweep[ego2global_rotation]e2g_t_s sweep[ego2global_translation]# obtain the RT from sensor to Top LiDAR# sweep-ego-global-ego-lidarl2e_r_s_mat Quaternion(l2e_r_s).rotation_matrixe2g_r_s_mat Quaternion(e2g_r_s).rotation_matrixR (l2e_r_s_mat.T e2g_r_s_mat.T) (np.linalg.inv(e2g_r_mat).T np.linalg.inv(l2e_r_mat).T)T (l2e_t_s e2g_r_s_mat.T e2g_t_s) (np.linalg.inv(e2g_r_mat).T np.linalg.inv(l2e_r_mat).T)T - e2g_t (np.linalg.inv(e2g_r_mat).T np.linalg.inv(l2e_r_mat).T) l2e_t np.linalg.inv(l2e_r_mat).Tsweep[sensor2lidar_rotation] R.T # points R.T Tsweep[sensor2lidar_translation] Treturn sweepdef getSemanticType(class_name):if (class_name CAR or class_name Car):return ObjectClassification.CARelif (class_name TRUCK or class_name Medium_Truck or class_name Big_Truck):return ObjectClassification.TRUCKelif (class_name BUS):return ObjectClassification.BUSelif (class_name TRAILER):return ObjectClassification.TRAILERelif (class_name BICYCLE):return ObjectClassification.BICYCLEelif (class_name MOTORBIKE):return ObjectClassification.MOTORCYCLEelif (class_name PEDESTRIAN or class_name Pedestrian):return ObjectClassification.PEDESTRIANelse: return ObjectClassification.UNKNOWNclass CustomBox3D(object):def init(self,nid,score,x,y,z,w,l,h,rt,vel_x,vel_y):self.id nidself.score scoreself.x xself.y yself.z zself.w wself.l lself.h hself.rt rtself.vel_x vel_xself.vel_y vel_ydef isCarLikeVehicle(label):return label ObjectClassification.BICYCLE or label ObjectClassification.BUS or \label ObjectClassification.CAR or label ObjectClassification.MOTORCYCLE or \label ObjectClassification.TRAILER or label ObjectClassification.TRUCK def createPoint(x, y, z):p Point()p.x float(x)p.y float(y)p.z float(z)return pdef createQuaternionFromYaw(yaw):# tf2.Quaternion# q.setRPY(0, 0, yaw)q quaternion_from_euler(0, 0, yaw)# geometry_msgs.msg.Quaternion#return tf2.toMsg(q)#return GeoQuaternion(*q)return GeoQuaternion(xq[0],yq[1],zq[2],wq[3])def createTranslation(x, y, z):v Vector3()v.x float(x)v.y float(y)v.z float(z)return vdef box3DToDetectedObject(box3d, class_names, has_twist, is_sign):obj DetectedObject()obj.existence_probability float(box3d.score)classification ObjectClassification()classification.probability 1.0if (box3d.id 0 and box3d.id len(class_names)):classification.label getSemanticType(class_names[box3d.id])else: if is_sign:sign_label 255classification.label sign_labelelse:classification.label ObjectClassification.UNKNOWNprint(Unexpected label: UNKNOWN is set.)if (isCarLikeVehicle(classification.label)):obj.kinematics.orientation_availability DetectedObjectKinematics.SIGN_UNKNOWNobj.classification.append(classification)# pose and shape# mmdet3d yaw format to ros yaw formatyaw -box3d.rt - np.pi / 2obj.kinematics.pose_with_covariance.pose.position createPoint(box3d.x, box3d.y, box3d.z)obj.kinematics.pose_with_covariance.pose.orientation createQuaternionFromYaw(yaw)obj.shape.type Shape.BOUNDING_BOXobj.shape.dimensions createTranslation(box3d.l, box3d.w, box3d.h)# twistif (has_twist):vel_x float(box3d.vel_x)vel_y float(box3d.vel_y)twist Twist()twist.linear.x math.sqrt(pow(vel_x, 2) pow(vel_y, 2))twist.angular.z 2 * (math.atan2(vel_y, vel_x) - yaw)obj.kinematics.twist_with_covariance.twist twistobj.kinematics.has_twist has_twistreturn obj class ImageSubscriber(Node):def init(self):super().init(ImageSubscriber_python)cb_group MutuallyExclusiveCallbackGroup()self.img_sub self.create_subscription(CompressedImage,pub_image/compressed,self.image_callback,10,callback_groupcb_group)self.img_subself.od_pub self.create_publisher(DetectedObjects, pub_detection, 10)self.cvBridge CvBridge()self.pad PadMultiViewImage()self.norm NormalizeMultiviewImage()self.file_loader LoadSingleViewImageFromFiles() config_path ./detr3d_res101_gridmask_wst.pyself.cfg Config.fromfile(config_path)if self.cfg.get(custom_imports, None):from mmcv.utils import import_modules_from_stringsimport_modules_from_strings(**self.cfg[custom_imports])if hasattr(self.cfg, plugin):if self.cfg.plugin:import importlibif hasattr(self.cfg, plugin_dir):plugin_dir self.cfg.plugin_dir_module_dir os.path.dirname(plugin_dir)_module_dir _module_dir.split(/)_module_path _module_dir[0]for m in _module_dir[1:]:_module_path _module_path . mprint(_module_path)print(sys.path)plg_lib importlib.import_module(_module_path)if self.cfg.get(cudnn_benchmark, False):torch.backends.cudnn.benchmark Trueself.cfg.model.pretrained Noneself.cfg.model.train_cfg Noneself.model build_model(self.cfg.model, test_cfgself.cfg.get(test_cfg))fp16_cfg self.cfg.get(fp16, None)if fp16_cfg is not None:wrap_fp16_model(self.model)checkpoint load_checkpoint(self.model, epoch_200.pth, map_locationcpu)if CLASSES in checkpoint.get(meta, {}):self.model.CLASSES checkpoint[meta][CLASSES]else:self.model.CLASSS (car, truck, trailer, bus, construction_vehicle,bicycle, motorcycle, pedestrian, traffic_cone,barrier)# palette for visualization in segmentation tasksif PALETTE in checkpoint.get(meta, {}):self.model.PALETTE checkpoint[meta][PALETTE]self.model.cfg self.cfg self.model.cuda()self.model.eval()#if torch.cuda.device_count() 1: # for server side# self.model nn.DataParallel(self.model)print(model is created!)nusc NuScenes(versionv1.0-mini, datarootnuscenes_mini, verboseTrue)scene0 nusc.scene[0]first_sample_token scene0[first_sample_token]first_sample nusc.get(sample, first_sample_token)sd_rec nusc.get(sample_data, first_sample[data][LIDAR_TOP])cs_record nusc.get(calibrated_sensor,sd_rec[calibrated_sensor_token])pose_record nusc.get(ego_pose, sd_rec[ego_pose_token])lidar_token first_sample[data][LIDAR_TOP]lidar_path, boxes, _ nusc.get_sample_data(lidar_token)info {lidar_path: lidar_path,token: first_sample[token],sweeps: [],cams: dict(),lidar2ego_translation: cs_record[translation],lidar2ego_rotation: cs_record[rotation],ego2global_translation: pose_record[translation],ego2global_rotation: pose_record[rotation],timestamp: first_sample[timestamp],}l2e_r info[lidar2ego_rotation]l2e_t info[lidar2ego_translation]e2g_r info[ego2global_rotation]e2g_t info[ego2global_translation]l2e_r_mat Quaternion(l2e_r).rotation_matrixe2g_r_mat Quaternion(e2g_r).rotation_matrixcamera_types [CAM_FRONT,CAM_FRONT_RIGHT,CAM_FRONT_LEFT,CAM_BACK,CAM_BACK_LEFT,CAM_BACK_RIGHT,]for cam in camera_types:cam_token first_sample[data][cam]cam_path, _, cam_intrinsic nusc.get_sample_data(cam_token)cam_info obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,e2g_t, e2g_r_mat, cam)cam_info.update(cam_intrinsiccam_intrinsic)info[cams].update({cam: cam_info})cam_front_sample_data nusc.get(sample_data, first_sample[data][CAM_FRONT])cam_front_sample_path os.path.join(nusc.dataroot, cam_front_sample_data[filename])print(sample image file:, cam_front_sample_path)cam_front_calibrate nusc.get(calibrated_sensor, cam_front_sample_data[calibrated_sensor_token])sensor2lidar translation np.expand_dims(np.array(cam_front_calibrate[translation]), axis-1)sensor2lidar_rotation np.expand_dims(np.array(cam_front_calibrate[rotation]), axis-1)camera_intrinsic np.array(cam_front_calibrate[camera_intrinsic])image_paths []lidar2img_rts []lidar2cam_rts []cam_intrinsics []for cam_type, cam_info in info[cams].items():image_paths.append(cam_info[data_path])# obtain lidar to image transformation matrixlidar2cam_r np.linalg.inv(cam_info[sensor2lidar_rotation])lidar2cam_t cam_info[sensor2lidar_translation] lidar2cam_r.Tlidar2cam_rt np.eye(4)lidar2cam_rt[:3, :3] lidar2cam_r.Tlidar2cam_rt[3, :3] -lidar2cam_tintrinsic cam_info[cam_intrinsic]viewpad np.eye(4)viewpad[:intrinsic.shape[0], :intrinsic.shape[1]] intrinsiclidar2img_rt (viewpad lidar2cam_rt.T)lidar2img_rts.append(lidar2img_rt)cam_intrinsics.append(viewpad)lidar2cam_rts.append(lidar2cam_rt.T)self.img_metas {}self.img_metas.update(dict(img_filenameimage_paths,lidar2imglidar2img_rts,cam_intrinsiccam_intrinsics,lidar2camlidar2cam_rts,))self.classnames self.cfg.class_namesprint(ImageSubscriber init done)def image_callback(self, msg):#image self.cvBridge.imgmsg_to_cv2(msg, bgr8)image self.cvBridge.compressed_imgmsg_to_cv2(msg, bgr8)#print(image received, shape:, image.shape)results {img: image}self.file_loader(results)self.norm(results)self.pad(results)image results[img][0]meta {filename: results[filename], img_shape: results[img_shape],ori_shape: results[ori_shape],pad_shape: results[pad_shape],scale_factor: results[scale_factor],box_type_3d: LiDARInstance3DBoxes, #CameraInstance3DBoxes/LiDARInstance3DBoxesbox_mode_3d: Box3DMode.LIDAR}meta.update(self.img_metas)#print(meta:, meta)img_metas [[meta]]inputs torch.tensor(image).to(cuda)# h,w,c bs,nv,c,h,winputs inputs.permute(2,0,1)inputs torch.unsqueeze(inputs, 0)inputs torch.unsqueeze(inputs, 0)#print(input tensor shape:, inputs.shape)with torch.no_grad():outputs self.model(return_lossFalse, rescaleTrue, imginputs, img_metasimg_metas) torch.cuda.synchronize()pts_bbox outputs[0][pts_bbox]boxes_3d_enc pts_bbox[boxes_3d]scores_3d pts_bbox[scores_3d]labels_3d pts_bbox[labels_3d]filter scores_3d 0.5boxes_3d_enc.tensor boxes_3d_enc.tensor[filter]boxes_3d boxes_3d_enc.tensor.numpy() # [[cx, cy, cz, w, l, h, rot, vx, vy]]scores_3d scores_3d[filter].numpy()labels_3d labels_3d[filter].numpy()custom_boxes_3d []for i, box_3d in enumerate(boxes_3d):box3d CustomBox3D(labels_3d[i], scores_3d[i],box_3d[0],box_3d[1],box_3d[2],box_3d[3],box_3d[4],box_3d[5],box_3d[6],box_3d[7],box_3d[8])custom_boxes_3d.append(box3d)#print(boxes_3d, boxes_3d)#print(scores_3d, scores_3d)#print(labels_3d, labels_3d)output_msg DetectedObjects()obj_num len(boxes_3d)for i, box3d in enumerate(custom_boxes_3d):obj box3DToDetectedObject(box3d, self.classnames, True, False);output_msg.objects.append(obj)output_msg.header.stamp self.get_clock().now().to_msg() #rclpy.time.Time()output_msg.header.frame_id base_linkself.od_pub.publish(output_msg)print(obj_num, Objects published)def main(argsNone):rclpy.init(argsargs)sys.path.append(os.getcwd())image_subscriber ImageSubscriber()executor MultiThreadedExecutor()executor.add_node(image_subscriber)executor.spin()image_subscriber.destroy_node()detection_publisher.destroy_node()rclpy.shutdown()if name main:main() 这里说的单帧数据可以是单张图片或者nuScenes格式的6张环视图片之所以说一帧是对应点云的概念实际做3D目标检测尤其是雷视融合推理时一般都是以点云帧的频率为时间单位进行推理(在获取一帧点云的同时获取一张或多张图片(取决于有几个摄像头)前融合的话是将点云和图像的数据做数据融合后输入模型或直接输入模型由模型内部抽取特征做特征融合后融合的话是将点云和图像分别输入不同的模型推理再对模型各自的结果进行融合处理)。 上面的代码由于是以DETR3D官方代码为基础的所以和mmdetection3D后来集成DETR3D的实现代码不同对比两者代码可以发现mmdetection3D将DETR3D官方代码里的一些数据预处理环节的实现代码都移走了在模型内部单独提供了一个Det3DDataPreprocessor类来进行一些公共处理总体的配置文件和实现思路还是比较类似毕竟DETR3D官方是在mmdetection3D的早期版本基础上开发的例如两者的detr3d_r101_gridmask.py配置文件里面有些配置不同但是思路是相似的 mmdetection3d/projects/DETR3D/configs/detr3d_r101_gridmask.py model dict(typeDETR3D,use_grid_maskTrue,data_preprocessordict(typeDet3DDataPreprocessor, **img_norm_cfg, pad_size_divisor32),img_backbonedict(typemmdet.ResNet,depth101,num_stages4,out_indices(0, 1, 2, 3),frozen_stages1,norm_cfgdict(typeBN2d, requires_gradFalse),norm_evalTrue,stylecaffe,dcndict(typeDCNv2, deform_groups1, fallback_on_strideFalse),stage_with_dcn(False, False, True, True)),img_neckdict(typemmdet.FPN,in_channels[256, 512, 1024, 2048],out_channels256,start_level1,add_extra_convson_output,num_outs4,relu_before_extra_convsTrue),pts_bbox_headdict(typeDETR3DHead,num_query900,num_classes10,in_channels256,sync_cls_avg_factorTrue,with_box_refineTrue,as_two_stageFalse,transformerdict(typeDetr3DTransformer,decoderdict(typeDetr3DTransformerDecoder,num_layers6,return_intermediateTrue,transformerlayersdict(typemmdet.DetrTransformerDecoderLayer,attn_cfgs[dict(typeMultiheadAttention, # mmcv.embed_dims256,num_heads8,dropout0.1),dict(typeDetr3DCrossAtten,pc_rangepoint_cloud_range,num_points1,embed_dims256)],feedforward_channels512,ffn_dropout0.1,operation_order(self_attn, norm, cross_attn, norm,ffn, norm)))),bbox_coderdict(typeNMSFreeCoder,post_center_range[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],pc_rangepoint_cloud_range,max_num300,voxel_sizevoxel_size,num_classes10),positional_encodingdict(typemmdet.SinePositionalEncoding,num_feats128,normalizeTrue,offset-0.5),loss_clsdict(typemmdet.FocalLoss,use_sigmoidTrue,gamma2.0,alpha0.25,loss_weight2.0),loss_bboxdict(typemmdet.L1Loss, loss_weight0.25),loss_ioudict(typemmdet.GIoULoss, loss_weight0.0)),# model training and testing settingstrain_cfgdict(ptsdict(grid_size[512, 512, 1],voxel_sizevoxel_size,point_cloud_rangepoint_cloud_range,out_size_factor4,assignerdict(typeHungarianAssigner3D,cls_costdict(typemmdet.FocalLossCost, weight2.0),reg_costdict(typeBBox3DL1Cost, weight0.25),# ↓ Fake cost. This is just to get compatible with DETR headiou_costdict(typemmdet.IoUCost, weight0.0),pc_rangepoint_cloud_range))))dataset_type NuScenesDataset data_root data/nuscenes/test_transforms [dict(typeRandomResize3D,scale(1600, 900),ratio_range(1., 1.),keep_ratioTrue) ] train_transforms [dict(typePhotoMetricDistortion3D)] test_transformsbackend_args None train_pipeline [dict(typeLoadMultiViewImageFromFiles,to_float32True,num_views6,backend_argsbackend_args),dict(typeLoadAnnotations3D,with_bbox_3dTrue,with_label_3dTrue,with_attr_labelFalse),dict(typeMultiViewWrapper, transformstrain_transforms),dict(typeObjectRangeFilter, point_cloud_rangepoint_cloud_range),dict(typeObjectNameFilter, classesclass_names),dict(typePack3DDetInputs, keys[img, gt_bboxes_3d, gt_labels_3d]) ]test_pipeline [dict(typeLoadMultiViewImageFromFiles,to_float32True,num_views6,backend_argsbackend_args),dict(typeMultiViewWrapper, transformstest_transforms),dict(typePack3DDetInputs, keys[img]) ] detr3d/projects/configs/detr3d/detr3d_res101_gridmask.py model dict(typeDetr3D,use_grid_maskTrue,img_backbonedict(typeResNet,depth101,num_stages4,out_indices(0, 1, 2, 3),frozen_stages1,norm_cfgdict(typeBN2d, requires_gradFalse),norm_evalTrue,stylecaffe,dcndict(typeDCNv2, deform_groups1, fallback_on_strideFalse),stage_with_dcn(False, False, True, True)),img_neckdict(typeFPN,in_channels[256, 512, 1024, 2048],out_channels256,start_level1,add_extra_convson_output,num_outs4,relu_before_extra_convsTrue),pts_bbox_headdict(typeDetr3DHead,num_query900,num_classes10,in_channels256,sync_cls_avg_factorTrue,with_box_refineTrue,as_two_stageFalse,transformerdict(typeDetr3DTransformer,decoderdict(typeDetr3DTransformerDecoder,num_layers6,return_intermediateTrue,transformerlayersdict(typeDetrTransformerDecoderLayer,attn_cfgs[dict(typeMultiheadAttention,embed_dims256,num_heads8,dropout0.1),dict(typeDetr3DCrossAtten,pc_rangepoint_cloud_range,num_points1,embed_dims256)],feedforward_channels512,ffn_dropout0.1,operation_order(self_attn, norm, cross_attn, norm,ffn, norm)))),bbox_coderdict(typeNMSFreeCoder,post_center_range[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],pc_rangepoint_cloud_range,max_num300,voxel_sizevoxel_size,num_classes10), positional_encodingdict(typeSinePositionalEncoding,num_feats128,normalizeTrue,offset-0.5),loss_clsdict(typeFocalLoss,use_sigmoidTrue,gamma2.0,alpha0.25,loss_weight2.0),loss_bboxdict(typeL1Loss, loss_weight0.25),loss_ioudict(typeGIoULoss, loss_weight0.0)),# model training and testing settingstrain_cfgdict(ptsdict(grid_size[512, 512, 1],voxel_sizevoxel_size,point_cloud_rangepoint_cloud_range,out_size_factor4,assignerdict(typeHungarianAssigner3D,cls_costdict(typeFocalLossCost, weight2.0),reg_costdict(typeBBox3DL1Cost, weight0.25),iou_costdict(typeIoUCost, weight0.0), # Fake cost. This is just to make it compatible with DETR head. pc_rangepoint_cloud_range))))dataset_type NuScenesDataset data_root data/nuscenes/…train_pipeline [dict(typeLoadMultiViewImageFromFiles, to_float32True),dict(typePhotoMetricDistortionMultiViewImage),dict(typeLoadAnnotations3D, with_bbox_3dTrue, with_label_3dTrue, with_attr_labelFalse),dict(typeObjectRangeFilter, point_cloud_rangepoint_cloud_range),dict(typeObjectNameFilter, classesclass_names),dict(typeNormalizeMultiviewImage, **img_norm_cfg),dict(typePadMultiViewImage, size_divisor32),dict(typeDefaultFormatBundle3D, class_namesclass_names),dict(typeCollect3D, keys[gt_bboxes_3d, gt_labels_3d, img]) ] test_pipeline [dict(typeLoadMultiViewImageFromFiles, to_float32True),dict(typeNormalizeMultiviewImage, **img_norm_cfg),dict(typePadMultiViewImage, size_divisor32),dict(typeMultiScaleFlipAug3D,img_scale(1333, 800),pts_scale_ratio1,flipFalse,transforms[dict(typeDefaultFormatBundle3D,class_namesclass_names,with_labelFalse),dict(typeCollect3D, keys[img])]) ]