Overview
The DNN (Deep Neural Networks) module provides:
Loading models from popular frameworks (TensorFlow, ONNX, Caffe, Darknet; PyTorch models via ONNX export)
Forward inference (no training)
Multiple backend support (CPU, OpenCL, CUDA)
Pre-trained model zoo
The DNN module is for inference only. For training, use frameworks like TensorFlow or PyTorch.
Quick Start
#include <opencv2/dnn.hpp>
using namespace cv::dnn;
// Load model
Net net = readNet("model.onnx");
// Prepare input
Mat blob = blobFromImage(img, 1.0 / 255, Size(224, 224),
                         Scalar(), true, false);
// Set input
net.setInput(blob);
// Forward pass
Mat output = net.forward();
Net Class
Loading Models
// From ONNX
Net net = readNetFromONNX("model.onnx");
// From TensorFlow
Net net = readNetFromTensorflow("model.pb", "config.pbtxt");
// From Caffe
Net net = readNetFromCaffe("deploy.prototxt", "model.caffemodel");
// From Darknet (YOLO)
Net net = readNetFromDarknet("yolov4.cfg", "yolov4.weights");
// From PyTorch (export the model to ONNX first)
Net net = readNetFromONNX("model.onnx");
// Auto-detect format from the file extension
Net net = readNet("model.onnx");
Setting Backend and Target
// CPU backend
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
// OpenCL (GPU)
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_OPENCL);
// CUDA (NVIDIA GPU)
net.setPreferableBackend(DNN_BACKEND_CUDA);
net.setPreferableTarget(DNN_TARGET_CUDA);
// Intel OpenVINO
net.setPreferableBackend(DNN_BACKEND_INFERENCE_ENGINE);
net.setPreferableTarget(DNN_TARGET_CPU);
Inference
// Single output
net.setInput(blob, "input_name");
Mat output = net.forward("output_name");
// Multiple outputs
std::vector<String> outNames = {"output1", "output2"};
std::vector<Mat> outputs;
net.forward(outputs, outNames);
// All outputs
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
std::vector<Mat> outputs;
net.forward(outputs, outNames);
Blob Preparation
blobFromImage
Mat blob = blobFromImage(
    image,            // Input image
    1.0 / 255.0,      // Scale factor
    Size(224, 224),   // Target size
    Scalar(0, 0, 0),  // Mean subtraction
    true,             // swapRB (BGR to RGB)
    false,            // crop
    CV_32F            // Output depth
);
blobFromImages (Batch)
std::vector<Mat> images = {img1, img2, img3};
Mat blob = blobFromImages(images, 1.0 / 255.0,
                          Size(224, 224), Scalar(), true);
Blobs use NCHW format:
N : Batch size
C : Channels
H : Height
W : Width
// Blob shape: [1, 3, 224, 224]
// 1 image, 3 channels (RGB), 224x224 pixels
Common Tasks
Image Classification
// Load model
Net net = readNet("mobilenet_v2.onnx");
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
// Prepare input
Mat img = imread("image.jpg");
Mat blob = blobFromImage(img, 1.0 / 255, Size(224, 224),
                         Scalar(), true, false);
// Inference
net.setInput(blob);
Mat prob = net.forward();
// Get top class (location of the maximum score)
Point classIdPoint;
minMaxLoc(prob.reshape(1, 1), 0, 0, 0, &classIdPoint);
int classId = classIdPoint.x;
Object Detection (YOLO)
// Load YOLO
Net net = readNetFromDarknet("yolov4.cfg", "yolov4.weights");
// Prepare input
Mat blob = blobFromImage(img, 1 / 255.0, Size(416, 416),
                         Scalar(), true, false);
// Forward
net.setInput(blob);
std::vector<Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
// Process detections: each row is [cx, cy, w, h, objectness, class scores...]
for (Mat& output : outputs) {
    for (int i = 0; i < output.rows; i++) {
        float* data = output.ptr<float>(i);
        float confidence = data[4];
        if (confidence > 0.5) {
            int classId = std::max_element(data + 5, data + output.cols) - (data + 5);
            // Coordinates are normalized; (x, y) is the box center
            float x = data[0] * img.cols;
            float y = data[1] * img.rows;
            float w = data[2] * img.cols;
            float h = data[3] * img.rows;
            // Draw bounding box
        }
    }
}
Semantic Segmentation
Net net = readNet("fcn-resnet50.onnx");
Mat blob = blobFromImage(img, 1.0, Size(500, 500));
net.setInput(blob);
Mat score = net.forward();
// score shape: [1, num_classes, H, W]
// Get class per pixel
Mat classMap;
for (int h = 0; h < score.size[2]; h++) {
    for (int w = 0; w < score.size[3]; w++) {
        // Get class with max score
        // ...
    }
}
Model Zoo
OpenCV provides pre-trained models:
// Face detection
Net faceNet = readNet("opencv_face_detector.caffemodel",
                      "opencv_face_detector.prototxt");
// Age/Gender
Net ageNet = readNet("age_net.caffemodel", "age_deploy.prototxt");
Net genderNet = readNet("gender_net.caffemodel", "gender_deploy.prototxt");
// OpenPose (pose estimation)
Net poseNet = readNet("pose_iter_440000.caffemodel",
                      "pose_deploy_linevec.prototxt");
Backend Selection
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
// Smaller input = faster inference
Mat blob = blobFromImage(img, 1.0 / 255,
                         Size(320, 320),  // Reduced from 640x640
                         Scalar(), true);
Batch Processing
// Process multiple images at once
std::vector<Mat> images;
for (int i = 0; i < 4; i++) {
    images.push_back(imread(files[i]));
}
Mat blob = blobFromImages(images, 1.0 / 255, Size(224, 224));
net.setInput(blob);
Mat output = net.forward();  // Batch inference
Best Practices
Use ONNX Format ONNX provides best compatibility across frameworks
Enable GPU Use CUDA backend for 5-10x speedup on NVIDIA GPUs
Optimize Input Size Smaller inputs trade accuracy for speed
Batch When Possible Batch processing improves GPU utilization
Troubleshooting
Model Loading Issues
if (net.empty()) {
    std::cerr << "Failed to load model\n";
    return -1;
}
Check Backend Support
auto backends = getAvailableBackends();
for (auto& backend : backends) {
    std::cout << "Backend: " << backend.first
              << ", Target: " << backend.second << "\n";
}
Enable Diagnostic Mode
enableModelDiagnostics(true);
Net net = readNet("model.onnx");  // Verbose loading
See Also