Documentation Index
Fetch the complete documentation index at: https://mintlify.com/opencv/opencv/llms.txt
Use this file to discover all available pages before exploring further.
Deep Learning with OpenCV DNN Module
Learn how to use OpenCV’s DNN (Deep Neural Networks) module to load and run pre-trained models for object detection, classification, and more.
Introduction to OpenCV DNN
OpenCV’s DNN module allows you to:
- Load models from TensorFlow, PyTorch, Caffe, ONNX, and Darknet
- Run inference without installing deep learning frameworks
- Deploy on CPU, GPU (CUDA), or OpenVINO backends
- Use pre-trained models for various tasks
Supported Frameworks
Supported Model Formats
Supported Model Formats
- ONNX (.onnx) - Universal format, recommended
- TensorFlow (.pb, .pbtxt)
- PyTorch (via ONNX export)
- Caffe (.caffemodel, .prototxt)
- Darknet (.weights, .cfg) - YOLO models
- TensorFlow Lite (.tflite)
Loading and Running Models
Basic Model Loading
- Python
- C++
import cv2 as cv
import numpy as np
# Read the network from disk; this example uses an ONNX file
onnx_path = 'model.onnx'
net = cv.dnn.readNet(onnx_path)
# Format-specific loaders can be used instead:
# net = cv.dnn.readNetFromTensorflow('model.pb', 'model.pbtxt')
# net = cv.dnn.readNetFromCaffe('deploy.prototxt', 'model.caffemodel')
# net = cv.dnn.readNetFromDarknet('yolov3.cfg', 'yolov3.weights')
# Choose where inference runs: OpenCV's own backend on the CPU
backend, target = cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU
net.setPreferableBackend(backend)
net.setPreferableTarget(target)
# To run on a GPU through CUDA instead:
# net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)
#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace cv::dnn;
int main() {
// Load model
Net net = readNet("model.onnx");
// Or specific formats:
// Net net = readNetFromTensorflow("model.pb", "model.pbtxt");
// Net net = readNetFromCaffe("deploy.prototxt", "model.caffemodel");
// Net net = readNetFromDarknet("yolov3.cfg", "yolov3.weights");
// Set backend and target
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
// For GPU:
// net.setPreferableBackend(DNN_BACKEND_CUDA);
// net.setPreferableTarget(DNN_TARGET_CUDA);
return 0;
}
YOLO Object Detection
YOLO (You Only Look Once) is a popular real-time object detection system.
YOLOv3/YOLOv4 Detection
- Python
- C++
import cv2 as cv
import numpy as np
# Build the detector from the Darknet config/weights pair and run it on the CPU
net = cv.dnn.readNetFromDarknet('yolov4.cfg', 'yolov4.weights')
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Class labels: one name per line of the file
with open('coco.names', 'r') as names_file:
    classes = [name.strip() for name in names_file]

# Read the picture and remember its size for rescaling the boxes later
img = cv.imread('street.jpg')
height, width = img.shape[:2]

# 416x416 input blob, pixel values scaled by 1/255, R and B channels swapped
blob = cv.dnn.blobFromImage(img, 1/255.0, (416, 416),
                            swapRB=True, crop=False)
net.setInput(blob)

# Run the network and collect the result of every unconnected output layer
output_layers = net.getUnconnectedOutLayersNames()
outputs = net.forward(output_layers)

# Collect every candidate whose best class score exceeds 0.5
boxes, confidences, class_ids = [], [], []
for layer_result in outputs:
    for row in layer_result:
        class_scores = row[5:]
        best_class = np.argmax(class_scores)
        score = class_scores[best_class]
        if not score > 0.5:
            continue
        # Values 0..3 are multiplied by the image size to get the box
        # centre and extent in pixels
        cx, cy = int(row[0] * width), int(row[1] * height)
        box_w, box_h = int(row[2] * width), int(row[3] * height)
        # Convert the centre point into the top-left corner
        left = int(cx - box_w / 2)
        top = int(cy - box_h / 2)
        boxes.append([left, top, box_w, box_h])
        confidences.append(float(score))
        class_ids.append(best_class)

# Non-Maximum Suppression (score threshold 0.5, NMS threshold 0.4)
keep = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

# Draw the surviving boxes with their label and score
if len(keep) > 0:
    green = (0, 255, 0)
    for k in keep.flatten():
        left, top, box_w, box_h = boxes[k]
        label = f"{classes[class_ids[k]]}: {confidences[k]:.2f}"
        cv.rectangle(img, (left, top), (left + box_w, top + box_h), green, 2)
        cv.putText(img, label, (left, top - 10),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

cv.imshow('YOLO Detection', img)
cv.waitKey(0)
#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>
#include <fstream>
using namespace cv;
using namespace cv::dnn;
using namespace std;
int main() {
// Load network
Net net = readNetFromDarknet("yolov4.cfg", "yolov4.weights");
net.setPreferableBackend(DNN_BACKEND_OPENCV);
net.setPreferableTarget(DNN_TARGET_CPU);
// Load class names
vector<string> classes;
ifstream ifs("coco.names");
string line;
while(getline(ifs, line)) classes.push_back(line);
// Load image
Mat img = imread("street.jpg");
// Create blob
Mat blob;
blobFromImage(img, blob, 1/255.0, Size(416, 416),
Scalar(), true, false);
net.setInput(blob);
// Get output layers
vector<String> outNames = net.getUnconnectedOutLayersNames();
vector<Mat> outs;
net.forward(outs, outNames);
// Process detections
vector<int> classIds;
vector<float> confidences;
vector<Rect> boxes;
for(size_t i = 0; i < outs.size(); ++i) {
float* data = (float*)outs[i].data;
for(int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) {
Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if(confidence > 0.5) {
int centerX = (int)(data[0] * img.cols);
int centerY = (int)(data[1] * img.rows);
int width = (int)(data[2] * img.cols);
int height = (int)(data[3] * img.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(Rect(left, top, width, height));
}
}
}
// NMS
vector<int> indices;
NMSBoxes(boxes, confidences, 0.5, 0.4, indices);
// Draw
for(size_t i = 0; i < indices.size(); ++i) {
int idx = indices[i];
Rect box = boxes[idx];
rectangle(img, box, Scalar(0, 255, 0), 2);
string label = classes[classIds[idx]] + ": " +
format("%.2f", confidences[idx]);
putText(img, label, Point(box.x, box.y - 10),
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
}
imshow("YOLO Detection", img);
waitKey(0);
return 0;
}
YOLOv8 with ONNX
Modern YOLO versions export to ONNX format:
- Python
import cv2 as cv
import numpy as np
# Load YOLOv8 model (ONNX format) and run it on the CPU
net = cv.dnn.readNetFromONNX('yolov8n.onnx')
net.setPreferableBackend(cv.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

# Load image and remember its size for scaling the boxes back
img = cv.imread('image.jpg')
img_height, img_width = img.shape[:2]

# Preprocess: resize to input_size x input_size, scale pixels by 1/255,
# swap R and B channels
input_size = 640
blob = cv.dnn.blobFromImage(img, 1/255.0, (input_size, input_size),
                            swapRB=True, crop=False)

# Run inference
net.setInput(blob)
output = net.forward()

# YOLOv8 outputs shape: [1, 84, 8400] for COCO
# Format: [x, y, w, h, class_scores...]
output = output[0].transpose()  # [8400, 84]

# Process detections
boxes = []
confidences = []
class_ids = []

# Box values are in network-input pixels; these factors map them back to
# the original image
x_scale = img_width / input_size
y_scale = img_height / input_size

for detection in output:
    # Extract box coordinates (centre x, centre y, width, height)
    x, y, w, h = detection[:4]

    # Get class scores and find max
    class_scores = detection[4:]
    class_id = np.argmax(class_scores)
    confidence = class_scores[class_id]

    if confidence > 0.5:
        # Convert centre to top-left corner and scale back to original image
        x = int((x - w/2) * x_scale)
        y = int((y - h/2) * y_scale)
        w = int(w * x_scale)
        h = int(h * y_scale)

        boxes.append([x, y, w, h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

# Apply NMS (score threshold 0.5, NMS threshold 0.4)
indices = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

# Draw results. The NMS result can be empty when nothing passes the
# thresholds, so check before calling flatten() on it.
if len(indices) > 0:
    for i in indices.flatten():
        x, y, w, h = boxes[i]
        cv.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
        label = f"Class {class_ids[i]}: {confidences[i]:.2f}"
        cv.putText(img, label, (x, y-10),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv.imshow('YOLOv8 Detection', img)
cv.waitKey(0)
SSD Object Detection
SSD (Single Shot MultiBox Detector) for faster detection:
- Python
- C++
import cv2 as cv
import numpy as np
# Load MobileNet-SSD model
net = cv.dnn.readNetFromCaffe(
    'MobileNetSSD_deploy.prototxt',
    'MobileNetSSD_deploy.caffemodel'
)

# PASCAL VOC class names (background + 20 object classes), indexed by the
# class id the network reports
classes = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow",
           "diningtable", "dog", "horse", "motorbike", "person",
           "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# Load image
img = cv.imread('image.jpg')
height, width = img.shape[:2]

# Prepare input: 300x300 blob, subtract 127.5 from every channel, then scale
# by 0.007843. The mean is spelled out per channel so that all three
# channels are shifted, matching the C++ version of this example.
blob = cv.dnn.blobFromImage(img, 0.007843, (300, 300),
                            (127.5, 127.5, 127.5))

# Run detection
net.setInput(blob)
detections = net.forward()

# Process detections: for each detection i, index 1 is the class id,
# index 2 the confidence and 3..6 the box corners as fractions of the image size
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    if confidence > 0.5:
        class_id = int(detections[0, 0, i, 1])

        # Get box coordinates in pixels
        box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
        (x1, y1, x2, y2) = box.astype("int")

        # Draw detection
        label = f"{classes[class_id]}: {confidence:.2f}"
        cv.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv.putText(img, label, (x1, y1-10),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv.imshow('SSD Detection', img)
cv.waitKey(0)
#include <opencv2/dnn.hpp>
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace cv::dnn;
int main() {
Net net = readNetFromCaffe(
"MobileNetSSD_deploy.prototxt",
"MobileNetSSD_deploy.caffemodel"
);
Mat img = imread("image.jpg");
Mat blob;
blobFromImage(img, blob, 0.007843, Size(300, 300),
Scalar(127.5, 127.5, 127.5));
net.setInput(blob);
Mat detections = net.forward();
Mat detectionMat(detections.size[2], detections.size[3],
CV_32F, detections.ptr<float>());
for(int i = 0; i < detectionMat.rows; i++) {
float confidence = detectionMat.at<float>(i, 2);
if(confidence > 0.5) {
int x1 = detectionMat.at<float>(i, 3) * img.cols;
int y1 = detectionMat.at<float>(i, 4) * img.rows;
int x2 = detectionMat.at<float>(i, 5) * img.cols;
int y2 = detectionMat.at<float>(i, 6) * img.rows;
rectangle(img, Point(x1, y1), Point(x2, y2),
Scalar(0, 255, 0), 2);
}
}
imshow("SSD Detection", img);
waitKey(0);
return 0;
}
Image Classification
- Python
import cv2 as cv
import numpy as np
# Build the ResNet-50 classifier from its Caffe definition and weights
net = cv.dnn.readNetFromCaffe(
    'ResNet-50-deploy.prototxt',
    'ResNet-50-model.caffemodel'
)

# Class labels, one per line; looked up by output index below
with open('imagenet_classes.txt', 'r') as labels_file:
    classes = [entry.strip() for entry in labels_file]

# Read the picture to classify
img = cv.imread('dog.jpg')

# 224x224 input blob with the per-channel mean (104, 117, 123) subtracted;
# channel order is left unchanged
blob = cv.dnn.blobFromImage(img, 1.0, (224, 224),
                            (104, 117, 123), swapRB=False, crop=False)

# Run inference
net.setInput(blob)
predictions = net.forward()

# Rank the outputs from highest to lowest score and keep the best five
scores = predictions[0]
ranking = np.argsort(scores)[::-1]
top5_indices = ranking[:5]

print("Top 5 predictions:")
for rank, idx in enumerate(top5_indices, start=1):
    print(f"{rank}. {classes[idx]}: {scores[idx]*100:.2f}%")

# Write the best label onto the image and show it
top_label = classes[top5_indices[0]]
cv.putText(img, top_label, (10, 30),
           cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
cv.imshow('Classification', img)
cv.waitKey(0)
Face Detection with DNN
Deep learning-based face detection (more accurate than Haar cascades):
- Python
import cv2 as cv
import numpy as np  # required: np.array is used below to scale the boxes

# Load face detection model
net = cv.dnn.readNetFromCaffe(
    'deploy.prototxt',
    'res10_300x300_ssd_iter_140000.caffemodel'
)

# Load image
img = cv.imread('faces.jpg')
height, width = img.shape[:2]

# Preprocess: 300x300 blob with the per-channel mean subtracted
blob = cv.dnn.blobFromImage(img, 1.0, (300, 300),
                            (104.0, 177.0, 123.0))

# Detect faces
net.setInput(blob)
detections = net.forward()

# Draw detections: for each detection i, index 2 is the confidence and
# 3..6 are the box corners as fractions of the image size
for i in range(detections.shape[2]):
    confidence = detections[0, 0, i, 2]

    if confidence > 0.5:
        box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
        (x1, y1, x2, y2) = box.astype("int")

        # Draw box and confidence
        text = f"{confidence*100:.2f}%"
        cv.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv.putText(img, text, (x1, y1-10),
                   cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

cv.imshow('Face Detection', img)
cv.waitKey(0)
Video Processing with DNN
- Python
import time

import cv2 as cv
import numpy as np  # required: np.array is used below to scale the boxes

# Load model
net = cv.dnn.readNetFromCaffe(
    'MobileNetSSD_deploy.prototxt',
    'MobileNetSSD_deploy.caffemodel'
)

# Open video
cap = cv.VideoCapture('video.mp4')

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames (or the video could not be read)
            break

        height, width = frame.shape[:2]

        # Prepare input: 300x300 blob, subtract 127.5 from every channel,
        # then scale by 0.007843. The mean is given per channel so that all
        # three channels are shifted.
        blob = cv.dnn.blobFromImage(frame, 0.007843, (300, 300),
                                    (127.5, 127.5, 127.5))

        # Measure inference time with a high-resolution clock
        start = time.perf_counter()
        net.setInput(blob)
        detections = net.forward()
        elapsed = time.perf_counter() - start

        # Process detections: index 2 is the confidence, 3..6 the box
        # corners as fractions of the frame size
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]

            if confidence > 0.5:
                box = detections[0, 0, i, 3:7] * np.array([width, height, width, height])
                (x1, y1, x2, y2) = box.astype("int")
                cv.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Display FPS; guard against a zero-length interval
        fps = 1 / elapsed if elapsed > 0 else float('inf')
        cv.putText(frame, f'FPS: {fps:.1f}', (10, 30),
                   cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv.imshow('Detection', frame)
        # Press 'q' to stop early
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close windows even if processing fails
    cap.release()
    cv.destroyAllWindows()
Performance Optimization
Using GPU Acceleration
Using GPU Acceleration
import cv2 as cv
# Load the network that will be run on the GPU
net = cv.dnn.readNet('model.onnx')
# CUDA backend (requires OpenCV built with CUDA)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA)
# Or CUDA with FP16 (faster, slightly less accurate)
# NOTE(review): this is an alternative to the DNN_TARGET_CUDA call above; as
# written both calls run, so presumably the FP16 target is the one left in
# effect — keep only the call you want.
net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)
OpenVINO Backend
OpenVINO Backend
# Intel OpenVINO for optimized inference on Intel hardware
# (`net` is a network that has already been loaded, e.g. with cv.dnn.readNet)
net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
# Or use Intel GPU
# NOTE(review): alternative to the DNN_TARGET_CPU call above; as written both
# calls run, so presumably the OpenCL target is the one left in effect —
# keep only the call you want.
net.setPreferableTarget(cv.dnn.DNN_TARGET_OPENCL)
Batch Processing
Batch Processing
# Process multiple images at once
# (img1..img4 and `net` are placeholders: they are not defined in this
# snippet and must be loaded beforehand)
images = [img1, img2, img3, img4]
# Create batch blob: each image is resized to 640x640 and scaled by 1/255
blob = cv.dnn.blobFromImages(images, 1/255.0, (640, 640))
# One forward pass over the whole batch
net.setInput(blob)
outputs = net.forward()
Backend and target options:
- DNN_BACKEND_OPENCV + DNN_TARGET_CPU: Default, works everywhere
- DNN_BACKEND_CUDA + DNN_TARGET_CUDA: NVIDIA GPU acceleration
- DNN_BACKEND_INFERENCE_ENGINE + DNN_TARGET_CPU: Intel OpenVINO
- DNN_TARGET_OPENCL: OpenCL acceleration
- DNN_TARGET_CUDA_FP16: Half-precision for faster inference
Common issues:
- Model input size must match the size used during training
- Check if the model expects RGB or BGR input (use the swapRB parameter)
- Normalize input values correctly (typically 0-1 or mean subtraction)
- Ensure OpenCV is built with the desired backend support
Downloading Pre-trained Models
OpenCV provides scripts to download common models:
# Download YOLOv3
python opencv/samples/dnn/download_models.py --name yolo
# Download all models
python opencv/samples/dnn/download_models.py --all
Next Steps
- Explore the OpenCV Model Zoo for more pre-trained models
- Learn about Camera Calibration for 3D vision tasks
- Combine with Video Processing for real-time applications
