Object Detection

Learn how to detect objects in images and video using classical computer vision techniques including Haar cascades and Histogram of Oriented Gradients (HOG) detectors.

Haar Cascade Classifiers

Haar cascades are machine learning-based classifiers trained to detect specific objects. OpenCV comes with pre-trained models for faces, eyes, pedestrians, and more.

Loading Cascade Classifiers

Python
C++

import cv2 as cv

# Load the pre-trained cascade classifiers from the directory exposed
# as cv.data.haarcascades
face_cascade = cv.CascadeClassifier(cv.data.haarcascades +
                                    'haarcascade_frontalface_default.xml')
eye_cascade = cv.CascadeClassifier(cv.data.haarcascades +
                                   'haarcascade_eye.xml')

# Alternative: load from file path
# face_cascade = cv.CascadeClassifier('haarcascade_frontalface_alt.xml')

# A classifier that failed to load reports empty(); check both cascades and
# stop with a non-zero exit status so the failure is visible to the caller.
if face_cascade.empty():
    print('Error loading cascade classifier')
    raise SystemExit(1)
if eye_cascade.empty():
    print('Error loading eye cascade classifier')
    raise SystemExit(1)

#include <iostream>

#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv;

// Loads the frontal-face and eye Haar cascades.
// Returns 0 when both load, or -1 after printing a message when either fails.
int main() {
    CascadeClassifier face_cascade;
    CascadeClassifier eye_cascade;
    
    // Load cascades; load() returns false when the classifier cannot be read
    if(!face_cascade.load(samples::findFile(
        "haarcascades/haarcascade_frontalface_default.xml"))) {
        // Only the cv namespace is opened above, so the stream names
        // must be qualified with std::
        std::cout << "Error loading face cascade" << std::endl;
        return -1;
    }
    
    if(!eye_cascade.load(samples::findFile(
        "haarcascades/haarcascade_eye.xml"))) {
        std::cout << "Error loading eye cascade" << std::endl;
        return -1;
    }
    
    return 0;
}

Basic Object Detection

Python
C++

import cv2 as cv

# Load image; imread() returns None (it does not raise) when the file
# cannot be read, so fail early with a clear message
img = cv.imread('group_photo.jpg')
if img is None:
    raise SystemExit('Error loading image')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Load cascade and make sure it actually loaded
face_cascade = cv.CascadeClassifier(cv.data.haarcascades +
                                    'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    raise SystemExit('Error loading cascade classifier')

# Detect faces
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,      # How much image size is reduced at each scale
    minNeighbors=5,       # How many neighbors each candidate should have
    minSize=(30, 30),     # Minimum object size
    flags=cv.CASCADE_SCALE_IMAGE
)

print(f"Found {len(faces)} faces")

# Draw rectangles around detected faces; each detection is (x, y, w, h)
for (x, y, w, h) in faces:
    cv.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)

cv.imshow('Face Detection', img)
cv.waitKey(0)
cv.destroyAllWindows()

#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv;
using namespace std;

int main() {
    Mat img = imread("group_photo.jpg");
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);
    
    CascadeClassifier face_cascade;
    face_cascade.load(samples::findFile(
        "haarcascades/haarcascade_frontalface_default.xml"));
    
    vector<Rect> faces;
    face_cascade.detectMultiScale(gray, faces, 1.1, 5, 
                                 0, Size(30, 30));
    
    cout << "Found " << faces.size() << " faces" << endl;
    
    for(size_t i = 0; i < faces.size(); i++) {
        rectangle(img, faces[i], Scalar(0, 255, 0), 2);
    }
    
    imshow("Face Detection", img);
    waitKey(0);
    return 0;
}

Nested Detection (Faces and Eyes)

Based on OpenCV’s facedetect.py sample:

Python
C++

import cv2 as cv

def detect(img, cascade):
    """Run cascade on img and return the hits as (x1, y1, x2, y2) corners.

    Returns an empty list when nothing is detected.
    """
    boxes = cascade.detectMultiScale(
        img,
        scaleFactor=1.3,
        minNeighbors=4,
        minSize=(30, 30),
        flags=cv.CASCADE_SCALE_IMAGE,
    )
    if len(boxes) != 0:
        # Detections arrive as (x, y, w, h); adding the origin columns to the
        # size columns in place turns them into the bottom-right corner.
        boxes[:, 2:] += boxes[:, :2]
        return boxes
    return []

def draw_rects(img, rects, color):
    """Outline each (x1, y1, x2, y2) box of rects on img, 2 px thick."""
    for box in rects:
        left, top, right, bottom = box
        cv.rectangle(img, (left, top), (right, bottom), color, 2)

# Build both classifiers from the cascade files located by findFile()
face_cascade = cv.CascadeClassifier(
    cv.samples.findFile('haarcascades/haarcascade_frontalface_alt.xml'))
eye_cascade = cv.CascadeClassifier(
    cv.samples.findFile('haarcascades/haarcascade_eye.xml'))

# Read the input and prepare a contrast-equalized grayscale version
img = cv.imread('face.jpg')
gray = cv.equalizeHist(cv.cvtColor(img, cv.COLOR_BGR2GRAY))

# Find faces and outline them in green on a copy of the input
faces = detect(gray, face_cascade)
vis = img.copy()
draw_rects(vis, faces, (0, 255, 0))

# Look for eyes only inside each face region and outline them in blue
if not eye_cascade.empty():
    for left, top, right, bottom in faces:
        face_gray = gray[top:bottom, left:right]
        # Slicing vis gives a view, so drawing on it marks vis itself
        face_vis = vis[top:bottom, left:right]
        eye_boxes = detect(face_gray.copy(), eye_cascade)
        draw_rects(face_vis, eye_boxes, (255, 0, 0))

cv.imshow('Face and Eye Detection', vis)
cv.waitKey(0)
cv.destroyAllWindows()

#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv;
using namespace std;

// Detects faces in img and, inside every face, eyes. Each detection is marked
// on img with a green circle and the result is shown in the "Detection" window.
// Detection runs on an equalized grayscale copy shrunk by a factor of 1.3, so
// all coordinates are multiplied by that factor before drawing on img.
void detectAndDraw(Mat& img, CascadeClassifier& face_cascade,
                  CascadeClassifier& eye_cascade) {
    const double scale = 1.3;
    const double shrink = 1.0 / scale;
    const Scalar color(0, 255, 0);
    
    Mat gray;
    cvtColor(img, gray, COLOR_BGR2GRAY);
    
    Mat smallImg;
    resize(gray, smallImg, Size(), shrink, shrink, INTER_LINEAR_EXACT);
    equalizeHist(smallImg, smallImg);
    
    vector<Rect> faces;
    face_cascade.detectMultiScale(smallImg, faces, 1.1, 2, 
                                 CASCADE_SCALE_IMAGE, Size(30, 30));
    
    for(const Rect& face : faces) {
        // Circle centred on the face box, mapped back to full resolution
        const Point faceCenter(cvRound((face.x + face.width*0.5)*scale),
                               cvRound((face.y + face.height*0.5)*scale));
        const int faceRadius = cvRound((face.width + face.height)*0.25*scale);
        circle(img, faceCenter, faceRadius, color, 3);
        
        // Nested detection is skipped when no eye cascade is loaded
        if(eye_cascade.empty())
            continue;
        
        vector<Rect> eyes;
        eye_cascade.detectMultiScale(smallImg(face), eyes, 1.1, 2,
                                    CASCADE_SCALE_IMAGE, Size(30, 30));
        
        for(const Rect& eye : eyes) {
            // Eye boxes are relative to the face ROI, so add the face origin
            const Point eyeCenter(
                cvRound((face.x + eye.x + eye.width*0.5)*scale),
                cvRound((face.y + eye.y + eye.height*0.5)*scale));
            const int eyeRadius = cvRound((eye.width + eye.height)*0.25*scale);
            circle(img, eyeCenter, eyeRadius, color, 3);
        }
    }
    
    imshow("Detection", img);
}

Key parameters for detectMultiScale():

scaleFactor: How much the image size is reduced at each scale (1.1 = 10% reduction). Smaller values are more thorough but slower.
minNeighbors: How many neighboring detections each candidate rectangle must have in order to be retained. Higher values result in fewer but more accurate detections.
minSize: Minimum object size. Objects smaller than this are ignored.

HOG (Histogram of Oriented Gradients) Detector

HOG descriptors are excellent for pedestrian detection.

People Detection with HOG

Based on OpenCV’s peopledetect.py sample:

Python
C++

import cv2 as cv

def inside(r, q):
    """Return True when rectangle r lies strictly within rectangle q.

    Both rectangles are (x, y, width, height) sequences. Touching edges do
    not count as inside because every comparison is strict.
    """
    r_left, r_top, r_width, r_height = r
    q_left, q_top, q_width, q_height = q
    starts_within = r_left > q_left and r_top > q_top
    ends_within = (r_left + r_width < q_left + q_width
                   and r_top + r_height < q_top + q_height)
    return starts_within and ends_within

def draw_detections(img, rects, thickness=1):
    """Draw each (x, y, w, h) detection on img as a green, inset rectangle."""
    for x, y, w, h in rects:
        # The boxes returned by the HOG detector are slightly larger than
        # the person, so pull each side in: 15% of the width horizontally
        # and 5% of the height vertically.
        inset_x = int(0.15*w)
        inset_y = int(0.05*h)
        top_left = (x+inset_x, y+inset_y)
        bottom_right = (x+w-inset_x, y+h-inset_y)
        cv.rectangle(img, top_left, bottom_right, (0, 255, 0), thickness)

# Load image; imread() returns None (it does not raise) when the file
# cannot be read, so fail early with a clear message
img = cv.imread('people.jpg')
if img is None:
    raise SystemExit('Error loading image')

# Create HOG descriptor
hog = cv.HOGDescriptor()
# Set default people detector
hog.setSVMDetector(cv.HOGDescriptor_getDefaultPeopleDetector())

# Detect people
found, weights = hog.detectMultiScale(img, 
                                     winStride=(8, 8),
                                     padding=(32, 32),
                                     scale=1.05)

# Drop nested detections: keep a rectangle only if it does not lie strictly
# inside any other detection (partially overlapping boxes are all kept)
found_filtered = [
    r for ri, r in enumerate(found)
    if not any(ri != qi and inside(r, q) for qi, q in enumerate(found))
]

print(f"Found {len(found_filtered)} people (from {len(found)} detections)")

# Draw all detections with a thin line
draw_detections(img, found)
# Highlight the detections that survived filtering with a thicker line
draw_detections(img, found_filtered, 3)

cv.imshow('People Detection', img)
cv.waitKey(0)
cv.destroyAllWindows()

#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv;
using namespace std;

// Wraps a HOGDescriptor that is configured with OpenCV's default people
// detector when the object is constructed.
class Detector {
public:
    Detector() {
        hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
    }
    
    // Runs multi-scale HOG detection on img and returns the detected boxes.
    vector<Rect> detect(Mat& img) {
        const double hitThreshold = 0;
        const Size winStride(8, 8);
        const Size padding;
        const double scale = 1.05;
        const double groupThreshold = 2;
        const bool useMeanshiftGrouping = false;
        
        vector<Rect> found;
        hog.detectMultiScale(img, found, hitThreshold, winStride, padding,
                             scale, groupThreshold, useMeanshiftGrouping);
        return found;
    }
    
    // Shrinks r in place for nicer drawing: both dimensions become 80% of
    // their original size, and the origin moves right by 10% of the original
    // width and down by 7% of the original height.
    void adjustRect(Rect& r) {
        const int origWidth = r.width;
        const int origHeight = r.height;
        r.x += cvRound(origWidth*0.1);
        r.y += cvRound(origHeight*0.07);
        r.width = cvRound(origWidth*0.8);
        r.height = cvRound(origHeight*0.8);
    }
    
private:
    HOGDescriptor hog;
};

int main() {
    Mat img = imread("people.jpg");
    if(img.empty()) {
        cout << "Error loading image" << endl;
        return -1;
    }
    
    Detector detector;
    vector<Rect> found = detector.detect(img);
    
    cout << "Found " << found.size() << " people" << endl;
    
    for(size_t i = 0; i < found.size(); i++) {
        Rect r = found[i];
        detector.adjustRect(r);
        rectangle(img, r.tl(), r.br(), Scalar(0, 255, 0), 2);
    }
    
    imshow("People Detection", img);
    waitKey(0);
    return 0;
}

Real-time Detection on Video

Python
C++

import cv2 as cv
import time

# Initialize HOG detector
hog = cv.HOGDescriptor()
hog.setSVMDetector(cv.HOGDescriptor_getDefaultPeopleDetector())

# Open video or camera
cap = cv.VideoCapture(0)  # or 'video.mp4'
# Fail with a clear message instead of silently skipping the loop when the
# source cannot be opened
if not cap.isOpened():
    raise SystemExit('Error opening video source')

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    # Resize for faster processing
    frame = cv.resize(frame, (640, 480))
    
    # Measure detection time with a monotonic, high-resolution clock
    start_time = time.perf_counter()
    
    # Detect people
    found, weights = hog.detectMultiScale(frame, 
                                         winStride=(8, 8),
                                         padding=(8, 8),
                                         scale=1.05)
    
    elapsed_time = time.perf_counter() - start_time
    # Guard against a zero interval so the division can never raise
    fps = 1.0 / elapsed_time if elapsed_time > 0 else float('inf')
    
    # Draw detections; each one is (x, y, w, h)
    for (x, y, w, h) in found:
        cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
    
    # Display FPS and count
    cv.putText(frame, f'People: {len(found)}', (10, 30),
              cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    cv.putText(frame, f'FPS: {fps:.1f}', (10, 70),
              cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
    cv.imshow('HOG People Detection', frame)
    
    # Quit when the user presses 'q'
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()

#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv;
using namespace std;

int main() {
    VideoCapture cap(0);
    if(!cap.isOpened())
        return -1;
    
    HOGDescriptor hog;
    hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
    
    Mat frame;
    while(cap.read(frame)) {
        resize(frame, frame, Size(640, 480));
        
        int64 t = getTickCount();
        
        vector<Rect> found;
        hog.detectMultiScale(frame, found, 0, Size(8,8), 
                           Size(), 1.05, 2, false);
        
        t = getTickCount() - t;
        double fps = getTickFrequency() / t;
        
        // Draw detections
        for(size_t i = 0; i < found.size(); i++) {
            rectangle(frame, found[i], Scalar(0, 255, 0), 2);
        }
        
        // Display info
        putText(frame, format("People: %d", found.size()),
               Point(10, 30), FONT_HERSHEY_SIMPLEX, 1, 
               Scalar(0, 255, 0), 2);
        putText(frame, format("FPS: %.1f", fps),
               Point(10, 70), FONT_HERSHEY_SIMPLEX, 1,
               Scalar(0, 255, 0), 2);
        
        imshow("HOG People Detection", frame);
        
        if(waitKey(1) == 'q')
            break;
    }
    
    return 0;
}

Available Pre-trained Cascades

OpenCV includes many pre-trained cascade classifiers:

Face Detection Cascades

haarcascade_frontalface_default.xml - General frontal face detection
haarcascade_frontalface_alt.xml - Alternative frontal face
haarcascade_frontalface_alt2.xml - Another alternative
haarcascade_profileface.xml - Profile (side) faces
lbpcascade_frontalface.xml - LBP-based face detection (faster)

Eye Detection Cascades

haarcascade_eye.xml - General eye detection
haarcascade_eye_tree_eyeglasses.xml - Eyes with glasses
haarcascade_lefteye_2splits.xml - Left eye
haarcascade_righteye_2splits.xml - Right eye

Body and Gesture Cascades

haarcascade_fullbody.xml - Full body detection
haarcascade_upperbody.xml - Upper body
haarcascade_lowerbody.xml - Lower body
haarcascade_smile.xml - Smile detection

Other Object Cascades

haarcascade_frontalcatface.xml - Cat face detection
haarcascade_frontalcatface_extended.xml - Extended cat face
haarcascade_licence_plate_rus_16stages.xml - Russian license plates

Custom Cascade Training

You can train custom cascade classifiers for specific objects:

Collect Training Data

Gather positive samples (images containing the object) and negative samples (images without the object).

Create Sample Description

Create text files listing the locations of positive samples and paths to negative samples.

Generate Samples

Use opencv_createsamples to generate training samples from your positive images.

Train Cascade

Use opencv_traincascade to train the classifier. This can take hours or days depending on data size.

Test and Refine

Test the classifier and collect more samples if needed to improve accuracy.

Training custom cascades requires:

Hundreds to thousands of positive samples
Even more negative samples
Significant computation time (can take days)
Careful parameter tuning

For most modern applications, consider using deep learning-based detection instead.

Performance Optimization

Python
C++

import cv2 as cv

# Read the input and convert it to grayscale for the cascade
img = cv.imread('image.jpg')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Shrink the grayscale image so the detector has fewer pixels to scan
scale = 0.5
small = cv.resize(gray, None, fx=scale, fy=scale)

face_cascade = cv.CascadeClassifier(cv.data.haarcascades +
                                    'haarcascade_frontalface_default.xml')

# Run detection on the reduced image
faces = face_cascade.detectMultiScale(small, scaleFactor=1.1, minNeighbors=5)

# Map every (x, y, w, h) value back to full-resolution coordinates
faces = [[int(value/scale) for value in box] for box in faces]

# Outline each face on the original image
for left, top, width, height in faces:
    cv.rectangle(img, (left, top), (left+width, top+height), (0, 255, 0), 2)

// Detect faces on a reduced grayscale copy, then map the boxes back onto
// the full-resolution image.
Mat img = imread("image.jpg");

Mat gray;
cvtColor(img, gray, COLOR_BGR2GRAY);

double scale = 0.5;
Mat small;
resize(gray, small, Size(), scale, scale);

CascadeClassifier face_cascade;
face_cascade.load("haarcascade_frontalface_default.xml");

vector<Rect> faces;
face_cascade.detectMultiScale(small, faces, 1.1, 5);

// Scale back to original size; the division is done in double and the
// result is truncated to int, made explicit here with static_cast
for(Rect& face : faces) {
    face.x = static_cast<int>(face.x / scale);
    face.y = static_cast<int>(face.y / scale);
    face.width = static_cast<int>(face.width / scale);
    face.height = static_cast<int>(face.height / scale);
    rectangle(img, face, Scalar(0, 255, 0), 2);
}

Performance tips:

Process at lower resolution (0.5x or 0.25x scale)
Use histogram equalization on grayscale images
Adjust scaleFactor (larger = faster but less accurate)
Increase minNeighbors to reduce false positives
Set appropriate minSize to skip small detections

Next Steps

Learn Face Detection for specialized face detection techniques
Explore Deep Learning for more accurate modern detection methods
Try Video Processing to apply detection to video streams

Getting Started

Core Concepts

Modules

Tutorials

Language Bindings

Platform Support

Object Detection

Object Detection

Haar Cascade Classifiers

Loading Cascade Classifiers

Basic Object Detection

Nested Detection (Faces and Eyes)

HOG (Histogram of Oriented Gradients) Detector

People Detection with HOG

Real-time Detection on Video

Available Pre-trained Cascades

Custom Cascade Training

Performance Optimization

Next Steps

Getting Started

Core Concepts

Modules

Tutorials

Language Bindings

Platform Support

​Object Detection

​Haar Cascade Classifiers

​Loading Cascade Classifiers

​Basic Object Detection

​Nested Detection (Faces and Eyes)

​HOG (Histogram of Oriented Gradients) Detector

​People Detection with HOG

​Real-time Detection on Video

​Available Pre-trained Cascades

​Custom Cascade Training

​Performance Optimization

​Next Steps

Object Detection

Haar Cascade Classifiers

Loading Cascade Classifiers

Basic Object Detection

Nested Detection (Faces and Eyes)

HOG (Histogram of Oriented Gradients) Detector

People Detection with HOG

Real-time Detection on Video

Available Pre-trained Cascades

Custom Cascade Training

Performance Optimization

Next Steps