Documentation Index Fetch the complete documentation index at: https://mintlify.com/opencv/opencv/llms.txt
Use this file to discover all available pages before exploring further.
Object Detection
Learn how to detect objects in images and video using classical computer vision techniques including Haar cascades and Histogram of Oriented Gradients (HOG) detectors.
Haar Cascade Classifiers
Haar cascades are machine learning-based classifiers trained to detect specific objects. OpenCV comes with pre-trained models for faces, eyes, pedestrians, and more.
Loading Cascade Classifiers
import cv2 as cv
# Load pre-trained cascade classifiers
face_cascade = cv.CascadeClassifier(
    cv.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv.CascadeClassifier(
    cv.data.haarcascades + 'haarcascade_eye.xml')

# Alternative: load from file path
# face_cascade = cv.CascadeClassifier('haarcascade_frontalface_alt.xml')

# A classifier whose XML file could not be loaded reports empty(), so every
# classifier has to be checked before it is used for detection.
if face_cascade.empty() or eye_cascade.empty():
    print('Error loading cascade classifier')
    # Exit with a non-zero status so callers can tell that loading failed.
    raise SystemExit(1)
#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv;
using namespace std;  // cout and endl used below live in namespace std

// Loads the face and eye Haar cascades.
// Returns 0 when both load, -1 as soon as one of them fails.
int main() {
    CascadeClassifier face_cascade;
    CascadeClassifier eye_cascade;

    // Load cascades; load() returns false when the file cannot be read
    if (!face_cascade.load(samples::findFile(
            "haarcascades/haarcascade_frontalface_default.xml"))) {
        cout << "Error loading face cascade" << endl;
        return -1;
    }
    if (!eye_cascade.load(samples::findFile(
            "haarcascades/haarcascade_eye.xml"))) {
        cout << "Error loading eye cascade" << endl;
        return -1;
    }
    return 0;
}
Basic Object Detection
import cv2 as cv
# Load image
img = cv.imread('group_photo.jpg')
if img is None:
    # imread reports a missing or unreadable file by returning None
    raise SystemExit('Error loading image')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Load cascade
face_cascade = cv.CascadeClassifier(
    cv.data.haarcascades + 'haarcascade_frontalface_default.xml')
if face_cascade.empty():
    raise SystemExit('Error loading cascade classifier')

# Detect faces
faces = face_cascade.detectMultiScale(
    gray,
    scaleFactor=1.1,   # How much image size is reduced at each scale
    minNeighbors=5,    # How many neighbors each candidate should have
    minSize=(30, 30),  # Minimum object size
    flags=cv.CASCADE_SCALE_IMAGE,
)
print(f"Found {len(faces)} faces")

# Draw rectangles around detected faces
for (x, y, w, h) in faces:
    cv.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)

cv.imshow('Face Detection', img)
cv.waitKey(0)
cv.destroyAllWindows()
#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv ;
using namespace std ;
int main () {
Mat img = imread ( "group_photo.jpg" );
Mat gray;
cvtColor (img, gray, COLOR_BGR2GRAY);
CascadeClassifier face_cascade;
face_cascade . load ( samples :: findFile (
"haarcascades/haarcascade_frontalface_default.xml" ));
vector < Rect > faces;
face_cascade . detectMultiScale (gray, faces, 1.1 , 5 ,
0 , Size ( 30 , 30 ));
cout << "Found " << faces . size () << " faces" << endl;
for ( size_t i = 0 ; i < faces . size (); i ++ ) {
rectangle (img, faces [i], Scalar ( 0 , 255 , 0 ), 2 );
}
imshow ( "Face Detection" , img);
waitKey ( 0 );
return 0 ;
}
Nested Detection (Faces and Eyes)
Based on OpenCV’s facedetect.py sample:
import cv2 as cv
def detect(img, cascade):
    """Run ``cascade`` on ``img`` and return the hits as corner boxes.

    Returns an empty list when nothing is detected; otherwise the array
    produced by ``detectMultiScale`` with every row rewritten in place from
    (x, y, w, h) to (x1, y1, x2, y2).
    """
    found = cascade.detectMultiScale(
        img,
        scaleFactor=1.3,
        minNeighbors=4,
        minSize=(30, 30),
        flags=cv.CASCADE_SCALE_IMAGE,
    )
    if len(found) == 0:
        return []
    # Columns 2: hold (w, h); adding the origin (x, y) yields the far corner.
    found[:, 2:] += found[:, :2]
    return found
def draw_rects(img, rects, color):
    """Outline every (x1, y1, x2, y2) box in ``rects`` on ``img``, in place."""
    for left, top, right, bottom in rects:
        top_left = (left, top)
        bottom_right = (right, bottom)
        cv.rectangle(img, top_left, bottom_right, color, 2)
# Load cascades
face_cascade = cv.CascadeClassifier(cv.samples.findFile(
    'haarcascades/haarcascade_frontalface_alt.xml'))
eye_cascade = cv.CascadeClassifier(cv.samples.findFile(
    'haarcascades/haarcascade_eye.xml'))

# Load and process image
img = cv.imread('face.jpg')
if img is None:
    # imread reports a missing or unreadable file by returning None
    raise SystemExit('Error loading image')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)
gray = cv.equalizeHist(gray)  # Improve contrast

# Detect faces
faces = detect(gray, face_cascade)
vis = img.copy()
draw_rects(vis, faces, (0, 255, 0))  # Green for faces

# Detect eyes within each face
if not eye_cascade.empty():
    for x1, y1, x2, y2 in faces:
        roi = gray[y1:y2, x1:x2]
        # Eye boxes are relative to the face region, so they are drawn on the
        # matching slice of vis rather than on vis itself.
        vis_roi = vis[y1:y2, x1:x2]
        eyes = detect(roi.copy(), eye_cascade)
        draw_rects(vis_roi, eyes, (255, 0, 0))  # Blue for eyes

cv.imshow('Face and Eye Detection', vis)
cv.waitKey(0)
cv.destroyAllWindows()
#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv ;
using namespace std ;
// Finds faces (and eyes inside each face) on a shrunken, equalized grayscale
// copy of img, circles every hit on img itself, and shows the result.
void detectAndDraw(Mat& img, CascadeClassifier& face_cascade,
                   CascadeClassifier& eye_cascade) {
    // Detection runs on an image shrunk by this factor; coordinates found
    // there are multiplied by it to map them back onto img.
    const double scale = 1.3;
    const Scalar color(0, 255, 0);
    const Size minObject(30, 30);

    Mat gray, smallImg;
    cvtColor(img, gray, COLOR_BGR2GRAY);
    const double fx = 1.0 / scale;
    resize(gray, smallImg, Size(), fx, fx, INTER_LINEAR_EXACT);
    equalizeHist(smallImg, smallImg);

    vector<Rect> faces;
    face_cascade.detectMultiScale(smallImg, faces, 1.1, 2,
                                  CASCADE_SCALE_IMAGE, minObject);

    for (const Rect& face : faces) {
        // Circle the face, centered on the detection rectangle
        Point center(cvRound((face.x + face.width * 0.5) * scale),
                     cvRound((face.y + face.height * 0.5) * scale));
        int radius = cvRound((face.width + face.height) * 0.25 * scale);
        circle(img, center, radius, color, 3);

        // Eye detection is optional: skip it when no eye cascade is loaded
        if (eye_cascade.empty())
            continue;

        vector<Rect> eyes;
        eye_cascade.detectMultiScale(smallImg(face), eyes, 1.1, 2,
                                     CASCADE_SCALE_IMAGE, minObject);

        for (const Rect& eye : eyes) {
            // Eye coordinates are relative to the face region: add the face
            // origin before scaling back up.
            Point eye_center(
                cvRound((face.x + eye.x + eye.width * 0.5) * scale),
                cvRound((face.y + eye.y + eye.height * 0.5) * scale));
            int eye_radius = cvRound((eye.width + eye.height) * 0.25 * scale);
            circle(img, eye_center, eye_radius, color, 3);
        }
    }

    imshow("Detection", img);
}
Key parameters for detectMultiScale():
scaleFactor : How much the image size is reduced at each scale (1.1 = 10% reduction). Smaller values are more thorough but slower.
minNeighbors : How many neighboring detections each candidate rectangle must have in order to be retained. Higher values result in fewer but more accurate detections.
minSize : Minimum object size. Objects smaller than this are ignored.
HOG (Histogram of Oriented Gradients) Detector
HOG descriptors are excellent for pedestrian detection.
People Detection with HOG
Based on OpenCV’s peopledetect.py sample:
import cv2 as cv
def inside(r, q):
    """Return whether rectangle ``r`` lies strictly inside rectangle ``q``.

    Both rectangles are (x, y, width, height). Every edge of ``r`` must be
    strictly within ``q``; touching or shared edges do not count as inside.
    """
    inner_x, inner_y, inner_w, inner_h = r
    outer_x, outer_y, outer_w, outer_h = q
    return (
        inner_x > outer_x
        and inner_y > outer_y
        and inner_x + inner_w < outer_x + outer_w
        and inner_y + inner_h < outer_y + outer_h
    )
def draw_detections(img, rects, thickness=1):
    """Draw each (x, y, w, h) detection on ``img`` as a green rectangle.

    Every box is inset by 15% of its width on the left and right and by 5% of
    its height on the top and bottom before drawing.
    """
    green = (0, 255, 0)
    for x, y, w, h in rects:
        # HOG detector returns slightly larger rectangles
        # so we shrink them a bit
        inset_x = int(0.15 * w)
        inset_y = int(0.05 * h)
        top_left = (x + inset_x, y + inset_y)
        bottom_right = (x + w - inset_x, y + h - inset_y)
        cv.rectangle(img, top_left, bottom_right, green, thickness)
# Load image
img = cv.imread('people.jpg')
if img is None:
    # imread reports a missing or unreadable file by returning None
    raise SystemExit('Error loading image')

# Create HOG descriptor
hog = cv.HOGDescriptor()

# Set default people detector
hog.setSVMDetector(cv.HOGDescriptor_getDefaultPeopleDetector())

# Detect people
found, weights = hog.detectMultiScale(
    img,
    winStride=(8, 8),
    padding=(32, 32),
    scale=1.05,
)

# Filter overlapping detections: keep a rectangle only when it is not
# strictly inside any other detection.
found_filtered = [
    r for ri, r in enumerate(found)
    if not any(ri != qi and inside(r, q) for qi, q in enumerate(found))
]

print(f"Found {len(found_filtered)} people (from {len(found)} detections)")

# Draw all detections
draw_detections(img, found)
# Highlight filtered detections
draw_detections(img, found_filtered, 3)

cv.imshow('People Detection', img)
cv.waitKey(0)
cv.destroyAllWindows()
#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv ;
using namespace std ;
// Wraps a HOGDescriptor configured with OpenCV's default people detector.
class Detector {
    HOGDescriptor hog;

public:
    Detector() {
        hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
    }

    // Runs multi-scale HOG detection on img and returns the hit rectangles.
    vector<Rect> detect(Mat& img) {
        const double hitThreshold = 0;
        const Size winStride(8, 8);
        const Size padding;  // default-constructed Size
        const double scale = 1.05;
        const double groupThreshold = 2;
        const bool useMeanshiftGrouping = false;

        vector<Rect> found;
        hog.detectMultiScale(img, found, hitThreshold, winStride,
                             padding, scale, groupThreshold,
                             useMeanshiftGrouping);
        return found;
    }

    // Shrinks r in place for better visualization: 80% of the original width
    // and height, shifted right by 10% of the width and down by 7% of the
    // height.
    void adjustRect(Rect& r) {
        // Capture the original size first: both offsets are derived from it.
        const int width = r.width;
        const int height = r.height;
        r.x += cvRound(width * 0.1);
        r.y += cvRound(height * 0.07);
        r.width = cvRound(width * 0.8);
        r.height = cvRound(height * 0.8);
    }
};
int main () {
Mat img = imread ( "people.jpg" );
if ( img . empty ()) {
cout << "Error loading image" << endl;
return - 1 ;
}
Detector detector;
vector < Rect > found = detector . detect (img);
cout << "Found " << found . size () << " people" << endl;
for ( size_t i = 0 ; i < found . size (); i ++ ) {
Rect r = found [i];
detector . adjustRect (r);
rectangle (img, r . tl (), r . br (), Scalar ( 0 , 255 , 0 ), 2 );
}
imshow ( "People Detection" , img);
waitKey ( 0 );
return 0 ;
}
Real-time Detection on Video
import cv2 as cv
import time
# Initialize HOG detector
hog = cv.HOGDescriptor()
hog.setSVMDetector(cv.HOGDescriptor_getDefaultPeopleDetector())

# Open video or camera
cap = cv.VideoCapture(0)  # or 'video.mp4'
if not cap.isOpened():
    raise SystemExit('Error opening video source')

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Resize for faster processing
        frame = cv.resize(frame, (640, 480))

        # Measure detection time with perf_counter: it is monotonic and has a
        # finer resolution than time.time(), which suits short intervals.
        start_time = time.perf_counter()

        # Detect people
        found, weights = hog.detectMultiScale(
            frame,
            winStride=(8, 8),
            padding=(8, 8),
            scale=1.05,
        )

        elapsed_time = time.perf_counter() - start_time
        # Guard against a zero interval so the division cannot raise.
        fps = 1.0 / elapsed_time if elapsed_time > 0 else float('inf')

        # Draw detections
        for (x, y, w, h) in found:
            cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        # Display count and FPS
        cv.putText(frame, f'People: {len(found)} ', (10, 30),
                   cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv.putText(frame, f'FPS: {fps:.1f} ', (10, 70),
                   cv.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv.imshow('HOG People Detection', frame)

        # Mask to the low byte before comparing with the key code
        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture device and close windows even if the loop raised.
    cap.release()
    cv.destroyAllWindows()
#include <opencv2/opencv.hpp>
#include <opencv2/objdetect.hpp>
using namespace cv ;
using namespace std ;
// Runs HOG people detection on camera 0 frame by frame, overlaying the
// detection count and the detection rate, until 'q' is pressed or no frame
// can be read. Returns -1 when the camera cannot be opened, 0 otherwise.
int main() {
    VideoCapture cap(0);
    if (!cap.isOpened())
        return -1;

    HOGDescriptor hog;
    hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

    Mat frame;
    while (cap.read(frame)) {
        resize(frame, frame, Size(640, 480));

        // Time only the detection call
        int64 t = getTickCount();
        vector<Rect> found;
        hog.detectMultiScale(frame, found, 0, Size(8, 8),
                             Size(), 1.05, 2, false);
        t = getTickCount() - t;
        double fps = getTickFrequency() / t;

        // Draw detections
        for (size_t i = 0; i < found.size(); i++) {
            rectangle(frame, found[i], Scalar(0, 255, 0), 2);
        }

        // Display info. size() returns size_t, which does not match %d, so
        // convert explicitly to the int that the format string expects.
        putText(frame, format("People: %d ", static_cast<int>(found.size())),
                Point(10, 30), FONT_HERSHEY_SIMPLEX, 1,
                Scalar(0, 255, 0), 2);
        putText(frame, format("FPS: %.1f ", fps),
                Point(10, 70), FONT_HERSHEY_SIMPLEX, 1,
                Scalar(0, 255, 0), 2);

        imshow("HOG People Detection", frame);
        if (waitKey(1) == 'q')
            break;
    }
    return 0;
}
Available Pre-trained Cascades
OpenCV includes many pre-trained cascade classifiers:
haarcascade_frontalface_default.xml - General frontal face detection
haarcascade_frontalface_alt.xml - Alternative frontal face
haarcascade_frontalface_alt2.xml - Another alternative
haarcascade_profileface.xml - Profile (side) faces
lbpcascade_frontalface.xml - LBP-based face detection (faster; located in OpenCV's lbpcascades data directory rather than haarcascades)
haarcascade_eye.xml - General eye detection
haarcascade_eye_tree_eyeglasses.xml - Eyes with glasses
haarcascade_lefteye_2splits.xml - Left eye
haarcascade_righteye_2splits.xml - Right eye
Body and Gesture Cascades
haarcascade_fullbody.xml - Full body detection
haarcascade_upperbody.xml - Upper body
haarcascade_lowerbody.xml - Lower body
haarcascade_smile.xml - Smile detection
haarcascade_frontalcatface.xml - Cat face detection
haarcascade_frontalcatface_extended.xml - Extended cat face
haarcascade_licence_plate_rus_16stages.xml - Russian license plates
Custom Cascade Training
You can train custom cascade classifiers for specific objects:
Collect Training Data
Gather positive samples (images containing the object) and negative samples (images without the object).
Create Sample Description
Create text files listing the locations of positive samples and paths to negative samples.
Generate Samples
Use opencv_createsamples to generate training samples from your positive images.
Train Cascade
Use opencv_traincascade to train the classifier. This can take hours or days depending on data size.
Test and Refine
Test the classifier and collect more samples if needed to improve accuracy.
Training custom cascades requires:
Hundreds to thousands of positive samples
Even more negative samples
Significant computation time (can take days)
Careful parameter tuning
For most modern applications, consider using deep learning-based detection instead.
import cv2 as cv
img = cv.imread('image.jpg')
gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)

# Resize for faster detection
scale = 0.5
small = cv.resize(gray, None, fx=scale, fy=scale)

cascade_path = cv.data.haarcascades + 'haarcascade_frontalface_default.xml'
face_cascade = cv.CascadeClassifier(cascade_path)

# Detect on smaller image
detections = face_cascade.detectMultiScale(small, 1.1, 5)

# Scale coordinates back to original size: every component of each
# (x, y, w, h) box is divided by the shrink factor and truncated to int.
faces = [[int(value / scale) for value in box] for box in detections]

# Draw on original image
for left, top, width, height in faces:
    cv.rectangle(img, (left, top), (left + width, top + height),
                 (0, 255, 0), 2)
Mat img = imread("image.jpg");
Mat gray, small;
cvtColor(img, gray, COLOR_BGR2GRAY);

// Detect on a half-size copy of the grayscale image
double scale = 0.5;
resize(gray, small, Size(), scale, scale);

CascadeClassifier face_cascade;
face_cascade.load("haarcascade_frontalface_default.xml");

vector<Rect> faces;
face_cascade.detectMultiScale(small, faces, 1.1, 5);

// Scale back to original size, then draw on the full-resolution image.
// Rect fields are int, so each quotient is truncated when stored.
for (Rect& face : faces) {
    face.x = static_cast<int>(face.x / scale);
    face.y = static_cast<int>(face.y / scale);
    face.width = static_cast<int>(face.width / scale);
    face.height = static_cast<int>(face.height / scale);
    rectangle(img, face, Scalar(0, 255, 0), 2);
}
Performance tips:
Process at lower resolution (0.5x or 0.25x scale)
Use histogram equalization on grayscale images
Adjust scaleFactor (larger = faster but less accurate)
Increase minNeighbors to reduce false positives
Set appropriate minSize to skip small detections
Next Steps