-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTTS.py
More file actions
174 lines (133 loc) · 5.98 KB
/
TTS.py
File metadata and controls
174 lines (133 loc) · 5.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import cv2
import time
import urllib.request
import numpy as np
# --- Configuration ---
# IMPORTANT: Replace with the IP address shown in the ESP32 Serial Monitor
ESP32_CAM_IP = "esp32camurl" # <--- CHANGE ME
# MJPEG stream endpoint served by the ESP32-CAM firmware.
STREAM_URL = f'http://{ESP32_CAM_IP}/stream'
# --- YOLO Model Files (YOLOv3-Tiny is recommended for Raspberry Pi performance) ---
# You need to download these files and place them in the same directory as this script.
# Search online for "YOLOv3-tiny" configuration files.
CONFIG_FILE = "yolov3-tiny.cfg"
WEIGHTS_FILE = "yolov3-tiny.weights"
NAMES_FILE = "coco.names"
# --- Constants ---
CONFIDENCE_THRESHOLD = 0.5  # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.3 # Non-Maximum Suppression threshold
INPUT_SIZE = 320 # Smaller size for faster inference on Pi (320x320 is good for tiny models)
def initialize_yolo_model(config_path, weights_path, names_path):
    """Load the YOLO network and class labels via OpenCV's DNN module.

    Args:
        config_path: Path to the Darknet .cfg file.
        weights_path: Path to the matching .weights file.
        names_path: Path to the newline-separated class-name file.

    Returns:
        Tuple ``(net, classes, output_layers)`` on success, or
        ``(None, None, None)`` if any file is missing or unreadable.
    """
    try:
        # Load class names (one label per line).
        with open(names_path, 'r') as f:
            classes = [line.strip() for line in f.readlines()]
        # Load the network using OpenCV's DNN module
        net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
        # Determine the output layer names. getUnconnectedOutLayers()
        # returns a flat array in OpenCV >= 4.5.4 but an Nx1 array in
        # older releases; flatten() makes the indexing work in both.
        layer_names = net.getLayerNames()
        output_layers = [layer_names[i - 1]
                         for i in net.getUnconnectedOutLayers().flatten()]
        # Set target backend and target device for Raspberry Pi CPU optimization
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)
        print("YOLO model loaded successfully. Ready for detection.")
        return net, classes, output_layers
    except Exception as e:
        print(f"Error loading YOLO files. Check paths and file integrity: {e}")
        print("Please ensure you have the following files in the script directory:")
        # Report the paths that were actually passed in, not the module-level
        # defaults — the original printed the globals, which is misleading
        # when a caller supplies different paths.
        print(f"  - {config_path}")
        print(f"  - {weights_path}")
        print(f"  - {names_path}")
        return None, None, None
def process_frame(net, classes, output_layers, frame):
    """Run YOLO detection on a single frame and draw the results onto it.

    Args:
        net: The loaded cv2.dnn network.
        classes: List of class-name strings (COCO labels).
        output_layers: Names of the network's unconnected output layers.
        frame: BGR image (numpy array) to detect on; annotated in place.

    Returns:
        The same frame with bounding boxes and labels drawn on it.
    """
    frame_h, frame_w, _ = frame.shape

    # Convert the frame to a normalized square blob and run a forward pass.
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (INPUT_SIZE, INPUT_SIZE), swapRB=True, crop=False)
    net.setInput(blob)
    layer_outputs = net.forward(output_layers)

    # Gather every candidate detection above the confidence cut-off.
    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for detection in output:
            # Row layout: [cx, cy, w, h, objectness, per-class scores...]
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > CONFIDENCE_THRESHOLD:
                # Coordinates are normalized; scale back to pixel space.
                cx = int(detection[0] * frame_w)
                cy = int(detection[1] * frame_h)
                box_w = int(detection[2] * frame_w)
                box_h = int(detection[3] * frame_h)
                # Convert from center point to top-left corner.
                left = int(cx - box_w / 2)
                top = int(cy - box_h / 2)
                boxes.append([left, top, box_w, box_h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Non-Maximum Suppression prunes overlapping duplicate boxes.
    indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)

    # Annotate only the boxes that survived suppression.
    for i, (left, top, box_w, box_h) in enumerate(boxes):
        if i not in indexes:
            continue
        label = str(classes[class_ids[i]])
        score = confidences[i]
        box_color = (0, 255, 0)  # Green box
        cv2.rectangle(frame, (left, top), (left + box_w, top + box_h), box_color, 2)
        caption = f"{label}: {score:.2f}"
        cv2.putText(frame, caption, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 2)

    return frame
def run_detector():
    """Main function to capture the stream and run detection.

    Connects to the ESP32-CAM MJPEG stream, runs YOLO on each frame,
    displays the annotated video, and prints an FPS estimate every 10
    frames. Exits on the 'q' key or Ctrl-C; always releases resources.
    """
    # 1. Initialize YOLO; bail out early if the model files are missing.
    net, classes, output_layers = initialize_yolo_model(CONFIG_FILE, WEIGHTS_FILE, NAMES_FILE)
    if net is None:
        return

    # 2. Initialize Video Stream Capture
    cap = cv2.VideoCapture(STREAM_URL)
    if not cap.isOpened():
        print(f"Error: Could not connect to ESP32 stream at {STREAM_URL}")
        return
    print("Successfully connected to ESP32 stream. Starting loop...")

    frame_count = 0
    start_time = time.time()
    try:
        while True:
            # Read a frame from the video stream
            ret, frame = cap.read()
            if not ret:
                print("Stream ended or frame could not be read. Attempting reconnect...")
                # BUGFIX: release the stale capture before opening a new one,
                # otherwise the old handle/socket leaks on every retry.
                cap.release()
                cap = cv2.VideoCapture(STREAM_URL)  # Attempt to reconnect
                time.sleep(1)
                continue

            # Run detection on the captured frame and display the result.
            processed_frame = process_frame(net, classes, output_layers, frame)
            cv2.imshow("ESP32-CAM YOLO Detection", processed_frame)

            # Print an FPS estimate averaged over the last 10 frames.
            frame_count += 1
            if frame_count % 10 == 0:
                end_time = time.time()
                fps = 10 / (end_time - start_time)
                print(f"FPS: {fps:.2f}")
                start_time = time.time()

            # Exit on 'q' press
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    except KeyboardInterrupt:
        print("\nStopping detection loop...")
    finally:
        # Always free the capture and any OpenCV windows, even on error.
        cap.release()
        cv2.destroyAllWindows()
# Entry point: run the detector only when executed as a script,
# not when imported as a module.
if __name__ == "__main__":
    run_detector()