-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathend.py
More file actions
63 lines (46 loc) · 1.79 KB
/
end.py
File metadata and controls
63 lines (46 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import cv2
import torch
import numpy as np
from transformers import AutoImageProcessor, VideoMAEForVideoClassification
# Seed NumPy's global random number generator with a fixed value.
# NOTE(review): nothing in the visible script draws from np.random - confirm
# whether this seed is still needed.
np.random.seed(0)
def preprocess_frames(frames, image_processor):
    """Run *frames* through *image_processor* and return its output.

    Args:
        frames: The frames to preprocess; forwarded to the processor as-is.
        image_processor: Callable that accepts the frames plus a
            ``return_tensors`` keyword argument.

    Returns:
        Whatever ``image_processor`` returns when called with
        ``return_tensors="pt"``.
    """
    return image_processor(frames, return_tensors="pt")
# Open the HTTP video stream.
cap = cv2.VideoCapture('http://192.168.1.9:8080/video')

# Request a 640x480 frame size (optional; the capture backend may ignore it).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

# Load the preprocessing pipeline and the fine-tuned VideoMAE classifier.
image_processor = AutoImageProcessor.from_pretrained("archit11/videomae-base-finetuned-ucfcrime-full")
model = VideoMAEForVideoClassification.from_pretrained("archit11/videomae-base-finetuned-ucfcrime-full")

frame_buffer = []
buffer_size = 16  # number of frames collected before each inference

# Label drawn on the preview window. It starts empty so the overlay is well
# defined before the first clip has been classified (previously the name was
# read before it had ever been assigned).
prediction_text = ""

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Failed to capture frame")
            break

        # OpenCV delivers BGR frames, whereas the image processor expects RGB.
        # cvtColor also returns a new array, so the text drawn on `frame`
        # further down never ends up in the frames handed to the model.
        frame_buffer.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Run inference once enough frames have been collected.
        if len(frame_buffer) >= buffer_size:
            inputs = preprocess_frames(frame_buffer, image_processor)
            with torch.no_grad():
                outputs = model(**inputs)
            logits = outputs.logits
            # Map the highest-scoring class index to its label via the
            # model's id2label table.
            predicted_label = logits.argmax(-1).item()
            prediction_text = model.config.id2label[predicted_label]
            print(prediction_text)
            # Start collecting the next clip from scratch.
            frame_buffer.clear()

        # Overlay the most recent prediction and show the live frame.
        cv2.putText(frame, prediction_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow('Video', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close windows even if inference raised or the
    # user interrupted the loop.
    cap.release()
    cv2.destroyAllWindows()