Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
5935e2c
docs: tutorials for all current e2e models
TranHuuNhatHuy Feb 16, 2026
821eb03
docs: basic tutorial structure
TranHuuNhatHuy Feb 16, 2026
e0bf1a2
docs: done I.1: Introducing Autoware VisionPilot and its End-to-End M…
TranHuuNhatHuy Feb 16, 2026
cf2da6a
docs: done I.2: Download Autoware VisionPilot
TranHuuNhatHuy Feb 16, 2026
cc24591
docs: done I.3: Environment Setup
TranHuuNhatHuy Feb 16, 2026
82cd3ca
refactor the tutorial to match Zain ideas
TranHuuNhatHuy Feb 23, 2026
c6bcbff
done II.1.a EgoLanes brief introduction
TranHuuNhatHuy Feb 23, 2026
16b0d00
massive refactoring of tutorial narratives
TranHuuNhatHuy Feb 25, 2026
0c9de13
finalize model downloading and env installments
TranHuuNhatHuy Feb 25, 2026
482dfbf
update requirements.txt to accommodate for the newer Python versions
TranHuuNhatHuy Feb 26, 2026
26a5aca
determined the path convention of this tutorial => need to do proper …
TranHuuNhatHuy Feb 26, 2026
28a5227
add init to Models/inference to expose all model inference classes
TranHuuNhatHuy Feb 26, 2026
98c7316
add all inits to Models/visualizations and each of model subdirs
TranHuuNhatHuy Feb 26, 2026
8d326c6
remove all init temporarily
TranHuuNhatHuy Feb 27, 2026
be5c39c
resolve all possible path conflicts and ill-logics
TranHuuNhatHuy Feb 27, 2026
00281b4
add test images for egolanes as tutorial assets (totalling 3.0 MB sho…
TranHuuNhatHuy Feb 27, 2026
e68944e
more python package fix for requirements.txt
TranHuuNhatHuy Feb 27, 2026
f601fca
done Image Inference
TranHuuNhatHuy Feb 27, 2026
de64c8e
refactor assets, add 42 MB video
TranHuuNhatHuy Feb 27, 2026
6c95bc5
make sure the dirpath is automatically created during video gen in vi…
TranHuuNhatHuy Feb 27, 2026
089cfc1
finish EgoLanes Quick Inference
TranHuuNhatHuy Feb 27, 2026
0bfc3da
finish Model Training - how to prepare 5 opensauce datasets
TranHuuNhatHuy Feb 27, 2026
5d8d4a3
finish Model Training - pretrain or vanilla load
TranHuuNhatHuy Feb 27, 2026
d9f4978
finish Model Training - how to load data
TranHuuNhatHuy Feb 27, 2026
75d6c8b
finish Model Training - how to run training
TranHuuNhatHuy Feb 27, 2026
48fe107
finish Model Training - how to visualize results
TranHuuNhatHuy Feb 27, 2026
c4e4e5f
finalizing EgoLAnes tutorials
TranHuuNhatHuy Feb 27, 2026
8a37393
update autospeed image visualization script to allow for batch proces…
TranHuuNhatHuy Mar 1, 2026
f56cdc6
revamp autospeed video vis
TranHuuNhatHuy Mar 1, 2026
4306ebc
init AutoSpeed tutorial notebook
TranHuuNhatHuy Mar 1, 2026
58ea770
init structure for AutoSpeed tutorial notebook
TranHuuNhatHuy Mar 1, 2026
8e09abd
init SceneSeg tutorial notebook
TranHuuNhatHuy Mar 1, 2026
4cd92db
init Scene3D tutorial notebook
TranHuuNhatHuy Mar 1, 2026
2d09ed4
init DomainSeg tutorial notebook
TranHuuNhatHuy Mar 1, 2026
ad9ee69
done autospeed I.1. Model intro
TranHuuNhatHuy Mar 1, 2026
2700caf
done autospeed I.2. Env setup
TranHuuNhatHuy Mar 1, 2026
9a77457
done autospeed I.3. Model download
TranHuuNhatHuy Mar 1, 2026
802c2ae
edit autospeed inference pipeline to avoid hard-coded best.pt weight …
TranHuuNhatHuy Mar 1, 2026
d1e7d3e
done II. Quick Inference
TranHuuNhatHuy Mar 1, 2026
718ed6a
done III thus wrapping up AutoSpeed
TranHuuNhatHuy Mar 1, 2026
42accdf
added DomainSeg model intro
TranHuuNhatHuy Mar 1, 2026
ec4b058
done getting started for domainseg
TranHuuNhatHuy Mar 2, 2026
5f85a66
rough revamp of image vis, DomainSeg
TranHuuNhatHuy Mar 2, 2026
26b88c5
final revamp of DomainSeg img vis
TranHuuNhatHuy Mar 2, 2026
8579f44
finished revamp video vis DomainSeg
TranHuuNhatHuy Mar 2, 2026
7c61763
done DomainSeg quick inference part
TranHuuNhatHuy Mar 2, 2026
b4f8c6f
finished DomainSeg tutorial
TranHuuNhatHuy Mar 2, 2026
c861742
done scene3D getting started
TranHuuNhatHuy Mar 2, 2026
4889084
done revamping scene3D img vis
TranHuuNhatHuy Mar 2, 2026
1f6af32
done revamping scene3D img vis
TranHuuNhatHuy Mar 2, 2026
82ff5d3
done quick inference for scene 3D tutuorials
TranHuuNhatHuy Mar 2, 2026
1d2300e
done Scene3D tutorials
TranHuuNhatHuy Mar 2, 2026
9371a87
done sceneseg intro
TranHuuNhatHuy Mar 2, 2026
b70a7ce
revamped sceneseg img and video vis scripts
TranHuuNhatHuy Mar 2, 2026
a72a519
finish quick inference of SceneSeg
TranHuuNhatHuy Mar 2, 2026
9101cb8
finished SceneSeg toturials
TranHuuNhatHuy Mar 2, 2026
063103d
beautify README
TranHuuNhatHuy Mar 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,5 @@ __marimo__/
# VisionPilot specific
3rdparty/
trt_cache/
assets/
Tutorials/E2E_Models/autoware_vision_pilot/
Tutorials/E2E_Models/weights/
2 changes: 1 addition & 1 deletion Models/inference/auto_speed_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# print(f'Using {self.device} for inference')

# Load model
self.model = torch.load(checkpoint_path + "/best.pt", map_location="cpu", weights_only=False)['model']
self.model = torch.load(checkpoint_path, map_location="cpu", weights_only=False)['model']
self.model = self.model.to(self.device).eval()

def resize_letterbox(self, img: Image.Image):
Expand Down Expand Up @@ -42,7 +42,7 @@
tensor = transforms.ToTensor()(img).to(self.device).half()
return tensor.unsqueeze(0), scale, pad_x, pad_y

def xywh2xyxy(self, x):

Check warning on line 45 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xyxy)

Check warning on line 45 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xywh)
"""Convert [cx, cy, w, h] to [x1, y1, x2, y2]"""
y = x.clone()
y[:, 0] = x[:, 0] - x[:, 2] / 2 # x1
Expand Down Expand Up @@ -71,11 +71,11 @@
if mask.sum() == 0:
return torch.empty(0, 6)

# --- convert to xyxy before NMS ---

Check warning on line 74 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xyxy)
boxes_xyxy = self.xywh2xyxy(boxes[mask])

Check warning on line 75 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xyxy)

Check warning on line 75 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xywh)

Check warning on line 75 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xyxy)

combined = torch.cat([
boxes_xyxy,

Check warning on line 78 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (xyxy)
scores[mask].unsqueeze(1),
class_ids[mask].float().unsqueeze(1)
], dim=1)
Expand All @@ -97,7 +97,7 @@
if predictions.numel() == 0:
return []

# --- adjust from letterboxed to original coords ---

Check warning on line 100 in Models/inference/auto_speed_infer.py

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (letterboxed)
predictions[:, [0, 2]] = (predictions[:, [0, 2]] - pad_x) / scale
predictions[:, [1, 3]] = (predictions[:, [1, 3]] - pad_y) / scale

Expand Down
33 changes: 16 additions & 17 deletions Models/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
albumentations==1.4.18
cmapy==0.6.6
matplotlib==3.5.3
numpy==2.2.5
onnx==1.17.0
onnxruntime==1.21.0
opencv_contrib_python==4.10.0.84
opencv_python==4.10.0.84
opencv_python_headless==4.11.0.86
Pillow==11.3.0
pytorch_model_summary==0.1.2
thop==0.1.1.post2209072238
torch==2.7.0
torchvision==0.22.0
tensorboard==2.20.0
tensorboard-data-server==0.7.2

albumentations>=1.4.18

Check warning on line 1 in Models/requirements.txt

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (albumentations)
cmapy>=0.6.6

Check warning on line 2 in Models/requirements.txt

View workflow job for this annotation

GitHub Actions / spell-check-differential

Unknown word (cmapy)
matplotlib>=3.8.0
numpy>=1.21.0,<2.0.0
onnx>=1.17.0
onnxruntime>=1.21.0
opencv-contrib-python>=4.10.0.84
opencv-python>=4.10.0.84
opencv-python-headless>=4.11.0.86
Pillow>=11.3.0
pytorch-model-summary>=0.1.2
thop>=0.1.1.post2209072238
torch>=2.7.0
torchvision>=0.22.0
tensorboard>=2.20.0
tensorboard-data-server>=0.7.2
97 changes: 79 additions & 18 deletions Models/visualizations/AutoSpeed/image_visualization.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
from argparse import ArgumentParser
import os
import cv2
from PIL import Image
from argparse import ArgumentParser

from Models.inference.auto_speed_infer import AutoSpeedNetworkInfer

color_map = {
1: (0, 0, 255), # red
2: (0, 255, 255), # yellow
3: (255, 255, 0) # cyan
color_map = { # BGR
1: (0, 0, 255), # Red
2: (0, 255, 255), # Yellow
3: (255, 255, 0) # Cyan
}


def make_visualization(prediction, input_image_filepath):

img_cv = cv2.imread(input_image_filepath)
for pred in prediction:
x1, y1, x2, y2, conf, cls = pred
Expand All @@ -19,27 +22,85 @@ def make_visualization(prediction, input_image_filepath):
color = color_map.get(int(cls), (255, 255, 255))

x1, y1, x2, y2 = map(int, [x1, y1, x2, y2])
# cv2.rectangle(img_cv, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.rectangle(img_cv, (x1, y1), (x2, y2), color, 2)

# Uncomment this if wanna show classes
# label = f"Class: {int(cls)} | Score: {conf:.2f}"
# cv2.putText(img_cv, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

cv2.imshow('Prediction Objects', img_cv)
cv2.waitKey(0)
# Tran: let's not show imgs, instead saving em in batch.
# cv2.imshow("Prediction Objects", img_cv)
# cv2.waitKey(0)
return Image.fromarray(cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB))


if __name__ == "__main__":
def main():

parser = ArgumentParser()
parser.add_argument("-p", "--model_checkpoint_path", dest="model_checkpoint_path",
help="path to pytorch checkpoint file to load model dict")
parser.add_argument("-i", "--input_image_filepath", dest="input_image_filepath",
help="path to input image which will be processed by DomainSeg")

parser.add_argument(
"-p",
"--model_checkpoint_path",
dest = "model_checkpoint_path",
help = "Path to Pytorch checkpoint file to load model dict"
)
parser.add_argument(
"-i",
"--input_image_dirpath",
dest = "input_image_dirpath",
help = "Path to input image directory which will be processed by AutoSpeed"
)
parser.add_argument(
"-o",
"--output_image_dirpath",
dest = "output_image_dirpath",
help = "Path to output image directory where visualizations will be saved",
required = True
)

args = parser.parse_args()
model_checkpoint_path = args.model_checkpoint_path
input_image_filepath = args.input_image_filepath

# Arranging I/O dirs
input_image_dirpath = args.input_image_dirpath
output_image_dirpath = args.output_image_dirpath
if (not os.path.exists(output_image_dirpath)):
os.makedirs(output_image_dirpath)

# Model checkpoint path
model_checkpoint_path = args.model_checkpoint_path
model = AutoSpeedNetworkInfer(model_checkpoint_path)
img = Image.open(input_image_filepath).convert("RGB")

prediction = model.inference(img)
make_visualization(prediction, input_image_filepath)
# Process through input image dir
for filename in sorted(os.listdir(input_image_dirpath)):
if (filename.endswith((".png", ".jpg", ".jpeg"))):

# Fetch image
input_image_filepath = os.path.join(
input_image_dirpath, filename
)
img_id = filename.split(".")[0].zfill(3)
print(f"Reading Image: {input_image_filepath}")

# Inference
img = Image.open(input_image_filepath).convert("RGB")
prediction = model.inference(img)

# Visualization
vis_image = make_visualization(
prediction,
input_image_filepath
)

output_image_filepath = os.path.join(
output_image_dirpath,
f"{img_id}_data.png"
)
vis_image.save(output_image_filepath)

else:
            print(f"Skipping non-image file: {filename}")
continue


if __name__ == "__main__":
main()
86 changes: 57 additions & 29 deletions Models/visualizations/AutoSpeed/video_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
sys.path.append('../../..')
from Models.inference.auto_speed_infer import AutoSpeedNetworkInfer

color_map = {
1: (0, 0, 255), # red
2: (0, 255, 255), # yellow
3: (255, 255, 0) # cyan
color_map = { # Colors (BGR)
1: (0, 0, 255), # Red
2: (0, 255, 255), # Yellow
3: (255, 255, 0) # Cyan
}


Expand Down Expand Up @@ -129,28 +129,48 @@ def make_visualization(prediction, image):


def main():

parser = ArgumentParser()
parser.add_argument("-p", "--model_checkpoint_path", dest="model_checkpoint_path",
help="path to pytorch checkpoint (.pt) or ONNX model (.onnx)")
parser.add_argument("-i", "--video_filepath", dest="video_filepath",
help="path to input video which will be processed by AutoSpeed")
parser.add_argument("-o", "--output_file", dest="output_file",
help="path to output video visualization file, must include output file name")
parser.add_argument('-v', "--vis", action='store_true', default=False,
help="flag for whether to show frame by frame visualization while processing is occuring")

parser.add_argument(
"-p",
"--model_checkpoint_path",
dest = "model_checkpoint_path",
help = "Path to Pytorch checkpoint (.pth) or ONNX model (.onnx)."
)
parser.add_argument(
"-i",
"--video_filepath",
dest = "video_filepath",
help = "Path to input video which will be processed by AutoSpeed.")
parser.add_argument(
"-o",
"--output_file",
dest = "output_file",
help = "Path to output video visualization file, must include output file name.")
parser.add_argument(
"-v",
"--vis",
action = "store_true",
default = False,
        help = "Flag for whether to show frame by frame visualization while processing is occurring."
)
args = parser.parse_args()

# Detect model type and load
model_path = args.model_checkpoint_path

if model_path.endswith('.onnx'):
print('Loading ONNX model...')
model = AutoSpeedONNXInfer(onnx_path=model_path)
print('ONNX Model Loaded')
elif model_path.endswith('.pt') or os.path.isdir(model_path):
print('Loading PyTorch model...')
model = AutoSpeedNetworkInfer(checkpoint_path=model_path)
print('PyTorch Model Loaded')
if model_path.endswith(".onnx"):
print("Loading ONNX model...")
model = AutoSpeedONNXInfer(onnx_path = model_path)
print("ONNX model loaded.")
    elif (
        (model_path.endswith((".pt", ".pth"))) or
        (os.path.isdir(model_path))
    ):
print("Loading PyTorch model...")
model = AutoSpeedNetworkInfer(checkpoint_path = model_path)
print("PyTorch model loaded.")
else:
raise ValueError(f"Unsupported model format: {model_path}. Use .pt or .onnx")

Expand All @@ -160,15 +180,19 @@ def main():
cap = cv2.VideoCapture(video_filepath)

# Output filepath
output_filepath_obj = args.output_file + '.avi'
output_filepath_obj = args.output_file + ".avi"

fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Video writer object
writer_obj = cv2.VideoWriter(output_filepath_obj,
cv2.VideoWriter_fourcc(*"MJPG"), fps, (frame_width, frame_height))
writer_obj = cv2.VideoWriter(
output_filepath_obj,
cv2.VideoWriter_fourcc(*"MJPG"),
fps,
(frame_width, frame_height)
)

    # Check if video capture opened successfully
if (cap.isOpened() == False):
Expand All @@ -177,8 +201,9 @@ def main():
print('Reading video frames')

# Read until video is completed
print('Processing started')
print("Processing started...")
while (cap.isOpened()):

# Capture frame-by-frame
ret, frame = cap.read()
if ret == True:
Expand All @@ -197,15 +222,18 @@ def main():
display_w = 960
h, w, _ = vis_obj.shape
display_h = int(h * (display_w / w))
vis_display = cv2.resize(vis_obj, (display_w, display_h))
cv2.imshow('Prediction Objects', vis_display)
vis_display = cv2.resize(
vis_obj,
(display_w, display_h)
)
cv2.imshow("Prediction Objects", vis_display)
cv2.waitKey(10)

# Writing to video frame
writer_obj.write(vis_obj)

else:
print('Frame not read - ending processing')
print("Frame not read - ending processing...")
break

# When everything done, release the video capture and writer objects
Expand All @@ -214,9 +242,9 @@ def main():

# Closes all the frames
cv2.destroyAllWindows()
print('Completed')
print("Completed.")


if __name__ == '__main__':
if __name__ == "__main__":
main()
# %%
Loading
Loading