Skip to content

Commit 1187cf4

Browse files
committed
Batch size configuration via environment variable, plus debug logging
1 parent 644aede commit 1187cf4

File tree

1 file changed

+178
-80
lines changed

1 file changed

+178
-80
lines changed

src/native/src/frame_extractor.mm

Lines changed: 178 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@
33
#import <CoreMedia/CoreMedia.h>
44
#import <VideoToolbox/VideoToolbox.h>
55
#include "frame_extractor.h"
6+
#include <cstdlib>
7+
#include <iostream>
8+
9+
// Writes `msg` to stderr, prefixed with "[VITEO DEBUG] ", when the VITEO_DEBUG
// environment variable is set. `msg` is spliced into a stream-insertion chain,
// so callers may pass `"text" << value << ...`. The environment is consulted on
// every expansion, and `msg` is only evaluated when logging is enabled.
#define DEBUG_LOG(msg)                                              \
    do {                                                            \
        if (std::getenv("VITEO_DEBUG") != nullptr) {                \
            std::cerr << "[VITEO DEBUG] " << msg << std::endl;      \
        }                                                           \
    } while (0)
614

715
namespace viteo {
816

@@ -21,20 +29,30 @@
2129
int64_t currentFrame = 0;
2230

2331
// Internal batch buffer for performance
24-
static constexpr size_t BATCH_SIZE = 16;
32+
size_t batch_size = 16;
2533
std::vector<uint8_t> batch_buffer;
2634
size_t batch_count = 0;
2735
size_t batch_index = 0;
2836

2937
bool isOpen = false;
3038

31-
Impl() {}
39+
/// Reads the optional VITEO_BATCH_SIZE environment variable and, when it holds
/// a whole number in [1, 256], uses it as the batch size. Any other value is
/// ignored and the default batch size is kept.
Impl() {
    const char* batch_env = std::getenv("VITEO_BATCH_SIZE");
    if (!batch_env) return;

    // strtol (unlike atoi) has defined behaviour on out-of-range input and
    // reports where parsing stopped, so values such as "16abc" can be rejected.
    char* end = nullptr;
    const long val = std::strtol(batch_env, &end, 10);
    const bool has_digits = (end != batch_env);

    // Tolerate trailing whitespace (e.g. a stray newline from a shell script).
    while (*end == ' ' || *end == '\t' || *end == '\r' || *end == '\n') {
        ++end;
    }

    if (has_digits && *end == '\0' && val > 0 && val <= 256) {
        batch_size = static_cast<size_t>(val);
        DEBUG_LOG("Setting batch size to " << batch_size);
    } else {
        DEBUG_LOG("Ignoring invalid VITEO_BATCH_SIZE \"" << batch_env
                  << "\" (expected an integer in 1..256); keeping " << batch_size);
    }
}
3249

3350
/// Tears the extractor down by delegating to close(); the Objective-C objects
/// themselves are released by ARC, so no manual release calls are needed here.
~Impl() {
    close();
}
3754

55+
/// Releases all resources and resets state
3856
void close() {
3957
@autoreleasepool {
4058
if (reader) {
@@ -47,46 +65,116 @@ void close() {
4765
isOpen = false;
4866
currentFrame = 0;
4967
}
68+
DEBUG_LOG("Closed video resources");
5069
}
5170

52-
bool open(const std::string& path) {
53-
close();
71+
/// Loads asset from file path.
/// Returns nil when the path cannot be converted to an NSString (for example
/// because it is not valid UTF-8) or when no asset object could be created.
AVAsset* loadAsset(const std::string& path) {
    NSString* nsPath = [NSString stringWithUTF8String:path.c_str()];
    if (nsPath == nil) {
        // fileURLWithPath: must not be handed nil, so bail out before calling it.
        DEBUG_LOG("Failed to load asset, path is not valid UTF-8: " << path);
        return nil;
    }

    NSURL* url = [NSURL fileURLWithPath:nsPath];
    AVAsset* loadedAsset = [AVAsset assetWithURL:url];

    if (loadedAsset) {
        DEBUG_LOG("Loaded asset from: " << path);
    } else {
        DEBUG_LOG("Failed to load asset from: " << path);
    }

    return loadedAsset;
}
85+
86+
/// Returns the first video track of `videoAsset`, or nil when the asset
/// exposes no video tracks.
AVAssetTrack* extractVideoTrack(AVAsset* videoAsset) {
    // The deprecation warning for tracksWithMediaType: is silenced only around
    // this one call.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
    NSArray* videoTracks = [videoAsset tracksWithMediaType:AVMediaTypeVideo];
#pragma clang diagnostic pop

    const NSUInteger trackCount = [videoTracks count];
    if (trackCount > 0) {
        DEBUG_LOG("Found " << trackCount << " video track(s)");
        return [videoTracks objectAtIndex:0];
    }

    DEBUG_LOG("No video tracks found");
    return nil;
}
61101

62-
#pragma clang diagnostic push
63-
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
64-
NSArray* tracks = [asset tracksWithMediaType:AVMediaTypeVideo];
65-
#pragma clang diagnostic pop
102+
/// Caches video metadata from track: the natural width and height, the nominal
/// frame rate, and an estimated total frame count (duration in seconds times
/// frame rate, truncated). The frame count is 0 when the duration is not a
/// plain numeric time or the frame rate is not positive.
void cacheMetadata(AVAssetTrack* track, AVAsset* videoAsset) {
    CGSize size = [track naturalSize];
    cachedWidth = static_cast<int>(size.width);
    cachedHeight = static_cast<int>(size.height);
    cachedFPS = [track nominalFrameRate];

    CMTime duration = [videoAsset duration];
    if (CMTIME_IS_NUMERIC(duration) && cachedFPS > 0) {
        cachedTotalFrames = static_cast<int64_t>(
            CMTimeGetSeconds(duration) * cachedFPS
        );
    } else {
        // CMTimeGetSeconds yields NaN for invalid/indefinite times, and
        // converting NaN to an integer is undefined, so skip the computation.
        cachedTotalFrames = 0;
        DEBUG_LOG("Duration or frame rate unavailable; total frame count set to 0");
    }

    DEBUG_LOG("Video metadata: " << cachedWidth << "x" << cachedHeight
              << " @ " << cachedFPS << " fps, "
              << cachedTotalFrames << " total frames");
}
66118

67-
if (tracks.count == 0) return false;
119+
/// Opens video file and initializes extraction.
/// Closes any previously opened video, loads the asset and its first video
/// track, caches metadata, sizes the batch buffer for `batch_size` BGRA frames
/// and starts a reader at frame 0.
/// Returns true on success; on failure the extractor is left not-open.
bool open(const std::string& path) {
    close();

    @autoreleasepool {
        asset = loadAsset(path);
        if (!asset) return false;

        videoTrack = extractVideoTrack(asset);
        if (!videoTrack) return false;

        cacheMetadata(videoTrack, asset);
        if (cachedWidth <= 0 || cachedHeight <= 0) {
            DEBUG_LOG("Invalid video dimensions: " << cachedWidth << "x" << cachedHeight);
            return false;
        }

        // Allocate batch buffer. Widen to size_t before multiplying so the
        // per-frame byte count is not computed in int arithmetic.
        const size_t frame_size =
            static_cast<size_t>(cachedWidth) * static_cast<size_t>(cachedHeight) * 4;
        batch_buffer.resize(batch_size * frame_size);
        DEBUG_LOG("Allocated batch buffer for " << batch_size << " frames");

        // Only report the extractor as open if the reader actually started.
        isOpen = true;
        if (!setupReader(0)) {
            isOpen = false;
            return false;
        }
        return true;
    }
}
89141

142+
/// Builds the output settings passed to the track output: 32-bit BGRA pixel
/// buffers flagged Metal-compatible and IOSurface-backed, with decompression
/// properties that request the hardware-accelerated decoder and turn off
/// per-frame HDR display metadata propagation.
NSDictionary* createOutputSettings() {
    NSDictionary* decompressionProperties = @{
        (id)kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder: @YES,
        (id)kVTDecompressionPropertyKey_PropagatePerFrameHDRDisplayMetadata: @NO,
    };

    NSDictionary* settings = @{
        (id)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32BGRA),
        (id)kCVPixelBufferMetalCompatibilityKey: @YES,
        (id)kCVPixelBufferIOSurfacePropertiesKey: @{},
        AVVideoDecompressionPropertiesKey: decompressionProperties,
    };
    return settings;
}
154+
155+
/// Creates a track output for `track` using `settings`, with sample-data
/// copying disabled and random access enabled.
AVAssetReaderTrackOutput* createTrackOutput(AVAssetTrack* track, NSDictionary* settings) {
    AVAssetReaderTrackOutput* configuredOutput =
        [[AVAssetReaderTrackOutput alloc] initWithTrack:track
                                         outputSettings:settings];

    [configuredOutput setAlwaysCopiesSampleData:NO];
    [configuredOutput setSupportsRandomAccess:YES];

    DEBUG_LOG("Created track output with hardware acceleration");
    return configuredOutput;
}
166+
167+
/// Applies time range for seeking to specific frame.
/// Does nothing for `startFrame <= 0` (the reader then keeps its default
/// range) or when the cached frame rate is not positive.
void applyTimeRange(AVAssetReader* videoReader, int64_t startFrame) {
    if (startFrame <= 0) return;

    if (!(cachedFPS > 0)) {
        DEBUG_LOG("Cannot seek to frame " << startFrame
                  << ": invalid frame rate " << cachedFPS);
        return;
    }

    // CMTime timescales are integers. Passing the frame rate directly drops its
    // fractional part (29.97 -> 29), so scale both value and timescale by 1000
    // to keep three decimals. Whole-number rates yield the same time as before.
    const int32_t timescale = static_cast<int32_t>(cachedFPS * 1000.0 + 0.5);
    CMTime startTime = CMTimeMake(startFrame * 1000, timescale);
    CMTime duration = CMTimeSubtract([asset duration], startTime);
    videoReader.timeRange = CMTimeRangeMake(startTime, duration);
    DEBUG_LOG("Seeking to frame " << startFrame);
}
176+
177+
/// Initializes reader for frame extraction
90178
bool setupReader(int64_t startFrame) {
91179
@autoreleasepool {
92180
if (reader) {
@@ -97,43 +185,26 @@ bool setupReader(int64_t startFrame) {
97185

98186
NSError* error = nil;
99187
reader = [[AVAssetReader alloc] initWithAsset:asset error:&error];
100-
if (error || !reader) return false;
101-
102-
// Configure for maximum performance with BGRA output
103-
NSDictionary* outputSettings = @{
104-
(id)kCVPixelBufferPixelFormatTypeKey: @(kCVPixelFormatType_32BGRA),
105-
(id)kCVPixelBufferMetalCompatibilityKey: @YES,
106-
(id)kCVPixelBufferIOSurfacePropertiesKey: @{},
107-
// Add VideoToolbox hardware acceleration hints
108-
AVVideoDecompressionPropertiesKey: @{
109-
(id)kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder: @YES,
110-
(id)kVTDecompressionPropertyKey_PropagatePerFrameHDRDisplayMetadata: @NO,
111-
},
112-
};
113-
114-
output = [[AVAssetReaderTrackOutput alloc]
115-
initWithTrack:videoTrack outputSettings:outputSettings];
116-
117-
// Critical performance settings
118-
output.alwaysCopiesSampleData = NO; // Avoid unnecessary copies
119-
output.supportsRandomAccess = YES; // Enable seeking
188+
if (error || !reader) {
189+
DEBUG_LOG("Failed to create reader: " << (error ? [[error localizedDescription] UTF8String] : "unknown error"));
190+
return false;
191+
}
192+
193+
NSDictionary* outputSettings = createOutputSettings();
194+
output = createTrackOutput(videoTrack, outputSettings);
120195

121196
if (![reader canAddOutput:output]) {
197+
DEBUG_LOG("Cannot add output to reader");
122198
reader = nil;
123199
output = nil;
124200
return false;
125201
}
126202

127203
[reader addOutput:output];
128-
129-
// Set time range if seeking
130-
if (startFrame > 0) {
131-
CMTime startTime = CMTimeMake(startFrame, cachedFPS);
132-
CMTime duration = CMTimeSubtract([asset duration], startTime);
133-
reader.timeRange = CMTimeRangeMake(startTime, duration);
134-
}
204+
applyTimeRange(reader, startFrame);
135205

136206
if (![reader startReading]) {
207+
DEBUG_LOG("Failed to start reading");
137208
reader = nil;
138209
output = nil;
139210
return false;
@@ -142,10 +213,47 @@ bool setupReader(int64_t startFrame) {
142213
currentFrame = startFrame;
143214
batch_count = 0;
144215
batch_index = 0;
216+
DEBUG_LOG("Reader initialized successfully");
145217
return true;
146218
}
147219
}
148220

221+
/// Copies frame from pixel buffer to destination.
/// `dst` receives rows packed at `cachedWidth * 4` bytes each. The copy is
/// limited to the rows and bytes-per-row the pixel buffer actually reports, so
/// a buffer smaller than the cached dimensions is never read past its end; in
/// that case the remaining destination bytes are left untouched.
/// The caller is expected to hold the pixel buffer's base-address lock.
void copyFrameData(CVImageBufferRef imageBuffer, uint8_t* dst) {
    const uint8_t* src =
        static_cast<const uint8_t*>(CVPixelBufferGetBaseAddress(imageBuffer));
    if (!src || !dst || cachedWidth <= 0 || cachedHeight <= 0) return;

    const size_t srcStride = CVPixelBufferGetBytesPerRow(imageBuffer);
    const size_t srcRows = CVPixelBufferGetHeight(imageBuffer);
    const size_t dstStride = static_cast<size_t>(cachedWidth) * 4;
    const size_t dstRows = static_cast<size_t>(cachedHeight);

    const size_t rows = srcRows < dstRows ? srcRows : dstRows;

    if (srcStride == dstStride) {
        // Rows are contiguous in both buffers: one bulk copy.
        memcpy(dst, src, rows * dstStride);
        return;
    }

    // Source rows carry padding (or are shorter): copy row by row.
    const size_t rowBytes = srcStride < dstStride ? srcStride : dstStride;
    for (size_t y = 0; y < rows; ++y) {
        memcpy(dst + y * dstStride, src + y * srcStride, rowBytes);
    }
}
239+
240+
/// Processes single sample buffer and adds to batch.
/// Copies the sample's image into slot `batch_count` of the batch buffer and
/// advances `batch_count` and `currentFrame`.
/// Returns false, leaving both counters unchanged, when the sample has no
/// image buffer, the slot would fall outside the batch buffer, or the pixel
/// buffer cannot be locked.
bool processSampleBuffer(CMSampleBufferRef sampleBuffer, size_t frame_size) {
    CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
    if (!imageBuffer) return false;

    // Never write past the end of the batch buffer.
    const size_t capacity = batch_buffer.size();
    const size_t offset = batch_count * frame_size;
    if (frame_size > capacity || offset > capacity - frame_size) {
        DEBUG_LOG("Batch buffer too small for frame slot " << batch_count);
        return false;
    }

    // The base address is only valid while the lock is held.
    if (CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly)
            != kCVReturnSuccess) {
        DEBUG_LOG("Failed to lock pixel buffer");
        return false;
    }

    copyFrameData(imageBuffer, batch_buffer.data() + offset);

    CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
    batch_count++;
    currentFrame++;

    return true;
}
256+
149257
/// Load next batch of frames into internal buffer
150258
void loadBatch() {
151259
if (!reader || !output || !isOpen) {
@@ -157,51 +265,39 @@ void loadBatch() {
157265
batch_count = 0;
158266

159267
@autoreleasepool {
160-
while (batch_count < BATCH_SIZE) {
161-
if (reader.status != AVAssetReaderStatusReading) break;
268+
while (batch_count < batch_size) {
269+
if (reader.status != AVAssetReaderStatusReading) {
270+
DEBUG_LOG("Reader stopped, loaded " << batch_count << " frames");
271+
break;
272+
}
162273

163274
CMSampleBufferRef sampleBuffer = [output copyNextSampleBuffer];
164-
if (!sampleBuffer) break;
165-
166-
CVImageBufferRef imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer);
167-
if (imageBuffer) {
168-
CVPixelBufferLockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
169-
170-
uint8_t* src = (uint8_t*)CVPixelBufferGetBaseAddress(imageBuffer);
171-
size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
172-
uint8_t* dst = batch_buffer.data() + (batch_count * frame_size);
173-
174-
if (bytesPerRow == cachedWidth * 4) {
175-
memcpy(dst, src, frame_size);
176-
} else {
177-
size_t copy_width = cachedWidth * 4;
178-
for (int y = 0; y < cachedHeight; y++) {
179-
memcpy(dst + y * copy_width,
180-
src + y * bytesPerRow,
181-
copy_width);
182-
}
183-
}
184-
185-
CVPixelBufferUnlockBaseAddress(imageBuffer, kCVPixelBufferLock_ReadOnly);
186-
batch_count++;
187-
currentFrame++;
275+
if (!sampleBuffer) {
276+
DEBUG_LOG("No more sample buffers, loaded " << batch_count << " frames");
277+
break;
188278
}
189279

280+
processSampleBuffer(sampleBuffer, frame_size);
190281
CFRelease(sampleBuffer);
191282
}
192283
}
193284

194285
batch_index = 0;
286+
if (batch_count > 0) {
287+
DEBUG_LOG("Loaded batch of " << batch_count << " frames");
288+
}
195289
}
196290

197-
/// Get pointer to next frame from batch
291+
/// Returns pointer to next frame from batch
198292
uint8_t* nextFrame() {
199293
if (!isOpen) return nullptr;
200294

201-
// Load new batch if needed
202295
if (batch_index >= batch_count) {
203296
loadBatch();
204-
if (batch_count == 0) return nullptr;
297+
if (batch_count == 0) {
298+
DEBUG_LOG("No more frames available");
299+
return nullptr;
300+
}
205301
}
206302

207303
size_t frame_size = cachedWidth * cachedHeight * 4;
@@ -210,8 +306,10 @@ void loadBatch() {
210306
return frame_ptr;
211307
}
212308

309+
/// Resets reader to specified frame index.
/// Does nothing when no video is open. Negative indices are treated as 0 so
/// the recorded frame position matches where the reader actually starts.
void reset(int64_t frameIndex) {
    if (!isOpen) return;

    const int64_t targetFrame = frameIndex < 0 ? 0 : frameIndex;
    DEBUG_LOG("Resetting to frame " << targetFrame);

    if (!setupReader(targetFrame)) {
        DEBUG_LOG("Reset to frame " << targetFrame << " failed");
    }
}
217315
};

0 commit comments

Comments
 (0)