@@ -13,12 +13,14 @@ class ImageFeatureExtractor extends FeatureExtractor
1313{
1414 /**
1515 * The mean values for image normalization.
16+ *
1617 * @var int|int[]|null
1718 */
1819 protected int |array |null $ imageMean ;
1920
2021 /**
2122 * The standard deviation values for image normalization.
23+ *
2224 * @var int|int[]|null
2325 */
2426 protected int |array |null $ imageStd ;
@@ -30,24 +32,28 @@ class ImageFeatureExtractor extends FeatureExtractor
3032
3133 /**
3234 * Whether to rescale the image pixel values to the [0,1] range.
35+ *
3336 * @var bool
3437 */
3538 protected bool $ doRescale ;
3639
3740 /**
3841 * The factor to use for rescaling the image pixel values.
42+ *
3943 * @var float
4044 */
4145 protected float $ rescaleFactor ;
4246
4347 /**
4448 * Whether to normalize the image pixel values.
49+ *
4550 * @var ?bool
4651 */
4752 protected ?bool $ doNormalize ;
4853
4954 /**
5055 * Whether to resize the image.
56+ *
5157 * @var ?bool
5258 */
5359 protected ?bool $ doResize ;
@@ -56,6 +62,7 @@ class ImageFeatureExtractor extends FeatureExtractor
5662
5763 /**
5864 * The size to resize the image to.
65+ *
5966 * @var ?array
6067 */
6168 protected ?array $ size ;
@@ -100,7 +107,9 @@ public function __construct(public array $config)
100107
101108 /**
102109 * Crops the margin of the image. Gray pixels are considered margin (i.e., pixels with a value below the threshold).
110+ *
103111 * @param int $grayThreshold Value below which pixels are considered to be gray.
112+ *
104113 * @return static The cropped image.
105114 */
106115 public function cropMargin (Image $ image , int $ grayThreshold = 200 ): static
@@ -152,18 +161,20 @@ public function cropMargin(Image $image, int $grayThreshold = 200): static
152161
153162 /**
154163 * Pad the image by a certain amount.
164+ *
155165 * @param Tensor $imageTensor The pixel data to pad.
156166 * @param int[]|int $padSize The dimensions of the padded image.
157167 * @param string $mode The type of padding to add.
158168 * @param bool $center Whether to center the image.
159169 * @param int $constantValues The constant value to use for padding.
170+ *
160171 * @return Tensor The padded pixel data and image dimensions.
161172 * @throws \Exception
162173 */
163174 public function padImage (
164175 Tensor $ imageTensor ,
165176 int |array $ padSize ,
166- string $ tensorFormat = 'CHW ' , // 'HWC' or 'CHW
177+ string $ tensorFormat = 'CHW ' , // 'HWC' or 'CHW
167178 string $ mode = 'constant ' ,
168179 bool $ center = false ,
169180 int $ constantValues = 0
@@ -260,8 +271,10 @@ private function calculateReflectOffset(int $val, int $max): int
260271 /**
261272 * Find the target (width, height) dimension of the output image after
262273 * resizing given the input image and the desired size.
274+ *
263275 * @param Image $image The image to be resized.
264276 * @param int|array|null $size The size to use for resizing the image.
277+ *
265278 * @return array The target (width, height) dimension of the output image after resizing.
266279 */
267280 public function getResizeOutputImageSize (Image $ image , int |array |null $ size ): array
@@ -336,7 +349,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
336349 } elseif ($ this ->sizeDivisibility != null ) {
337350 return $ this ->enforceSizeDivisibility ([$ srcWidth , $ srcHeight ], $ this ->sizeDivisibility );
338351 } else {
339- throw new \Exception ("Could not resize image due to unsupported 'size' parameter passed: " . json_encode ($ size ));
352+ throw new \Exception ("Could not resize image due to unsupported 'size' parameter passed: " . json_encode ($ size ));
340353 }
341354 }
342355
@@ -349,6 +362,7 @@ public function getResizeOutputImageSize(Image $image, int|array|null $size): ar
349362 * @param ?bool $doPad
350363 * @param ?bool $doConvertRGB
351364 * @param ?bool $doConvertGrayscale
365+ *
352366 * @return array The preprocessed image.
353367 * @throws \Exception
354368 */
@@ -412,7 +426,7 @@ public function preprocess(
412426 if ($ doNormalize ?? $ this ->doNormalize ) {
413427 if (is_array ($ this ->imageMean )) {
414428 // Negate the mean values to add instead of subtract
415- $ negatedMean = array_map (fn ($ mean ) => -$ mean , $ this ->imageMean );
429+ $ negatedMean = array_map (fn ($ mean ) => -$ mean , $ this ->imageMean );
416430 $ imageMean = Tensor::repeat ($ negatedMean , $ image ->height () * $ image ->width (), 1 );
417431 } else {
418432 $ imageMean = Tensor::fill ([$ image ->channels * $ image ->height () * $ image ->width ()], -$ this ->imageMean );
@@ -421,7 +435,7 @@ public function preprocess(
421435
422436 if (is_array ($ this ->imageStd )) {
423437 // Invert the standard deviation values to multiply instead of divide
424- $ inversedStd = array_map (fn ($ std ) => 1 / $ std , $ this ->imageStd );
438+ $ inversedStd = array_map (fn ($ std ) => 1 / $ std , $ this ->imageStd );
425439 $ imageStd = Tensor::repeat ($ inversedStd , $ image ->height () * $ image ->width (), 1 );
426440 } else {
427441 $ imageStd = Tensor::fill ([$ image ->channels * $ image ->height () * $ image ->width ()], 1 / $ this ->imageStd );
@@ -433,7 +447,7 @@ public function preprocess(
433447 $ imageStd = $ imageStd ->reshape ($ imageTensor ->shape ());
434448
435449 if (count ($ imageMean ) !== $ image ->channels || count ($ imageStd ) !== $ image ->channels ) {
436- throw new \Exception ("When set to arrays, the length of `imageMean` ( " . count ($ imageMean ) . ") and `imageStd` ( " . count ($ imageStd ) . ") must match the number of channels in the image ( {$ image ->channels }). " );
450+ throw new \Exception ("When set to arrays, the length of `imageMean` ( " . count ($ imageMean ). ") and `imageStd` ( " . count ($ imageStd ). ") must match the number of channels in the image ( {$ image ->channels }). " );
437451 }
438452
439453 // Normalize pixel data
@@ -461,8 +475,10 @@ public function preprocess(
461475 * Calls the feature extraction process on an array of images,
462476 * preprocesses each image, and concatenates the resulting
463477 * features into a single Tensor.
478+ *
464479 * @param Image|Image[] $images The image(s) to extract features from.
465480 * @param mixed ...$args Additional arguments.
481+ *
466482 * @return array An object containing the concatenated pixel values (and other metadata) of the preprocessed images.
467483 */
468484 public function __invoke (Image |array $ images , ...$ args ): array
@@ -491,4 +507,48 @@ public function __invoke(Image|array $images, ...$args): array
491507 ];
492508 }
493509
/**
 * Snap an image size down to multiples of a divisor.
 *
 * Each dimension is replaced by the largest multiple of $divisor that does
 * not exceed it. The quotient is clamped to at least 1, so (for a positive
 * divisor) a dimension smaller than $divisor becomes exactly $divisor
 * rather than 0.
 *
 * @param array{int, int} $size The (width, height) of the image.
 * @param int $divisor The divisor both dimensions must be a multiple of.
 *
 * @return array{int, int} The adjusted (width, height).
 */
private function enforceSizeDivisibility(array $size, int $divisor): array
{
    // Applies the same round-down-and-clamp rule to a single dimension.
    $snap = static fn($dimension) => max(intdiv($dimension, $divisor), 1) * $divisor;

    [$width, $height] = $size;

    return [$snap($width), $snap($height)];
}
527+
/**
 * Constrain a value to be a multiple of a number.
 *
 * The value is first rounded to the nearest multiple of $multiple, with
 * exact halves going to the even quotient. If that result is above $maxVal,
 * the multiple at or below $val is used instead. If the result is then
 * below $minVal, the multiple at or above $val is used.
 *
 * @param int $val The value to constrain.
 * @param int $multiple The number to constrain to.
 * @param int $minVal The minimum value to constrain to.
 * @param int|null $maxVal The maximum value to constrain to, or null for no upper bound.
 *
 * @return int The constrained value, an integer multiple of $multiple.
 * @throws \DivisionByZeroError If $multiple is 0.
 */
private function constraintToMultipleOf(int $val, int $multiple, int $minVal = 0, ?int $maxVal = null): int
{
    $a = $val / $multiple;

    // round(), floor() and ceil() return floats. Cast the quotient back to
    // int before multiplying so the declared int return type is honoured
    // even when strict_types is enabled.
    $x = ((int)round($a, 0, PHP_ROUND_HALF_EVEN)) * $multiple;

    if ($maxVal !== null && $x > $maxVal) {
        // Rounding overshot the maximum: fall back to rounding down.
        $x = ((int)floor($a)) * $multiple;
    }

    if ($x < $minVal) {
        // Result is under the minimum: round up instead.
        $x = ((int)ceil($a)) * $multiple;
    }

    return $x;
}
494554}
0 commit comments