Skip to content

Commit 6547841

Browse files
Add PSR-3 compliant logging interface and default StreamLogger (#75)
* feat: implement configurable logging interface with PSR-3 support * feat: rename StdoutLogger to StreamLogger and add tests for the StreamLogger * fix: Possible null reference for logger in helpers * feat: Update docs on logger * feat: Update the StreamLogger to work with multiple streams
1 parent 920ae45 commit 6547841

File tree

14 files changed

+460
-36
lines changed

14 files changed

+460
-36
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ Transformers::setup()
152152
->setAuthToken('...') // Set the auth token for downloading models. Defaults to `null`
153153
->setUserAgent('...') // Set the user agent for downloading models. Defaults to `transformers-php/{version}`
154154
->setImageDriver('...') // Set the image driver for processing images. Defaults to `IMAGICK'
155-
->apply(); // Apply the configuration
155+
->setLogger('...'); // Set the logger for TransformersPHP. Defaults to `null`
156156
```
157157

158158
You can call the `set` methods in any order, or leave any out entirely, in which case the default values are used. For
@@ -399,4 +399,4 @@ This package is a WIP, but here's a list of tasks and architectures currently te
399399
1. **[YOLOS](https://huggingface.co/docs/transformers/model_doc/yolos)** (from Huazhong University of Science &
400400
Technology) released with the
401401
paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://arxiv.org/abs/2106.00666)
402-
by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu.
402+
by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu.

composer.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"require": {
1616
"php": "^8.1",
1717
"ext-ffi": "*",
18+
"psr/log": "^1.1.3|^2.0|^3.0",
1819
"codewithkyrian/jinja-php": "^1.0",
1920
"codewithkyrian/transformers-libsloader": "^2.0",
2021
"imagine/imagine": "^1.3",

docs/configuration.md

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ Transformers::setup()
2222
->setRemotePathTemplate('custom/path/{model}/{file}')
2323
->setAuthToken('your-token')
2424
->setUserAgent('your-user-agent')
25-
->setImageDriver(ImageDriver::IMAGICK);
25+
->setImageDriver(ImageDriver::IMAGICK)
26+
->setLogger(new StreamLogger('transformers-php'));
2627
```
2728

2829
::: tip
@@ -105,6 +106,14 @@ Transformers::setup()
105106
->apply();
106107
```
107108

109+
### `setLogger(LoggerInterface $logger)`
110+
111+
This setting allows you to set a custom logger for TransformersPHP. No logger is set by default, but you can set a
112+
logger to debug TransformersPHP's internal behavior. The logger should implement the `Psr\Log\LoggerInterface` interface. TransformersPHP
113+
comes with a `StreamLogger` class, similar to Monolog's `StreamHandler`, which can be used to log to a stream (STDOUT, STDERR,
114+
or a file) and can be customized to log at different levels (debug, info, warning, error, critical). You can also pass in a
115+
logger that is already configured and ready to use, e.g., a Laravel logger.
116+
108117
## Standalone PHP Projects
109118

110119
In a standalone PHP project, the best place to add global configuration is in your project's bootstrap or initialization

examples/bootstrap.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44

55
use Codewithkyrian\Transformers\Transformers;
66
use Codewithkyrian\Transformers\Utils\ImageDriver;
7+
use Codewithkyrian\Transformers\Utils\StreamLogger;
78

89
require_once './vendor/autoload.php';
910

1011
Transformers::setup()
1112
->setCacheDir('/Users/Kyrian/.transformers')
12-
->setImageDriver(ImageDriver::VIPS);
13+
->setImageDriver(ImageDriver::VIPS)
14+
->setLogger(new StreamLogger(STDOUT));

examples/pipelines/asr.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@
2626
//$audioUrl = __DIR__ . '/../sounds/sample-1.mp3';
2727

2828
$streamer = WhisperTextStreamer::make()
29-
//->onTimestampStart(fn($timestamp) => dump($timestamp));
3029
->onStream(fn($text) => print($text));
3130

31+
3232
$output = $transcriber($audioUrl,
3333
maxNewTokens: 256,
3434
chunkLengthSecs: 24,

src/Models/Auto/PretrainedMixin.php

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use Codewithkyrian\Transformers\Exceptions\UnsupportedModelTypeException;
99
use Codewithkyrian\Transformers\Models\ModelArchitecture;
1010
use Codewithkyrian\Transformers\Models\Pretrained\PretrainedModel;
11+
use Codewithkyrian\Transformers\Transformers;
1112
use Codewithkyrian\Transformers\Utils\AutoConfig;
1213

1314
/**
@@ -18,6 +19,7 @@ abstract class PretrainedMixin
1819
{
1920
/**
2021
* Mapping from model type to model class.
22+
*
2123
* @var array<string, array<string, string>> The model class mappings.
2224
*/
2325
const MODEL_CLASS_MAPPINGS = [];
@@ -37,15 +39,16 @@ abstract class PretrainedMixin
3739
* @param string|null $cacheDir The cache directory to save the model in.
3840
* @param string $revision The revision of the model.
3941
* @param string|null $modelFilename The filename of the model.
42+
*
4043
* @return PretrainedModel The instantiated pretrained model.
4144
*/
4245
public static function fromPretrained(
43-
string $modelNameOrPath,
44-
bool $quantized = true,
45-
?array $config = null,
46-
?string $cacheDir = null,
47-
string $revision = 'main',
48-
?string $modelFilename = null,
46+
string $modelNameOrPath,
47+
bool $quantized = true,
48+
?array $config = null,
49+
?string $cacheDir = null,
50+
string $revision = 'main',
51+
?string $modelFilename = null,
4952
?callable $onProgress = null
5053
): PretrainedModel
5154
{
@@ -54,7 +57,6 @@ public static function fromPretrained(
5457
foreach (static::MODEL_CLASS_MAPPINGS as $modelClassMapping) {
5558
$modelClass = $modelClassMapping[$config->modelType] ?? null;
5659

57-
5860
if ($modelClass === null) continue;
5961

6062
$modelArchitecture = self::getModelArchitecture($modelClass);
@@ -72,7 +74,7 @@ public static function fromPretrained(
7274
}
7375

7476
if (static::BASE_IF_FAIL) {
75-
// echo "Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel.";
77+
Transformers::getLogger()->warning("Unknown model class for model type {$config->modelType}. Using base class PreTrainedModel.");
7678

7779
return PretrainedModel::fromPretrained(
7880
modelNameOrPath: $modelNameOrPath,
@@ -109,4 +111,4 @@ protected static function getModelArchitecture($modelClass): ModelArchitecture
109111
default => ModelArchitecture::EncoderOnly,
110112
};
111113
}
112-
}
114+
}

src/Models/Pretrained/PretrainedModel.php

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
use Codewithkyrian\Transformers\Models\ModelArchitecture;
2626
use Codewithkyrian\Transformers\Models\Output\ModelOutput;
2727
use Codewithkyrian\Transformers\Tensor\Tensor;
28+
use Codewithkyrian\Transformers\Transformers;
2829
use Codewithkyrian\Transformers\Utils\AutoConfig;
2930
use Codewithkyrian\Transformers\Utils\GenerationConfig;
3031
use Codewithkyrian\Transformers\Utils\Hub;
@@ -49,9 +50,7 @@ public function __construct(
4950
public InferenceSession $session,
5051
public ModelArchitecture $modelArchitecture = ModelArchitecture::EncoderOnly,
5152
...$args
52-
)
53-
{
54-
}
53+
) {}
5554

5655

5756
/**
@@ -72,6 +71,7 @@ public function __construct(
7271
* @param string $revision The specific model version to use. It can be a branch name, a tag name,
7372
* @param string|null $modelFilename The name of the model file to load. If not provided, will default to the
7473
* @param ModelArchitecture $modelArchitecture
74+
*
7575
* @return self The model instantiated from the configuration.
7676
* @throws HubException
7777
*/
@@ -217,7 +217,7 @@ public static function fromPretrained(
217217
default:
218218
{
219219
if ($modelArchitecture != ModelArchitecture::EncoderOnly) {
220-
echo "WARNING: {$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly.";
220+
Transformers::getLogger()?->warning("{$modelArchitecture->value} is not a valid model group. Defaulting to EncoderOnly.");
221221
}
222222

223223

@@ -251,6 +251,7 @@ public static function fromPretrained(
251251
* @param bool $fatal Whether to raise an error if the file could not be loaded.
252252
* @param callable|null $onProgress
253253
* @param mixed ...$sessionOptions
254+
*
254255
* @return InferenceSession|null
255256
* @throws HubException
256257
*/
@@ -283,7 +284,9 @@ public function __invoke(array $modelInputs): array|ModelOutput
283284
/**
284285
* Forward method for a pretrained model. If not overridden by a subclass, the correct forward method
285286
* will be chosen based on the model type.
287+
*
286288
* @param array $modelInputs The input data to the model in the format specified in the ONNX model.
289+
*
287290
* @return array{logits: Tensor, hidden_states: Tensor, attentions: Tensor} The output data from the model in the format specified in the ONNX model.
288291
*/
289292
public function forward(array $modelInputs): array
@@ -315,6 +318,7 @@ public function runSession(InferenceSession $session, array $inputs): array
315318
/**
316319
* @param InferenceSession $session
317320
* @param Tensor[] $inputs
321+
*
318322
* @return Tensor[]
319323
* @throws MissingModelInputException
320324
*/
@@ -345,20 +349,27 @@ public function validateInputs(array $inputNames, array $inputs): array
345349

346350

347351
if ($numInputsProvided > $numInputsNeeded) {
348-
// No missing inputs, but too many inputs were provided.
349-
// Warn the user and ignore the extra inputs.
352+
// No missing inputs, but too many inputs were provided so we warn the user and ignore the extra inputs.
350353
$ignored = array_diff(array_keys($inputs), $inputNames);
351-
echo 'WARNING: Too many inputs were provided (' . $numInputsProvided . ' > ' . $numInputsNeeded . ').
352-
The following inputs will be ignored: "' . implode(', ', $ignored) . '".';
354+
355+
$warning = sprintf(
356+
'Too many inputs were provided (%d > %d). The following inputs will be ignored: "%s".',
357+
$numInputsProvided,
358+
$numInputsNeeded,
359+
implode(', ', $ignored)
360+
);
361+
362+
Transformers::getLogger()->warning($warning);
353363
}
354364

355-
// return array_map(fn($i) => $i->toArray(), $inputs);
356365
return $inputs;
357366
}
358367

359368
/**
360369
* Prepares an attention mask for a sequence of tokens based on configuration options.
370+
*
361371
* @param Tensor $tokens The input tokens.
372+
*
362373
* @return Tensor The attention mask tensor.
363374
* @private
364375
*/
@@ -379,7 +390,7 @@ public function prepareAttentionMask(Tensor $tokens): Tensor
379390
if ($isPadTokenInInputs && $isPadTokenNotEqualToEosTokenId) {
380391
$mo = Tensor::mo();
381392

382-
$data = $mo->f(fn($x) => $x != $padTokenId, $tokens);
393+
$data = $mo->f(fn ($x) => $x != $padTokenId, $tokens);
383394

384395
return new Tensor($data, $tokens->dtype(), $tokens->shape());
385396
} else {
@@ -389,9 +400,11 @@ public function prepareAttentionMask(Tensor $tokens): Tensor
389400

390401
/**
391402
* Add position IDs to the feeds object.
403+
*
392404
* @param array $inputNames The names of the inputs to the model.
393405
* @param array $feeds The input to the model.
394406
* @param bool $useCacheBranch Whether to use the cache branch of the model.
407+
*
395408
* @return void
396409
*/
397410
public function preparePositionIds(array $inputNames, array &$feeds, bool $useCacheBranch): void
@@ -430,6 +443,7 @@ public function preparePositionIds(array $inputNames, array &$feeds, bool $useCa
430443
*
431444
* @param array $decoderResults The decoder results object.
432445
* @param ?array $pastKeyValues The previous past key values.
446+
*
433447
* @return array An object containing past key values.
434448
*/
435449
public function getPastKeyValues(array $decoderResults, ?array $pastKeyValues): array
@@ -458,6 +472,7 @@ public function getPastKeyValues(array $decoderResults, ?array $pastKeyValues):
458472
* Returns an object containing attentions from the given decoder results object.
459473
*
460474
* @param array $decoderResults The decoder results object.
475+
*
461476
* @return array An object containing attentions.
462477
*/
463478
public function getAttentions(array $decoderResults): array
@@ -540,11 +555,13 @@ public function addPastKeyValues(array &$decoderFeeds, ?array $pastKeyValues): v
540555
}
541556

542557
/** Generates text based on the given inputs and generation configuration using the model.
558+
*
543559
* @param Tensor $inputs The input token ids.
544560
* @param GenerationConfig|null $generationConfig The generation configuration to use. If null, default configuration will be used.
545561
* @param LogitsProcessorList|null $logitsProcessor An optional logits processor to use. If null, a new LogitsProcessorList instance will be created.
546562
* @param Tensor|null $inputsAttentionMask An optional attention mask for the inputs.
547563
* @param Streamer|null $streamer
564+
*
548565
* @return array An array of generated output sequences, where each sequence is an array of token IDs.
549566
* @throws Exception
550567
*/
@@ -615,7 +632,7 @@ public function generate(
615632

616633
$beams = $this->getStartBeams($inputs, $generationConfig, $numOutputTokens, $inputsAttentionMask);
617634

618-
while (array_some($beams, fn($beam) => !$beam['done']) && $numOutputTokens < $maxOutputTokens) {
635+
while (array_some($beams, fn ($beam) => !$beam['done']) && $numOutputTokens < $maxOutputTokens) {
619636
$newestBeams = [];
620637
foreach ($beams as $beam) {
621638
if ($beam['done']) {
@@ -676,7 +693,7 @@ public function generate(
676693
// Group and select best beams
677694
$newestBeams = array_merge(...array_map(
678695
function ($group) use ($generationConfig) {
679-
usort($group, fn($a, $b) => $b['score'] <=> $a['score']);
696+
usort($group, fn ($a, $b) => $b['score'] <=> $a['score']);
680697
return array_slice(
681698
$group,
682699
0,
@@ -702,7 +719,7 @@ function ($group) use ($generationConfig) {
702719
function ($batch) use ($key, $generationConfig) {
703720
if ($generationConfig->num_return_sequences > 1) {
704721
return array_slice(
705-
array_map(fn($beam) => $beam[$key], $batch),
722+
array_map(fn ($beam) => $beam[$key], $batch),
706723
0,
707724
$generationConfig->num_return_sequences
708725
);
@@ -752,7 +769,9 @@ function ($batch) use ($key, $generationConfig) {
752769
/**
753770
* This function merges multiple generation configs together to form a final generation config to be used by the model for text generation.
754771
* It first creates an empty `GenerationConfig` object, then it applies the model's own `generation_config` property to it. Finally, if a `generation_config` object was passed in the arguments, it overwrites the corresponding properties in the final config with those of the passed config object.
772+
*
755773
* @param ?GenerationConfig $generationConfig A `GenerationConfig` object containing generation parameters.
774+
*
756775
* @return GenerationConfig The final generation config object to be used by the model for text generation.
757776
*/
758777
protected function getGenerationConfig(?GenerationConfig $generationConfig): GenerationConfig
@@ -854,6 +873,7 @@ protected function getLogitsProcessor(
854873
* @param GenerationConfig $generationConfig The generation config.
855874
* @param int $numOutputTokens The number of tokens to generate.
856875
* @param Tensor|null $inputsAttentionMask The attention mask for the input token ids.
876+
*
857877
* @return array{ inputs: Tensor, output_token_ids: Tensor, score: float, done: bool, id: int } The initial beam for text generation.
858878
*
859879
*/
@@ -877,6 +897,7 @@ public function getStartBeams(
877897
* Runs the beam for text generation task
878898
*
879899
* @param array $beam The current beam being generated.
900+
*
880901
* @return array The updated beam after a single generation step.
881902
*
882903
*/
@@ -890,14 +911,15 @@ public function runBeam(array &$beam): array
890911
*
891912
* @param array $beam
892913
* @param array $output
914+
*
893915
* @throws Exception
894916
*/
895917
public function addAttentionsToBeam(array &$beam, array $output): void
896918
{
897919
if ($this->config->isEncoderDecoder) {
898920
if (empty($output['cross_attentions'])) {
899921
throw new Exception(
900-
"`output_attentions` is true, but the model did not produce cross-attentions. " .
922+
"`output_attentions` is true, but the model did not produce cross-attentions. ".
901923
"This is most likely because the model was not exported with `output_attentions=True`."
902924
);
903925
}
@@ -909,7 +931,7 @@ public function addAttentionsToBeam(array &$beam, array $output): void
909931

910932
if (empty($output['decoder_attentions'])) {
911933
throw new Exception(
912-
"`output_attentions` is true, but the model did not produce decoder-attentions. " .
934+
"`output_attentions` is true, but the model did not produce decoder-attentions. ".
913935
"This is most likely because the model was not exported with `output_attentions=True`."
914936
);
915937
}
@@ -935,6 +957,7 @@ public function updateBeam(array &$beam, int $newTokenId): void
935957
* Groups an array of beam objects by their ids.
936958
*
937959
* @param array $beams The array of beam objects to group.
960+
*
938961
* @return array An array of arrays, where each inner array contains beam objects with the same id.
939962
*/
940963
public function groupBeams(array $beams): array

src/Models/Pretrained/VisionEncoderDecoderModel.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
use Codewithkyrian\Transformers\Models\Auto\AutoModel;
1010
use Codewithkyrian\Transformers\Models\Auto\AutoModelForCausalLM;
1111
use Codewithkyrian\Transformers\Models\ModelArchitecture;
12+
use Codewithkyrian\Transformers\Transformers;
1213
use Codewithkyrian\Transformers\Utils\AutoConfig;
1314
use Codewithkyrian\Transformers\Utils\GenerationConfig;
1415
use Codewithkyrian\Transformers\Utils\InferenceSession;
@@ -32,6 +33,7 @@ class VisionEncoderDecoderModel extends PretrainedModel
3233

3334
/**
3435
* Creates a new instance of the `VisionEncoderDecoderModel` class.
36+
*
3537
* @param AutoConfig $config The configuration array specifying the hyperparameters and other model settings.
3638
* @param mixed $session The ONNX session containing the encoder model.
3739
* @param InferenceSession $decoderMergedSession The ONNX session containing the merged decoder model.
@@ -60,7 +62,7 @@ public function __construct(
6062
?? AutoModel::ENCODER_DECODER_MODEL_MAPPING[$encoderModelType];
6163

6264
if (!$encoderModel) {
63-
echo "Model type for encoder '{$encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/CodeWithKyrian/transformers-php/issues/new/choose.";
65+
Transformers::getLogger()?->warning("Model type for encoder '{$encoderModelType}' not found, assuming encoder-only architecture. Please report this at https://github.com/CodeWithKyrian/transformers-php/issues/new/choose.");
6466
}
6567

6668
// Validate decoder

0 commit comments

Comments
 (0)