Skip to content

Commit 980a880

Browse files
committed
refactor: consolidates down to a single File class
1 parent 4c25a86 commit 980a880

File tree

10 files changed

+440
-559
lines changed

10 files changed

+440
-559
lines changed

src/Files/DTO/File.php

Lines changed: 377 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,377 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace WordPress\AiClient\Files\DTO;
6+
7+
use WordPress\AiClient\Common\Contracts\WithJsonSchemaInterface;
8+
use WordPress\AiClient\Files\Contracts\FileInterface;
9+
use WordPress\AiClient\Files\Enums\FileTypeEnum;
10+
use WordPress\AiClient\Files\ValueObjects\MimeType;
11+
12+
/**
 * Represents a file in the AI client.
 *
 * The constructor takes a single string and classifies it, in this order, as:
 * an HTTP(S) URL, a base64 data URI, an existing regular local file, plain
 * base64 data, or any other existing filesystem path. URLs are stored as
 * remote files; everything else is stored inline as base64-encoded data.
 *
 * @since n.e.x.t
 */
class File implements FileInterface, WithJsonSchemaInterface
{
    /**
     * Matches `data:[<mime>[;param=value...];]base64,<payload>`.
     * Capture group 1 is the MIME type (may be empty), group 2 the base64 payload.
     */
    private const DATA_URI_PATTERN = '/^data:(?:([a-zA-Z0-9][a-zA-Z0-9!#$&\-\^_+.]*\/[a-zA-Z0-9][a-zA-Z0-9!#$&\-\^_+.]*'
        . '(?:;[a-zA-Z0-9\-]+=[a-zA-Z0-9\-]+)*)?;)?base64,([A-Za-z0-9+\/]*={0,2})$/';

    /**
     * Matches a string made only of base64 alphabet characters plus optional padding.
     */
    private const PLAIN_BASE64_PATTERN = '/^[A-Za-z0-9+\/]*={0,2}$/';

    /**
     * JSON Schema pattern used to validate the `mimeType` property.
     */
    private const MIME_TYPE_SCHEMA_PATTERN = '^[a-zA-Z0-9][a-zA-Z0-9!#$&\\-\\^_+.]*\\/[a-zA-Z0-9]'
        . '[a-zA-Z0-9!#$&\\-\\^_+.]*$';

    /**
     * @var MimeType The MIME type of the file.
     */
    private MimeType $mimeType;

    /**
     * @var FileTypeEnum The type of file storage.
     */
    private FileTypeEnum $fileType;

    /**
     * @var string The file data (URL for remote, base64 for inline).
     */
    private string $data;

    /**
     * Constructor.
     *
     * @since n.e.x.t
     *
     * @param string      $file     The file string (URL, data URI, base64 data, or local path).
     * @param string|null $mimeType The MIME type of the file (optional).
     * @throws \InvalidArgumentException If the file format is invalid or MIME type cannot be determined.
     * @throws \RuntimeException         If a local path exists but cannot be read.
     */
    public function __construct(string $file, ?string $mimeType = null)
    {
        $this->detectAndProcessFile($file, $mimeType);
    }

    /**
     * Classifies the input string and populates file type, data and MIME type.
     *
     * @since n.e.x.t
     *
     * @param string      $file             The file string to process.
     * @param string|null $providedMimeType The explicitly provided MIME type.
     * @throws \InvalidArgumentException If the file format is invalid or MIME type cannot be determined.
     * @throws \RuntimeException         If a local path exists but cannot be read.
     */
    private function detectAndProcessFile(string $file, ?string $providedMimeType): void
    {
        // 1. Remote HTTP(S) URL: stored as-is.
        if ($this->isUrl($file)) {
            $this->fileType = FileTypeEnum::remote();
            $this->data = $file;
            $this->mimeType = $this->determineMimeType($providedMimeType, null, $file);
            return;
        }

        // 2. Data URI: keep only the base64 payload, remember the embedded MIME type.
        if (preg_match(self::DATA_URI_PATTERN, $file, $matches) === 1) {
            $this->fileType = FileTypeEnum::inline();
            $this->data = $matches[2];
            $extractedMimeType = $matches[1] === '' ? null : $matches[1];
            $this->mimeType = $this->determineMimeType($providedMimeType, $extractedMimeType, null);
            return;
        }

        // 3. Existing regular file. This must run before the plain-base64
        //    heuristic: a path such as "/tmp/phpYzdqkD" or "uploads/photo"
        //    consists solely of base64-alphabet characters and would otherwise
        //    be stored as if the path itself were the file's data.
        //    The length guard keeps large base64 payloads away from the
        //    filesystem layer; nothing that long can be a path anyway.
        if (strlen($file) <= PHP_MAXPATHLEN && is_file($file)) {
            $this->loadLocalFile($file, $providedMimeType);
            return;
        }

        // 4. Plain base64: there is nothing to infer the MIME type from.
        if (preg_match(self::PLAIN_BASE64_PATTERN, $file) === 1) {
            if ($providedMimeType === null) {
                throw new \InvalidArgumentException(
                    'MIME type is required when providing plain base64 data without data URI format.'
                );
            }
            $this->fileType = FileTypeEnum::inline();
            $this->data = $file;
            $this->mimeType = new MimeType($providedMimeType);
            return;
        }

        // 5. Any other existing path (e.g. a directory). Reading it is still
        //    attempted so that an unreadable path surfaces as a RuntimeException.
        if (file_exists($file)) {
            $this->loadLocalFile($file, $providedMimeType);
            return;
        }

        throw new \InvalidArgumentException(
            'Invalid file provided. Expected URL, base64 data, or valid local file path.'
        );
    }

    /**
     * Loads a local path as an inline file.
     *
     * @since n.e.x.t
     *
     * @param string      $filePath         The local path to read.
     * @param string|null $providedMimeType The explicitly provided MIME type.
     * @throws \RuntimeException         If the file cannot be read.
     * @throws \InvalidArgumentException If the MIME type cannot be determined.
     */
    private function loadLocalFile(string $filePath, ?string $providedMimeType): void
    {
        $this->fileType = FileTypeEnum::inline();
        $this->data = $this->convertFileToBase64($filePath);
        $this->mimeType = $this->determineMimeType($providedMimeType, null, $filePath);
    }

    /**
     * Checks if a string is a valid HTTP or HTTPS URL.
     *
     * @since n.e.x.t
     *
     * @param string $string The string to check.
     * @return bool True if the string validates as a URL and uses the http or https scheme.
     */
    private function isUrl(string $string): bool
    {
        if (filter_var($string, FILTER_VALIDATE_URL) === false) {
            return false;
        }

        return preg_match('/^https?:\/\//i', $string) === 1;
    }

    /**
     * Reads a local file and returns its contents base64-encoded.
     *
     * @since n.e.x.t
     *
     * @param string $filePath The path to the local file.
     * @return string The base64-encoded file data.
     * @throws \RuntimeException If the file cannot be read.
     */
    private function convertFileToBase64(string $filePath): string
    {
        // The warning is suppressed because the failure is reported via the exception below.
        $fileContent = @file_get_contents($filePath);

        if ($fileContent === false) {
            throw new \RuntimeException(
                sprintf('Unable to read file: %s', $filePath)
            );
        }

        return base64_encode($fileContent);
    }

    /**
     * Gets the file type.
     *
     * @since n.e.x.t
     *
     * @return FileTypeEnum The file type.
     */
    public function getFileType(): FileTypeEnum
    {
        return $this->fileType;
    }

    /**
     * Gets the URL for remote files.
     *
     * @since n.e.x.t
     *
     * @return string The URL.
     * @throws \RuntimeException If the file is not remote.
     */
    public function getUrl(): string
    {
        if (!$this->fileType->isRemote()) {
            throw new \RuntimeException('Cannot get URL for non-remote file.');
        }

        return $this->data;
    }

    /**
     * Gets the base64-encoded data for inline files.
     *
     * @since n.e.x.t
     *
     * @return string The plain base64-encoded data (without data URI prefix).
     * @throws \RuntimeException If the file is not inline.
     */
    public function getBase64Data(): string
    {
        if (!$this->fileType->isInline()) {
            throw new \RuntimeException('Cannot get base64 data for non-inline file.');
        }

        return $this->data;
    }

    /**
     * Gets the data as a data URL for inline files.
     *
     * @since n.e.x.t
     *
     * @return string The data URL in format: data:[mimeType];base64,[data].
     * @throws \RuntimeException If the file is not inline.
     */
    public function getDataUrl(): string
    {
        if (!$this->fileType->isInline()) {
            throw new \RuntimeException('Cannot get data URL for non-inline file.');
        }

        return sprintf('data:%s;base64,%s', $this->getMimeType(), $this->data);
    }

    /**
     * Gets the MIME type of the file as a string.
     *
     * @since n.e.x.t
     *
     * @return string The MIME type string value.
     */
    public function getMimeType(): string
    {
        return (string) $this->mimeType;
    }

    /**
     * Gets the MIME type object.
     *
     * @since n.e.x.t
     *
     * @return MimeType The MIME type object.
     */
    public function getMimeTypeObject(): MimeType
    {
        return $this->mimeType;
    }

    /**
     * Checks if the file is a video.
     *
     * @since n.e.x.t
     *
     * @return bool True if the file is a video.
     */
    public function isVideo(): bool
    {
        return $this->mimeType->isVideo();
    }

    /**
     * Checks if the file is an image.
     *
     * @since n.e.x.t
     *
     * @return bool True if the file is an image.
     */
    public function isImage(): bool
    {
        return $this->mimeType->isImage();
    }

    /**
     * Checks if the file is audio.
     *
     * @since n.e.x.t
     *
     * @return bool True if the file is audio.
     */
    public function isAudio(): bool
    {
        return $this->mimeType->isAudio();
    }

    /**
     * Checks if the file is text.
     *
     * @since n.e.x.t
     *
     * @return bool True if the file is text.
     */
    public function isText(): bool
    {
        return $this->mimeType->isText();
    }

    /**
     * Determines the MIME type from the available sources.
     *
     * Precedence: the explicitly provided type, then the type embedded in a
     * data URI, then the file extension of the path or URL.
     *
     * @since n.e.x.t
     *
     * @param string|null $providedMimeType  The explicitly provided MIME type.
     * @param string|null $extractedMimeType The MIME type extracted from data URI.
     * @param string|null $pathOrUrl         The file path or URL to extract extension from.
     * @return MimeType The determined MIME type.
     * @throws \InvalidArgumentException If MIME type cannot be determined.
     */
    private function determineMimeType(
        ?string $providedMimeType,
        ?string $extractedMimeType,
        ?string $pathOrUrl
    ): MimeType {
        if ($providedMimeType !== null) {
            return new MimeType($providedMimeType);
        }

        if ($extractedMimeType !== null) {
            return new MimeType($extractedMimeType);
        }

        if ($pathOrUrl !== null) {
            // For URLs only the path component may carry a file extension;
            // falling back to the whole URL would read the host's TLD
            // ("https://example.com" -> "com") as an extension. Local paths
            // are used verbatim because "#" and "?" are ordinary filename
            // characters there and must not be treated as URL delimiters.
            $path = $this->isUrl($pathOrUrl)
                ? (string) parse_url($pathOrUrl, PHP_URL_PATH)
                : $pathOrUrl;

            $extension = pathinfo($path, PATHINFO_EXTENSION);
            if ($extension !== '') {
                try {
                    return MimeType::fromExtension($extension);
                } catch (\InvalidArgumentException $e) {
                    // Extension not recognized: fall through to the generic error below.
                    unset($e);
                }
            }
        }

        throw new \InvalidArgumentException(
            'Unable to determine MIME type. Please provide it explicitly.'
        );
    }

    /**
     * {@inheritDoc}
     *
     * The schema accepts either a remote file (`mimeType` + `url`) or an
     * inline file (`mimeType` + `base64Data`).
     *
     * @since n.e.x.t
     */
    public static function getJsonSchema(): array
    {
        $mimeTypeSchema = [
            'type' => 'string',
            'description' => 'The MIME type of the file.',
            'pattern' => self::MIME_TYPE_SCHEMA_PATTERN,
        ];

        return [
            'type' => 'object',
            'oneOf' => [
                [
                    'properties' => [
                        'mimeType' => $mimeTypeSchema,
                        'url' => [
                            'type' => 'string',
                            'format' => 'uri',
                            'description' => 'The URL to the remote file.',
                        ],
                    ],
                    'required' => ['mimeType', 'url'],
                ],
                [
                    'properties' => [
                        'mimeType' => $mimeTypeSchema,
                        'base64Data' => [
                            'type' => 'string',
                            'description' => 'The base64-encoded file data.',
                        ],
                    ],
                    'required' => ['mimeType', 'base64Data'],
                ],
            ],
        ];
    }
}

0 commit comments

Comments
 (0)