Skip to content

Commit d21bee6

Browse files
committed
Added docs to HTTP & LLM packages
1 parent 5319422 commit d21bee6

17 files changed

+285
-34
lines changed

src-polyglot/Embeddings/Contracts/CanVectorize.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,19 @@
33

44
use Cognesy\Polyglot\Embeddings\EmbeddingsResponse;
55

6+
/**
7+
* Interface CanVectorize
8+
*
9+
* Defines the contract for embedding generation services
10+
*/
611
interface CanVectorize
712
{
13+
/**
14+
* Generate embeddings for the input
15+
*
16+
* @param array $input
17+
* @param array $options
18+
* @return EmbeddingsResponse
19+
*/
820
public function vectorize(array $input, array $options = []) : EmbeddingsResponse;
921
}

src-polyglot/Embeddings/Data/Vector.php

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
namespace Cognesy\Polyglot\Embeddings\Data;
44

5+
/**
6+
* Class Vector
7+
*
8+
* Represents an embedding - vector of floating point values
9+
*/
510
class Vector
611
{
712
public const METRIC_COSINE = 'cosine';
@@ -14,15 +19,28 @@ public function __construct(
1419
private int|string $id = 0,
1520
) {}
1621

17-
/** @return float[] */
22+
/**
23+
* Get the vector values
24+
* @return float[]
25+
*/
1826
public function values() : array {
1927
return $this->values;
2028
}
2129

30+
/**
31+
* Get the vector ID
32+
* @return int|string
33+
*/
2234
public function id() : int|string {
2335
return $this->id;
2436
}
2537

38+
/**
39+
* Compare this vector to another vector using a metric
40+
* @param Vector $vector
41+
* @param string $metric
42+
* @return float
43+
*/
2644
public function compareTo(Vector $vector, string $metric) : float {
2745
return match ($metric) {
2846
self::METRIC_COSINE => self::cosineSimilarity($this->values, $vector->values),
@@ -33,6 +51,7 @@ public function compareTo(Vector $vector, string $metric) : float {
3351
}
3452

3553
/**
54+
* Calculate the cosine similarity between two vectors
3655
* @param float[] $v1
3756
* @param float[] $v2
3857
*/
@@ -51,17 +70,8 @@ public static function cosineSimilarity(array $v1, array $v2) : float {
5170
return $dotProduct / ($magnitudeV1 * $magnitudeV2);
5271
}
5372

54-
// private function cosineSimilarity(array $vec1, array $vec2) : float {
55-
// $dotProduct = array_sum(array_map(fn($a, $b) => $a * $b, $vec1, $vec2));
56-
// $magnitude1 = sqrt(array_sum(array_map(fn($a) => $a * $a, $vec1)));
57-
// $magnitude2 = sqrt(array_sum(array_map(fn($b) => $b * $b, $vec2)));
58-
// if ($magnitude1 * $magnitude2 == 0) {
59-
// return 0;
60-
// }
61-
// return $dotProduct / ($magnitude1 * $magnitude2);
62-
// }
63-
6473
/**
74+
* Calculate the Euclidean distance between two vectors
6575
* @param float[] $v1
6676
* @param float[] $v2
6777
*/
@@ -75,6 +85,7 @@ public static function euclideanDistance(array $v1, array $v2) : float {
7585
}
7686

7787
/**
88+
* Calculate the dot product between two vectors
7889
* @param float[] $v1
7990
* @param float[] $v2
8091
*/

src-polyglot/Embeddings/Embeddings.php

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
use Cognesy\Utils\Settings;
1818
use InvalidArgumentException;
1919

20+
/**
21+
* Embeddings is a facade responsible for generating embeddings for provided input data
22+
*/
2023
class Embeddings
2124
{
2225
use HasFinders;
@@ -43,34 +46,65 @@ public function __construct(
4346

4447
// PUBLIC ///////////////////////////////////////////////////
4548

49+
/**
50+
* Configures the Embeddings instance with the given connection name.
51+
* @param string $connection
52+
* @return $this
53+
*/
4654
public function withConnection(string $connection) : self {
4755
$this->config = EmbeddingsConfig::load($connection);
4856
$this->driver = $this->getDriver($this->config, $this->httpClient);
4957
return $this;
5058
}
5159

60+
/**
61+
* Configures the Embeddings instance with the given configuration.
62+
* @param EmbeddingsConfig $config
63+
* @return $this
64+
*/
5265
public function withConfig(EmbeddingsConfig $config) : self {
5366
$this->config = $config;
5467
$this->driver = $this->getDriver($this->config, $this->httpClient);
5568
return $this;
5669
}
5770

71+
/**
72+
* Configures the Embeddings instance with the given model name.
73+
* @param string $model
74+
* @return $this
75+
*/
5876
public function withModel(string $model) : self {
5977
$this->config->model = $model;
6078
return $this;
6179
}
6280

81+
/**
82+
* Configures the Embeddings instance with the given HTTP client.
83+
* @param CanHandleHttp $httpClient
84+
* @return $this
85+
*/
6386
public function withHttpClient(CanHandleHttp $httpClient) : self {
6487
$this->httpClient = $httpClient;
6588
$this->driver = $this->getDriver($this->config, $this->httpClient);
6689
return $this;
6790
}
6891

92+
/**
93+
* Configures the Embeddings instance with the given driver.
94+
* @param CanVectorize $driver
95+
* @return $this
96+
*/
6997
public function withDriver(CanVectorize $driver) : self {
7098
$this->driver = $driver;
7199
return $this;
72100
}
73101

102+
/**
103+
* Generates embeddings for the provided input data.
104+
* @param string|array $input
105+
* @param array $options
106+
* @return EmbeddingsResponse
107+
*/
74108
public function create(string|array $input, array $options = []) : EmbeddingsResponse {
75109
if (is_string($input)) {
76110
$input = [$input];
@@ -83,6 +117,12 @@ public function create(string|array $input, array $options = []) : EmbeddingsRes
83117

84118
// INTERNAL /////////////////////////////////////////////////
85119

120+
/**
121+
* Returns the driver for the specified configuration.
122+
* @param EmbeddingsConfig $config
123+
* @param CanHandleHttp $httpClient
124+
* @return CanVectorize
125+
*/
86126
protected function getDriver(EmbeddingsConfig $config, CanHandleHttp $httpClient) : CanVectorize {
87127
return match ($config->providerType) {
88128
LLMProviderType::Azure->value => new AzureOpenAIDriver($config, $httpClient, $this->events),

src-polyglot/Embeddings/EmbeddingsResponse.php

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
use Cognesy\Polyglot\Embeddings\Data\Vector;
66
use Cognesy\Polyglot\LLM\Data\Usage;
77

8+
/**
9+
* EmbeddingsResponse represents the response from an embeddings request
10+
*/
811
class EmbeddingsResponse
912
{
1013
public function __construct(
@@ -13,23 +16,40 @@ public function __construct(
1316
public ?Usage $usage,
1417
) {}
1518

19+
/**
20+
* Get the first vector
21+
* @return Vector
22+
*/
1623
public function first() : Vector {
1724
return $this->vectors[0];
1825
}
1926

27+
/**
28+
* Get the last vector
29+
* @return Vector
30+
*/
2031
public function last() : Vector {
2132
return $this->vectors[count($this->vectors) - 1];
2233
}
2334

35+
/**
36+
* Get all vectors
37+
* @return Vector[]
38+
*/
2439
public function all() : array {
2540
return $this->vectors;
2641
}
2742

43+
/**
44+
* Get the usage data for this response
45+
* @return Usage
46+
*/
2847
public function usage() : Usage {
2948
return $this->usage;
3049
}
3150

3251
/**
52+
* Split the vectors into two EmbeddingsResponse objects
3353
* @param int $index
3454
* @return EmbeddingsResponse[]
3555
*/
@@ -46,13 +66,21 @@ public function split(int $index) : array {
4666
];
4767
}
4868

69+
/**
70+
* Get the values of all vectors
71+
* @return array
72+
*/
4973
public function toValuesArray() : array {
5074
return array_map(
5175
fn(Vector $vector) => $vector->values(),
5276
$this->vectors
5377
);
5478
}
5579

80+
/**
81+
* Get the total number of tokens
82+
* @return int
83+
*/
5684
public function totalTokens() : int {
5785
return $this->usage()->total();
5886
}

src-polyglot/Embeddings/Traits/HasFinders.php

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,20 @@
33

44
use Cognesy\Polyglot\Embeddings\Data\Vector;
55

6+
/**
7+
* Trait HasFinders
8+
*
9+
* Provides methods for finding similar documents
10+
*/
611
trait HasFinders
712
{
13+
/**
14+
* Find the most similar documents to the query
15+
* @param string $query
16+
* @param array $documents
17+
* @param int $topK
18+
* @return array
19+
*/
820
public function findSimilar(string $query, array $documents, int $topK = 5) : array {
921
// generate embeddings for query and documents (in a single request)
1022
[$queryVector, $docVectors] = $this->create(array_merge([$query], $documents))->split(1);
@@ -20,6 +32,13 @@ public function findSimilar(string $query, array $documents, int $topK = 5) : ar
2032
], array_keys($matches));
2133
}
2234

35+
/**
36+
* Find the top K most similar documents to the query vector
37+
* @param array $queryVector
38+
* @param array $documentVectors
39+
* @param int $n
40+
* @return array
41+
*/
2342
public static function findTopK(array $queryVector, array $documentVectors, int $n = 5) : array {
2443
$similarity = [];
2544
foreach ($documentVectors as $i => $vector) {
@@ -29,11 +48,3 @@ public static function findTopK(array $queryVector, array $documentVectors, int
2948
return array_slice($similarity, 0, $n, true);
3049
}
3150
}
32-
33-
//if ($this->clientType !== ClientType::Jina && $this->model === 'jina-colbert-v2') {
34-
// $docVectors = $this->make($documents, ['options' => ['input_type' => 'document']]);
35-
// $queryVector = $this->make($query, ['options' => ['input_type' => 'query']]);
36-
//} else {
37-
// $docVectors = $this->make($documents);
38-
// $queryVector = $this->make($query);
39-
//}

src-polyglot/Http/Adapters/LaravelResponseAdapter.php renamed to src-polyglot/Http/Adapters/LaravelHttpResponse.php

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,28 +6,54 @@
66
use Generator;
77
use Illuminate\Http\Client\Response;
88

9-
class LaravelResponseAdapter implements HttpClientResponse
9+
/**
10+
* Class LaravelHttpResponse
11+
*
12+
* Implements HttpClientResponse contract for Laravel HTTP client
13+
*/
14+
class LaravelHttpResponse implements HttpClientResponse
1015
{
1116
public function __construct(
1217
private Response $response,
1318
private bool $streaming = false
1419
) {}
1520

21+
/**
22+
* Get the response status code
23+
*
24+
* @return int
25+
*/
1626
public function getStatusCode(): int
1727
{
1828
return $this->response->status();
1929
}
2030

31+
/**
32+
* Get the response headers
33+
*
34+
* @return array
35+
*/
2136
public function getHeaders(): array
2237
{
2338
return $this->response->headers();
2439
}
2540

41+
/**
42+
* Get the response content
43+
*
44+
* @return string
45+
*/
2646
public function getContents(): string
2747
{
2848
return $this->response->body();
2949
}
3050

51+
/**
52+
* Read chunks of the stream
53+
*
54+
* @param int $chunkSize
55+
* @return Generator<string>
56+
*/
3157
public function streamContents(int $chunkSize = 1): Generator
3258
{
3359
if (!$this->streaming) {

0 commit comments

Comments
 (0)