Skip to content

Commit aeca1e6

Browse files
committed
first skeleton for llamacloud parser
1 parent 04b269e commit aeca1e6

File tree

1 file changed

+143
-0
lines changed

1 file changed

+143
-0
lines changed
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
using System;
2+
using System.IO;
3+
using System.Net.Http;
4+
using System.Net.Http.Headers;
5+
using System.Threading.Tasks;
6+
using Microsoft.Extensions.Logging;
7+
using Microsoft.KernelMemory.Diagnostics;
8+
9+
/// <summary>
/// Minimal HTTP client for the LlamaCloud parsing API: uploads a file, together with
/// optional <see cref="UploadParameters"/>, as a multipart form POST.
/// </summary>
public class LLamaCloudParserClient
{
    /// <summary>Path of the upload endpoint, appended to the configured base URL.</summary>
    private const string UploadPath = "/api/v1/parsing/upload";

    // Reflection metadata never changes at runtime, so it is looked up once instead of on every upload.
    private static readonly System.Reflection.PropertyInfo[] s_uploadParameterProperties =
        typeof(UploadParameters).GetProperties();

    private readonly HttpClient _httpClient;
    private readonly ILogger<LLamaCloudParserClient> _log;
    private readonly string _apiKey;
    private readonly string _baseUrl;

    /// <summary>
    /// Creates a client bound to the given configuration and HTTP client.
    /// </summary>
    /// <param name="config">Connection settings; both <c>ApiKey</c> and <c>BaseUrl</c> must be set.</param>
    /// <param name="httpClient">HTTP client used to send requests. It is not disposed by this class.</param>
    /// <param name="log">Optional logger; a default logger is used when omitted.</param>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> or <paramref name="httpClient"/> is null.</exception>
    /// <exception cref="ArgumentException"><c>ApiKey</c> or <c>BaseUrl</c> is null or empty.</exception>
    public LLamaCloudParserClient(
        CloudParserConfiguration config,
        HttpClient httpClient,
        ILogger<LLamaCloudParserClient>? log = null)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        if (string.IsNullOrEmpty(config.ApiKey))
        {
            throw new ArgumentException("ApiKey is required", nameof(config));
        }

        // Fail here rather than with a NullReferenceException on the first upload.
        if (string.IsNullOrEmpty(config.BaseUrl))
        {
            throw new ArgumentException("BaseUrl is required", nameof(config));
        }

        _httpClient = httpClient ?? throw new ArgumentNullException(nameof(httpClient));
        _log = log ?? DefaultLogger<LLamaCloudParserClient>.Instance;
        _apiKey = config.ApiKey;
        // Normalize once so the endpoint path can simply be appended.
        _baseUrl = config.BaseUrl.TrimEnd('/');
    }

    /// <summary>
    /// Uploads a file to the parsing endpoint and returns the raw response body.
    /// </summary>
    /// <param name="fileContent">
    /// Stream holding the file bytes. It is wrapped in a <see cref="StreamContent"/> owned by the
    /// request, so it is disposed together with the request when this method completes.
    /// </param>
    /// <param name="fileName">File name reported in the multipart <c>file</c> part.</param>
    /// <param name="parameters">
    /// Optional settings; every non-null property is sent as an additional form field
    /// named after the property.
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>The response body as a string (the request asks for <c>application/json</c>).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileContent"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="fileName"/> is null or whitespace.</exception>
    /// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
    public async Task<string> UploadAsync(
        Stream fileContent,
        string fileName,
        UploadParameters? parameters = null,
        System.Threading.CancellationToken cancellationToken = default)
    {
        if (fileContent == null)
        {
            throw new ArgumentNullException(nameof(fileContent));
        }

        if (string.IsNullOrWhiteSpace(fileName))
        {
            throw new ArgumentException("File name is required", nameof(fileName));
        }

        using var request = new HttpRequestMessage(HttpMethod.Post, _baseUrl + UploadPath);
        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _apiKey);

        // Attach the content to the request immediately: the request owns and disposes it,
        // even if building the remaining form fields throws.
        var form = new MultipartFormDataContent();
        request.Content = form;
        form.Add(new StreamContent(fileContent), "file", fileName);

        if (parameters != null)
        {
            AddParameterFields(form, parameters);
        }

        using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);

        if (!response.IsSuccessStatusCode)
        {
            _log.LogError(
                "LlamaCloud upload of '{FileName}' failed with HTTP status {StatusCode}",
                fileName,
                (int)response.StatusCode);
        }

        response.EnsureSuccessStatusCode();

        return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }

    /// <summary>Adds one form field per non-null property of <paramref name="parameters"/>.</summary>
    private static void AddParameterFields(MultipartFormDataContent form, UploadParameters parameters)
    {
        foreach (var property in s_uploadParameterProperties)
        {
            var value = property.GetValue(parameters);
            if (value == null)
            {
                continue;
            }

            // NOTE(review): fields are named after the C# property (PascalCase). Confirm the
            // service accepts these names; if it expects a different casing a mapping is needed.
            form.Add(new StringContent(FormatFormValue(value)), property.Name);
        }
    }

    /// <summary>Converts a property value to its form-field text, independent of the current culture.</summary>
    private static string FormatFormValue(object value) => value switch
    {
        bool flag => flag ? "true" : "false",
        string[] items => string.Join(",", items),
        // Numbers must not pick up the thread culture (e.g. "0,5" instead of "0.5").
        IFormattable formattable => formattable.ToString(null, System.Globalization.CultureInfo.InvariantCulture),
        _ => value.ToString() ?? string.Empty,
    };
}
78+
79+
/// <summary>
/// Connection settings read by <see cref="LLamaCloudParserClient"/> when it is constructed.
/// Both setters are <c>internal</c>, so values can only be assigned from within the declaring assembly.
/// </summary>
public class CloudParserConfiguration
{
    /// <summary>
    /// API key sent as the <c>Bearer</c> token on every request.
    /// The client constructor throws when this is null or empty.
    /// </summary>
    public string? ApiKey { get; internal set; }

    /// <summary>
    /// Root URL of the service. The client trims trailing slashes and appends
    /// <c>/api/v1/parsing/upload</c> to build the upload endpoint.
    /// </summary>
    public string? BaseUrl { get; internal set; }
}
84+
85+
/// <summary>
/// Optional settings sent alongside a file upload.
/// </summary>
/// <remarks>
/// <see cref="LLamaCloudParserClient.UploadAsync"/> enumerates the public properties of this type
/// and adds one form field per non-null value, named after the property. Because the
/// <c>bool</c> properties are not nullable, every flag is always sent (as <c>true</c> or
/// <c>false</c>); string arrays are joined with commas. Declaration order is kept as-is since
/// the fields are discovered by reflection.
/// </remarks>
public class UploadParameters
{
    // --- Project / organization ---
    public string? ProjectId { get; set; }
    public string? OrganizationId { get; set; }

    public bool AnnotateLinks { get; set; }

    // --- Auto mode ---
    public bool AutoMode { get; set; }
    public bool AutoModeTriggerOnImageInPage { get; set; }
    public bool AutoModeTriggerOnTableInPage { get; set; }
    public string? AutoModeTriggerOnTextInPage { get; set; }
    public string? AutoModeTriggerOnRegexpInPage { get; set; }

    // --- Azure OpenAI ---
    public string? AzureOpenAiApiVersion { get; set; }
    public string? AzureOpenAiDeploymentName { get; set; }
    public string? AzureOpenAiEndpoint { get; set; }
    public string? AzureOpenAiKey { get; set; }

    // --- Bbox values (optional floats; omitted from the form when null) ---
    public float? BboxBottom { get; set; }
    public float? BboxLeft { get; set; }
    public float? BboxRight { get; set; }
    public float? BboxTop { get; set; }

    // --- Processing flags ---
    public bool ContinuousMode { get; set; }
    public bool DisableOcr { get; set; }
    public bool DisableReconstruction { get; set; }
    public bool DisableImageExtraction { get; set; }
    public bool DoNotCache { get; set; }
    public bool DoNotUnrollColumns { get; set; }
    public bool ExtractCharts { get; set; }
    public bool FastMode { get; set; }
    public bool GuessXlsxSheetName { get; set; }

    // --- HTML handling ---
    public bool HtmlMakeAllElementsVisible { get; set; }
    public bool HtmlRemoveFixedElements { get; set; }
    public bool HtmlRemoveNavigationElements { get; set; }

    // --- Input source / network ---
    public string? HttpProxy { get; set; }
    public string? InputS3Path { get; set; }
    public string? InputUrl { get; set; }

    public bool InvalidateCache { get; set; }

    /// <summary>Defaults to <c>true</c>.</summary>
    public bool IsFormattingInstruction { get; set; } = true;

    /// <summary>Defaults to a single entry, <c>"en"</c>. Sent as a comma-separated list.</summary>
    public string[]? Language { get; set; } = new string[] { "en" };

    public bool ExtractLayout { get; set; }

    /// <summary>Untyped; the value's string representation is what gets sent.</summary>
    public object? MaxPages { get; set; }

    // --- Output ---
    public bool OutputPdfOfDocument { get; set; }
    public string? OutputS3PathPrefix { get; set; }

    // --- Page decoration ---
    public string? PagePrefix { get; set; }
    public string? PageSeparator { get; set; }
    public string? PageSuffix { get; set; }

    public string? ParsingInstruction { get; set; }
    public bool PremiumMode { get; set; }
    public bool SkipDiagonalText { get; set; }

    // --- Structured output ---
    public bool StructuredOutput { get; set; }
    public string? StructuredOutputJsonSchema { get; set; }
    public string? StructuredOutputJsonSchemaName { get; set; }

    public bool TakeScreenshot { get; set; }
    public string? TargetPages { get; set; }

    // --- Vendor multimodal model ---
    public bool UseVendorMultimodalModel { get; set; }
    public string? VendorMultimodalApiKey { get; set; }
    public string? VendorMultimodalModelName { get; set; }

    public string? WebhookUrl { get; set; }
    public string? BoundingBox { get; set; }

    // --- GPT-4o ---
    public bool Gpt4OMode { get; set; }
    public string? Gpt4OApiKey { get; set; }
}

0 commit comments

Comments
 (0)