Skip to content

Commit 5fc1209

Browse files
committed
first skeleton for llamacloud parser
1 parent 04b269e commit 5fc1209

File tree

1 file changed

+174
-0
lines changed

1 file changed

+174
-0
lines changed
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
using System;
2+
using System.IO;
3+
using System.Net.Http;
4+
using System.Net.Http.Headers;
5+
using System.Threading.Tasks;
6+
using Microsoft.Extensions.Logging;
7+
using Microsoft.KernelMemory.Diagnostics;
8+
using System.Text.Json;
9+
using System.Text.Json.Serialization;
10+
11+
/// <summary>
/// Minimal client for the LlamaCloud parsing upload endpoint
/// (<c>POST {BaseUrl}/api/v1/parsing/upload</c>).
/// See https://docs.cloud.llamaindex.ai/API/upload-file-api-v-1-parsing-upload-post
/// </summary>
public class LLamaCloudParserClient
{
    private readonly HttpClient _httpClient;
    private readonly ILogger<LLamaCloudParserClient> _log;
    private readonly string _apiKey;
    private readonly string _baseUrl;

    /// <summary>
    /// Creates a client bound to the given configuration and <see cref="HttpClient"/>.
    /// </summary>
    /// <param name="config">Supplies the API key and the service base URL; both are required.</param>
    /// <param name="httpClient">HTTP client used to send requests. It is not disposed by this class.</param>
    /// <param name="log">Optional logger; a default logger instance is used when omitted.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="config"/> or <paramref name="httpClient"/> is null.
    /// </exception>
    /// <exception cref="ArgumentException">The API key or the base URL is null or empty.</exception>
    public LLamaCloudParserClient(
        CloudParserConfiguration config,
        HttpClient httpClient,
        ILogger<LLamaCloudParserClient>? log = null)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        if (httpClient == null)
        {
            throw new ArgumentNullException(nameof(httpClient));
        }

        if (string.IsNullOrEmpty(config.ApiKey))
        {
            throw new ArgumentException("ApiKey is required", nameof(config));
        }

        // Fail fast here instead of hitting a NullReferenceException on the first upload.
        if (string.IsNullOrEmpty(config.BaseUrl))
        {
            throw new ArgumentException("BaseUrl is required", nameof(config));
        }

        _httpClient = httpClient;
        _log = log ?? DefaultLogger<LLamaCloudParserClient>.Instance;
        _apiKey = config.ApiKey;
        _baseUrl = config.BaseUrl;
    }

    /// <summary>
    /// Uploads a file as <c>multipart/form-data</c> and returns the deserialized service response.
    /// </summary>
    /// <param name="fileContent">
    /// Content of the file to upload. The stream is wrapped in the request content and is
    /// disposed together with the request when this call completes.
    /// </param>
    /// <param name="fileName">File name reported in the <c>file</c> form part.</param>
    /// <param name="parameters">
    /// Optional settings. Every public property whose value is not null is added as a form
    /// field named after the property.
    /// </param>
    /// <param name="cancellationToken">Token used to cancel the HTTP request.</param>
    /// <returns>The response body deserialized as <see cref="UploadResponse"/>.</returns>
    /// <exception cref="HttpRequestException">The service answered with a non-success status code.</exception>
    /// <exception cref="InvalidOperationException">The response body deserialized to null.</exception>
    public async Task<UploadResponse> UploadAsync(
        Stream fileContent,
        string fileName,
        UploadParameters? parameters = null,
        System.Threading.CancellationToken cancellationToken = default)
    {
        var requestUri = $"{_baseUrl.TrimEnd('/')}/api/v1/parsing/upload";
        using var request = new HttpRequestMessage(HttpMethod.Post, requestUri);

        request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _apiKey);

        // Attach the form to the request before filling it: disposing the request disposes
        // its content, so nothing leaks if building the form fields throws.
        var form = new MultipartFormDataContent();
        request.Content = form;

        form.Add(new StreamContent(fileContent), "file", fileName);

        if (parameters != null)
        {
            // NOTE(review): fields are sent under the C# property name (e.g. "ProjectId").
            // Confirm against the API reference whether a different casing is expected.
            foreach (var prop in typeof(UploadParameters).GetProperties())
            {
                var text = FormatFormValue(prop.GetValue(parameters));
                if (text != null)
                {
                    form.Add(new StringContent(text), prop.Name);
                }
            }
        }

        using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();

        var jsonResponse = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        return JsonSerializer.Deserialize<UploadResponse>(jsonResponse)
               ?? throw new InvalidOperationException("Failed to parse response");
    }

    /// <summary>
    /// Converts a property value to its form-field text, or returns null when the value is
    /// null and the field must be omitted.
    /// </summary>
    private static string? FormatFormValue(object? value)
    {
        switch (value)
        {
            case null:
                return null;
            case bool flag:
                return flag ? "true" : "false";
            case string[] items:
                return string.Join(",", items);
            case IFormattable formattable:
                // Numbers must not depend on the current culture ("0.5", never "0,5").
                return formattable.ToString(null, System.Globalization.CultureInfo.InvariantCulture);
            default:
                return value.ToString() ?? string.Empty;
        }
    }
}
85+
86+
/// <summary>
/// Connection settings for the cloud parser: the API key and the service base URL.
/// Both values are readable by anyone but can only be assigned from inside this assembly.
/// </summary>
public class CloudParserConfiguration
{
    /// <summary>API key used to authenticate requests. Null until assigned.</summary>
    public string? ApiKey
    {
        get;
        internal set;
    }

    /// <summary>Base URL of the parsing service. Null until assigned.</summary>
    public string? BaseUrl
    {
        get;
        internal set;
    }
}
91+
92+
/// <summary>
/// Optional settings sent along with a file upload.
/// <c>LLamaCloudParserClient.UploadAsync</c> enumerates the public properties of this type
/// and adds every non-null value as a form field named after the property. Nullable
/// properties left at null are therefore omitted, while <c>bool</c> properties are always sent.
/// </summary>
public class UploadParameters
{
    public string? ProjectId { get; set; }
    public string? OrganizationId { get; set; }
    public bool AnnotateLinks { get; set; }

    // Auto mode and its triggers.
    public bool AutoMode { get; set; }
    public bool AutoModeTriggerOnImageInPage { get; set; }
    public bool AutoModeTriggerOnTableInPage { get; set; }
    public string? AutoModeTriggerOnTextInPage { get; set; }
    public string? AutoModeTriggerOnRegexpInPage { get; set; }

    // Azure OpenAI settings.
    public string? AzureOpenAiApiVersion { get; set; }
    public string? AzureOpenAiDeploymentName { get; set; }
    public string? AzureOpenAiEndpoint { get; set; }
    public string? AzureOpenAiKey { get; set; }

    // Bounding box edges.
    public float? BboxBottom { get; set; }
    public float? BboxLeft { get; set; }
    public float? BboxRight { get; set; }
    public float? BboxTop { get; set; }

    public bool ContinuousMode { get; set; }
    public bool DisableOcr { get; set; }
    public bool DisableReconstruction { get; set; }
    public bool DisableImageExtraction { get; set; }
    public bool DoNotCache { get; set; }
    public bool DoNotUnrollColumns { get; set; }
    public bool ExtractCharts { get; set; }
    public bool FastMode { get; set; }
    public bool GuessXlsxSheetName { get; set; }

    // HTML handling.
    public bool HtmlMakeAllElementsVisible { get; set; }
    public bool HtmlRemoveFixedElements { get; set; }
    public bool HtmlRemoveNavigationElements { get; set; }

    public string? HttpProxy { get; set; }
    public string? InputS3Path { get; set; }
    public string? InputUrl { get; set; }
    public bool InvalidateCache { get; set; }

    /// <summary>Defaults to true, unlike the other flags of this type.</summary>
    public bool IsFormattingInstruction { get; set; } = true;

    /// <summary>Defaults to a single entry, "en". Sent as a comma-separated list.</summary>
    public string[]? Language { get; set; } = new string[] { "en" };

    public bool ExtractLayout { get; set; }

    /// <summary>Untyped value; it is sent as its string representation.</summary>
    public object? MaxPages { get; set; }

    public bool OutputPdfOfDocument { get; set; }
    public string? OutputS3PathPrefix { get; set; }

    // Page decoration.
    public string? PagePrefix { get; set; }
    public string? PageSeparator { get; set; }
    public string? PageSuffix { get; set; }

    public string? ParsingInstruction { get; set; }
    public bool PremiumMode { get; set; }
    public bool SkipDiagonalText { get; set; }

    // Structured output.
    public bool StructuredOutput { get; set; }
    public string? StructuredOutputJsonSchema { get; set; }
    public string? StructuredOutputJsonSchemaName { get; set; }

    public bool TakeScreenshot { get; set; }
    public string? TargetPages { get; set; }

    // Vendor multimodal model.
    public bool UseVendorMultimodalModel { get; set; }
    public string? VendorMultimodalApiKey { get; set; }
    public string? VendorMultimodalModelName { get; set; }

    public string? WebhookUrl { get; set; }
    public string? BoundingBox { get; set; }
    public bool Gpt4OMode { get; set; }
    public string? Gpt4OApiKey { get; set; }
}
151+
152+
/// <summary>
/// JSON payload returned by the upload call, mapped from the
/// <c>id</c>, <c>status</c>, <c>error_code</c> and <c>error_message</c> fields.
/// </summary>
public class UploadResponse
{
    /// <summary>Value of the <c>id</c> field.</summary>
    [JsonPropertyName("id")]
    public Guid Id { get; set; }

    /// <summary>Value of the <c>status</c> field, read from its string name.</summary>
    [JsonPropertyName("status"), JsonConverter(typeof(JsonStringEnumConverter))]
    public UploadStatus Status { get; set; }

    /// <summary>Value of the <c>error_code</c> field, kept untyped.</summary>
    [JsonPropertyName("error_code")]
    public object? ErrorCode { get; set; }

    /// <summary>Value of the <c>error_message</c> field, kept untyped.</summary>
    [JsonPropertyName("error_message")]
    public object? ErrorMessage { get; set; }
}
167+
168+
/// <summary>
/// Status values of an upload. The member names are written in upper case with
/// underscores and must stay that way, because <c>UploadResponse.Status</c> is
/// converted from the status string by name.
/// </summary>
public enum UploadStatus
{
    PENDING = 0,
    SUCCESS = 1,
    ERROR = 2,
    PARTIAL_SUCCESS = 3,
}

0 commit comments

Comments
 (0)