77using Microsoft . KernelMemory . Diagnostics ;
88using System . Text . Json ;
99using System . Text . Json . Serialization ;
10+ using System . Collections . Generic ;
1011
1112/// <summary>
/// As per https://docs.cloud.llamaindex.ai/API/upload-file-api-v-1-parsing-upload-post
@@ -28,7 +29,8 @@ public LLamaCloudParserClient(
2829 throw new ArgumentException ( "ApiKey is required" , nameof ( config . ApiKey ) ) ;
2930 }
3031
31- this . _httpClient = httpClient ;
32+ _httpClient = httpClient ;
33+
3234 _log = log ?? DefaultLogger < LLamaCloudParserClient > . Instance ;
3335 _apiKey = config . ApiKey ;
3436 _baseUrl = config . BaseUrl ! ;
@@ -49,26 +51,11 @@ public async Task<UploadResponse> UploadAsync(
4951 var streamContent = new StreamContent ( fileContent ) ;
5052 multipartContent . Add ( streamContent , "file" , fileName ) ;
5153
52- if ( parameters != null )
54+ if ( parameters != null && parameters . CustomStringParameters . Count > 0 )
5355 {
54- foreach ( var prop in typeof ( UploadParameters ) . GetProperties ( ) )
56+ foreach ( var ( key , value ) in parameters . CustomStringParameters )
5557 {
56- var value = prop . GetValue ( parameters ) ;
57- if ( value != null )
58- {
59- if ( value is bool boolValue )
60- {
61- multipartContent . Add ( new StringContent ( boolValue . ToString ( ) . ToLower ( ) ) , prop . Name ) ;
62- }
63- else if ( value is string [ ] arrayValue )
64- {
65- multipartContent . Add ( new StringContent ( string . Join ( "," , arrayValue ) ) , prop . Name ) ;
66- }
67- else
68- {
69- multipartContent . Add ( new StringContent ( value . ToString ( ) ! ) , prop . Name ) ;
70- }
71- }
58+ multipartContent . Add ( new StringContent ( value ) , key ) ;
7259 }
7360 }
7461
@@ -81,72 +68,71 @@ public async Task<UploadResponse> UploadAsync(
8168 return JsonSerializer . Deserialize < UploadResponse > ( jsonResponse )
8269 ?? throw new InvalidOperationException ( "Failed to parse response" ) ;
8370 }
71+
/// <summary>
/// Fetches the description of a parsing job from <c>/api/v1/parsing/job/{jobId}</c>.
/// </summary>
/// <param name="jobId">Identifier of the job to look up. Must not be null, empty or whitespace.</param>
/// <param name="cancellationToken">Token that cancels the outgoing HTTP request.</param>
/// <returns>The response body deserialized as a <see cref="JobResponse"/>.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="jobId"/> is null, empty or whitespace.</exception>
/// <exception cref="HttpRequestException">Thrown when the service answers with a non-success status code.</exception>
/// <exception cref="InvalidOperationException">Thrown when the response body deserializes to null.</exception>
public async Task<JobResponse> GetJobAsync(
    string jobId,
    System.Threading.CancellationToken cancellationToken = default)
{
    if (string.IsNullOrWhiteSpace(jobId))
    {
        throw new ArgumentException("Job id is required", nameof(jobId));
    }

    // Escape the id so reserved characters ('/', '?', '#', ...) cannot change the request path.
    var requestUri = $"{_baseUrl.TrimEnd('/')}/api/v1/parsing/job/{Uri.EscapeDataString(jobId)}";
    using var request = new HttpRequestMessage(HttpMethod.Get, requestUri);

    request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _apiKey);

    using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
    response.EnsureSuccessStatusCode();

    var jsonResponse = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    return JsonSerializer.Deserialize<JobResponse>(jsonResponse)
        ?? throw new InvalidOperationException("Failed to parse response");
}
87+
/// <summary>
/// Downloads the body returned by <c>/api/v1/parsing/job/{jobId}/result/raw/markdown</c>.
/// </summary>
/// <param name="jobId">Identifier of the job whose result is requested. Must not be null, empty or whitespace.</param>
/// <param name="cancellationToken">Token that cancels the outgoing HTTP request.</param>
/// <returns>The response body as a string, returned without further parsing.</returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="jobId"/> is null, empty or whitespace.</exception>
/// <exception cref="HttpRequestException">Thrown when the service answers with a non-success status code.</exception>
public async Task<string> GetJobRawMarkdownAsync(
    string jobId,
    System.Threading.CancellationToken cancellationToken = default)
{
    if (string.IsNullOrWhiteSpace(jobId))
    {
        throw new ArgumentException("Job id is required", nameof(jobId));
    }

    // Escape the id so reserved characters ('/', '?', '#', ...) cannot change the request path.
    var requestUri = $"{_baseUrl.TrimEnd('/')}/api/v1/parsing/job/{Uri.EscapeDataString(jobId)}/result/raw/markdown";
    using var request = new HttpRequestMessage(HttpMethod.Get, requestUri);

    // NOTE(review): the Accept header asks for JSON although the endpoint name suggests raw markdown;
    // kept unchanged to preserve the request as sent today - confirm against the API documentation.
    request.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue("application/json"));
    request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _apiKey);

    using var response = await _httpClient.SendAsync(request, cancellationToken).ConfigureAwait(false);
    response.EnsureSuccessStatusCode();

    return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
}
101+
/// <summary>
/// Polls <see cref="GetJobAsync"/> until the job reports <see cref="UploadStatus.SUCCESS"/>
/// or the timeout elapses.
/// </summary>
/// <param name="jobId">Identifier of the job to poll.</param>
/// <param name="timeout">Maximum time to keep polling. A zero or negative value returns false without polling.</param>
/// <param name="pollInterval">Pause between two polls; defaults to 10 seconds when omitted.</param>
/// <param name="cancellationToken">Token observed before each poll and during the pause between polls.</param>
/// <returns>True when the job reached <see cref="UploadStatus.SUCCESS"/> in time; otherwise false.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="pollInterval"/> is zero or negative.</exception>
/// <exception cref="OperationCanceledException">Thrown when <paramref name="cancellationToken"/> is cancelled.</exception>
public async Task<bool> WaitForJobSuccessAsync(
    string jobId,
    TimeSpan timeout,
    TimeSpan? pollInterval = null,
    System.Threading.CancellationToken cancellationToken = default)
{
    var interval = pollInterval ?? TimeSpan.FromSeconds(10);
    if (interval <= TimeSpan.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(pollInterval), "Poll interval must be positive");
    }

    // A stopwatch is monotonic, so system clock adjustments cannot stretch or shorten the wait.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    while (stopwatch.Elapsed < timeout)
    {
        cancellationToken.ThrowIfCancellationRequested();

        var jobResponse = await GetJobAsync(jobId).ConfigureAwait(false);
        if (jobResponse.Status == UploadStatus.SUCCESS)
        {
            return true;
        }

        // NOTE(review): every status other than SUCCESS keeps polling until the timeout.
        // If UploadStatus has terminal failure values, returning early for them would
        // avoid waiting out the whole timeout - confirm against the enum definition.

        var remaining = timeout - stopwatch.Elapsed;
        if (remaining <= TimeSpan.Zero)
        {
            break;
        }

        // Never sleep past the deadline: wait for the shorter of the interval and the time left.
        var pause = remaining < interval ? remaining : interval;
        await Task.Delay(pause, cancellationToken).ConfigureAwait(false);
    }

    return false;
}
84116}
85117
/// <summary>
/// Connection settings consumed by <c>LLamaCloudParserClient</c>.
/// </summary>
public class CloudParserConfiguration
{
    /// <summary>
    /// API key for the service. The client constructor rejects a configuration without it
    /// and sends it as a bearer token on job requests.
    /// </summary>
    public string? ApiKey { get; internal set; }

    /// <summary>
    /// Root URL of the service; request paths are appended to it.
    /// Defaults to <c>https://api.cloud.llamaindex.ai</c>.
    /// </summary>
    public string? BaseUrl { get; internal set; } = "https://api.cloud.llamaindex.ai";
}
91123
/// <summary>
/// Optional form fields sent together with the uploaded file.
/// Every entry of <see cref="CustomStringParameters"/> is added to the multipart
/// request as a string part named after its key.
/// </summary>
public class UploadParameters
{
    /// <summary>
    /// Form field names mapped to their string values.
    /// </summary>
    public Dictionary<string, string> CustomStringParameters { get; set; } = new();

    /// <summary>
    /// Sets the <c>parsing_instruction</c> form field, replacing any previous value.
    /// </summary>
    /// <param name="parsingInstructions">Instruction text to send; must not be null.</param>
    /// <returns>This instance, so that calls can be chained.</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="parsingInstructions"/> is null.</exception>
    public UploadParameters WithParsingInstructions(string parsingInstructions)
    {
        // Fail here rather than later, when the value is wrapped in a StringContent during upload.
        CustomStringParameters["parsing_instruction"] =
            parsingInstructions ?? throw new ArgumentNullException(nameof(parsingInstructions));
        return this;
    }
}
151137
152138public class UploadResponse
@@ -165,6 +151,22 @@ public class UploadResponse
165151 public object ? ErrorMessage { get ; set ; }
166152}
167153
/// <summary>
/// Shape of the JSON document that <c>GetJobAsync</c> deserializes.
/// </summary>
public class JobResponse
{
    /// <summary>Value of the <c>id</c> field.</summary>
    [JsonPropertyName("id")]
    public Guid Id { get; set; }

    /// <summary>Value of the <c>status</c> field, converted from its string name.</summary>
    [JsonPropertyName("status"), JsonConverter(typeof(JsonStringEnumConverter))]
    public UploadStatus Status { get; set; }

    /// <summary>Value of the <c>error_code</c> field, kept untyped.</summary>
    [JsonPropertyName("error_code")]
    public object? ErrorCode { get; set; }

    /// <summary>Value of the <c>error_message</c> field, kept untyped.</summary>
    [JsonPropertyName("error_message")]
    public object? ErrorMessage { get; set; }
}
169+
168170public enum UploadStatus
169171{
170172 PENDING ,
0 commit comments