Skip to content

Commit 0e14c01

Browse files
committed
Cloud storage caching
1 parent c36fa2a commit 0e14c01

File tree

2 files changed

+138
-11
lines changed

2 files changed

+138
-11
lines changed

.env.template

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ GOOGLE_GENAI_USE_VERTEXAI=
1010
GOOGLE_CLOUD_PROJECT=
1111
GOOGLE_APPLICATION_CREDENTIALS=
1212
GEMINI_SERVICE_ACCOUNT=
13+
NEBULA_API_URL=
14+
NEBULA_API_STORAGE_BUCKET=
15+
NEBULA_API_KEY=
16+
NEBULA_API_STORAGE_KEY=
1317

1418
# Uploader
1519
MONGODB_URI=

parser/academicCalendars.go

Lines changed: 134 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,14 @@ package parser
33
import (
44
"bytes"
55
"context"
6+
"crypto/sha256"
7+
"encoding/hex"
68
"encoding/json"
79
"fmt"
10+
"io"
811
"io/fs"
912
"log"
13+
"net/http"
1014
"os"
1115
"path/filepath"
1216
"strings"
@@ -139,27 +143,46 @@ func parsePdf(path string) (schema.AcademicCalendar, error) {
139143
return schema.AcademicCalendar{}, err
140144
}
141145

142-
geminiClient := getGeminiClient()
143-
144146
// Build prompt
145147
promptFilled := fmt.Sprintf(prompt, name, timeline, content)
146148

147-
// Send with default config
148-
response, err := geminiClient.Models.GenerateContent(context.Background(),
149-
"gemini-2.5-pro",
150-
genai.Text(promptFilled),
151-
&genai.GenerateContentConfig{},
152-
)
149+
// Check cache
150+
hash := sha256.Sum256([]byte(promptFilled))
151+
key := hex.EncodeToString(hash[:]) + ".json"
152+
result, err := checkCache(key)
153153
if err != nil {
154154
return schema.AcademicCalendar{}, err
155155
}
156+
if result != "" {
157+
log.Printf("Cache found for %s!", filename)
158+
} else {
159+
log.Printf("No cache for %s, asking Gemini.", filename)
160+
// AI
161+
geminiClient := getGeminiClient()
162+
163+
// Send with default config
164+
response, err := geminiClient.Models.GenerateContent(context.Background(),
165+
"gemini-2.5-pro",
166+
genai.Text(promptFilled),
167+
&genai.GenerateContentConfig{},
168+
)
169+
if err != nil {
170+
return schema.AcademicCalendar{}, err
171+
}
156172

157-
// Get response, remove backtick formatting
158-
jsonStr := strings.ReplaceAll(strings.ReplaceAll(response.Candidates[0].Content.Parts[0].Text, "```json", ""), "```", "")
173+
// Get response, remove backtick formatting
174+
result = strings.ReplaceAll(strings.ReplaceAll(response.Candidates[0].Content.Parts[0].Text, "```json", ""), "```", "")
175+
176+
// Set cache
177+
err = setCache(key, result)
178+
if err != nil {
179+
return schema.AcademicCalendar{}, err
180+
}
181+
}
159182

160183
// Build struct
161184
var academicCalendar schema.AcademicCalendar
162-
err = json.Unmarshal([]byte(jsonStr), &academicCalendar)
185+
err = json.Unmarshal([]byte(result), &academicCalendar)
163186
if err != nil {
164187
return schema.AcademicCalendar{}, err
165188
}
@@ -196,6 +219,106 @@ func readPdf(path string) (string, error) {
196219
return buf.String(), nil
197220
}
198221

222+
func checkCache(hash string) (string, error) {
223+
apiUrl, apiBucket, apiKey, apiStorageKey, err := getNebulaKeys()
224+
if err != nil {
225+
return "", err
226+
}
227+
228+
client := &http.Client{}
229+
230+
// Make request
231+
req, err := http.NewRequest("GET", apiUrl+"storage/"+apiBucket+"/"+hash, nil)
232+
if err != nil {
233+
return "", err
234+
}
235+
req.Header.Add("x-api-key", apiKey)
236+
req.Header.Add("x-storage-key", apiStorageKey)
237+
resp, err := client.Do(req)
238+
if err != nil {
239+
return "", err
240+
}
241+
defer resp.Body.Close()
242+
243+
// Read the response body
244+
body, err := io.ReadAll(resp.Body)
245+
if err != nil {
246+
return "", err
247+
}
248+
var parsedBody schema.APIResponse[schema.ObjectInfo]
249+
err = json.Unmarshal([]byte(body), &parsedBody)
250+
if err != nil {
251+
// If this errors, return ("", nil) to indicate not found
252+
return "", nil
253+
}
254+
255+
// Fetch object
256+
req, err = http.NewRequest("GET", parsedBody.Data.MediaLink, nil)
257+
if err != nil {
258+
return "", err
259+
}
260+
resp, err = client.Do(req)
261+
if err != nil {
262+
return "", err
263+
}
264+
defer resp.Body.Close()
265+
266+
// Read the response body
267+
body, err = io.ReadAll(resp.Body)
268+
if err != nil {
269+
return "", err
270+
}
271+
272+
return string(body), nil
273+
}
274+
275+
func setCache(key string, result string) error {
276+
apiUrl, apiBucket, apiKey, apiStorageKey, err := getNebulaKeys()
277+
if err != nil {
278+
return err
279+
}
280+
281+
// Make request
282+
jsonStr := []byte(result)
283+
bodyReader := bytes.NewBuffer(jsonStr)
284+
req, err := http.NewRequest("POST", apiUrl+"storage/"+apiBucket+"/"+key, bodyReader)
285+
if err != nil {
286+
return err
287+
}
288+
req.Header.Set("Content-Type", "application/json")
289+
req.Header.Add("x-api-key", apiKey)
290+
req.Header.Add("x-storage-key", apiStorageKey)
291+
client := &http.Client{}
292+
resp, err := client.Do(req)
293+
if err != nil {
294+
return err
295+
}
296+
defer resp.Body.Close()
297+
298+
return nil
299+
}
300+
301+
func getNebulaKeys() (string, string, string, string, error) {
302+
apiUrl, err := utils.GetEnv("NEBULA_API_URL")
303+
if err != nil {
304+
return "", "", "", "", err
305+
}
306+
apiBucket, err := utils.GetEnv("NEBULA_API_STORAGE_BUCKET")
307+
if err != nil {
308+
return "", "", "", "", err
309+
}
310+
apiKey, err := utils.GetEnv("NEBULA_API_KEY")
311+
if err != nil {
312+
return "", "", "", "", err
313+
}
314+
apiStorageKey, err := utils.GetEnv("NEBULA_API_STORAGE_KEY")
315+
if err != nil {
316+
return "", "", "", "", err
317+
}
318+
319+
return apiUrl, apiBucket, apiKey, apiStorageKey, nil
320+
}
321+
199322
// Create client only once
200323
// Auth is from GOOGLE_GENAI_USE_VERTEXAI, GOOGLE_CLOUD_PROJECT and GOOGLE_APPLICATION_CREDENTIALS environment variables and service account JSON
201324
func getGeminiClient() *genai.Client {

0 commit comments

Comments
 (0)