@@ -26,6 +26,7 @@ import (
2626var once sync.Once
2727var geminiClient * genai.Client
2828
29+ // Prompt template sent to Gemini; fmt.Sprintf fills in the name, timeline and PDF content
2930var prompt = `Parse this PDF content and generate the following JSON schema.
3031
3132{
@@ -78,12 +79,11 @@ func ParseAcademicCalendars(inDir string, outDir string) {
7879
7980 // Parallel requests
8081 numWorkers := 10
81-
8282 jobs := make (chan string )
8383 var wg sync.WaitGroup
8484
8585 // Start worker goroutines
86- for i := 0 ; i < numWorkers ; i ++ {
86+ for range numWorkers {
8787 wg .Add (1 )
8888 go func () {
8989 defer wg .Done ()
@@ -122,8 +122,9 @@ func ParseAcademicCalendars(inDir string, outDir string) {
122122 utils .WriteJSON (fmt .Sprintf ("%s/academicCalendars.json" , outDir ), result )
123123}
124124
125+ // Read a PDF, build a Gemini prompt from its content, and reuse the cached response for that exact prompt if one exists; otherwise ask Gemini and cache the answer
125126func parsePdf (path string ) (schema.AcademicCalendar , error ) {
126- // Fall 2025 to 25F
127+ // " Fall 2025" to " 25F"
127128 filename := filepath .Base (path )
128129 filename = filename [0 : len (filename )- 4 ]
129130 filenameParts := strings .Split (filename , "-" )
@@ -147,20 +148,24 @@ func parsePdf(path string) (schema.AcademicCalendar, error) {
147148 promptFilled := fmt .Sprintf (prompt , name , timeline , content )
148149
149150 // Check cache
150- hash := sha256 .Sum256 ([]byte (promptFilled ))
151- key := hex .EncodeToString (hash [:]) + ".json"
152- result , err := checkCache (key )
151+ hashByte := sha256 .Sum256 ([]byte (promptFilled ))
152+ hash := hex .EncodeToString (hashByte [:]) + ".json"
153+ result , err := checkCache (hash )
153154 if err != nil {
154155 return schema.AcademicCalendar {}, err
155156 }
157+
158+ // Skip AI if cache found
156159 if result != "" {
157160 log .Printf ("Cache found for %s!" , filename )
158161 } else {
162+ // Cache not found
159163 log .Printf ("No cache for %s, asking Gemini." , filename )
164+
160165 // AI
161166 geminiClient := getGeminiClient ()
162167
163- // Send with default config
168+ // Send request with default config
164169 response , err := geminiClient .Models .GenerateContent (context .Background (),
165170 "gemini-2.5-pro" ,
166171 genai .Text (promptFilled ),
@@ -170,11 +175,11 @@ func parsePdf(path string) (schema.AcademicCalendar, error) {
170175 return schema.AcademicCalendar {}, err
171176 }
172177
173- // Get response, remove backtick formatting
178+ // Get response, remove backtick formatting if present
174179 result = strings .ReplaceAll (strings .ReplaceAll (response .Candidates [0 ].Content .Parts [0 ].Text , "```json" , "" ), "```" , "" )
175180
176- // Set cache
177- err = setCache (key , result )
181+ // Set cache for next time
182+ err = setCache (hash , result )
178183 if err != nil {
179184 return schema.AcademicCalendar {}, err
180185 }
@@ -190,6 +195,7 @@ func parsePdf(path string) (schema.AcademicCalendar, error) {
190195 return academicCalendar , nil
191196}
192197
198+ // Read the text from the first page of a PDF
193199func readPdf (path string ) (string , error ) {
194200 // Open the PDF
195201 f , r , err := pdf .Open (path )
@@ -219,6 +225,7 @@ func readPdf(path string) (string, error) {
219225 return buf .String (), nil
220226}
221227
228+ // Check cache for a response to the same prompt
222229func checkCache (hash string ) (string , error ) {
223230 apiUrl , apiBucket , apiKey , apiStorageKey , err := getNebulaKeys ()
224231 if err != nil {
@@ -272,7 +279,8 @@ func checkCache(hash string) (string, error) {
272279 return string (body ), nil
273280}
274281
275- func setCache (key string , result string ) error {
282+ // Upload AI response to cache
283+ func setCache (hash string , result string ) error {
276284 apiUrl , apiBucket , apiKey , apiStorageKey , err := getNebulaKeys ()
277285 if err != nil {
278286 return err
@@ -281,7 +289,7 @@ func setCache(key string, result string) error {
281289 // Make request
282290 jsonStr := []byte (result )
283291 bodyReader := bytes .NewBuffer (jsonStr )
284- req , err := http .NewRequest ("POST" , apiUrl + "storage/" + apiBucket + "/" + key , bodyReader )
292+ req , err := http .NewRequest ("POST" , apiUrl + "storage/" + apiBucket + "/" + hash , bodyReader )
285293 if err != nil {
286294 return err
287295 }
@@ -298,6 +306,7 @@ func setCache(key string, result string) error {
298306 return nil
299307}
300308
309+ // Get all the keys to access the Nebula API storage routes
301310func getNebulaKeys () (string , string , string , string , error ) {
302311 apiUrl , err := utils .GetEnv ("NEBULA_API_URL" )
303312 if err != nil {
@@ -320,7 +329,7 @@ func getNebulaKeys() (string, string, string, string, error) {
320329}
321330
322331// Create client only once
323- // Auth is from GOOGLE_GENAI_USE_VERTEXAI, GOOGLE_CLOUD_PROJECT and GOOGLE_APPLICATION_CREDENTIALS environment variables and service account JSON
332+ // Auth comes from the GOOGLE_GENAI_USE_VERTEXAI, GOOGLE_CLOUD_PROJECT and GOOGLE_APPLICATION_CREDENTIALS environment variables, plus a service account JSON file that is created from GEMINI_SERVICE_ACCOUNT
324333func getGeminiClient () * genai.Client {
325334 once .Do (func () {
326335 // Create JSON file
0 commit comments