Github-models-starter-pro/sample-image.js at main · DilkiSenanayake/Github-models-starter-pro · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/**
 * SAMPLE IMAGE ANALYSIS WITH GPT-4o VISION
 *
 * This file demonstrates how to use GPT-4o's vision capabilities to analyze images.
 * The application:
 * 1. Loads a sample image from the Images folder
 * 2. Converts the image to a base64 data URL format
 * 3. Sends both text and image data to the AI model
 * 4. Receives a detailed description of what's in the image
 *
 * Key concepts demonstrated:
 * - Multi-modal input handling (text + image)
 * - File system operations for reading image files
 * - Base64 encoding for image data transmission
 * - Error handling for file operations
 * - Vision model configuration and usage
 *
 * This showcases the multimodal capabilities of modern AI models that can understand
 * and describe visual content alongside text prompts.
 */

import OpenAI from "openai";
import { readFileSync } from "node:fs";
import dotenv from "dotenv";

// Populate process.env from a local .env file, if one is present.
dotenv.config();

// Credentials: the GitHub token is used as the API key for the inference endpoint.
const { GITHUB_TOKEN: token } = process.env;

// Base URL of the GitHub Models inference service.
const endpoint = "https://models.github.ai/inference";

// Identifier of the vision-capable model requested by this sample.
const modelName = "openai/gpt-4o";

/**
 * Send the sample image together with a text question to the vision model
 * and print the model's answer to stdout.
 *
 * @returns {Promise<void>} Resolves once the description has been printed.
 * @throws {Error} If GITHUB_TOKEN is not set, or if the API request fails.
 */
export async function main() {
  // Fail fast with an actionable message instead of sending an unauthenticated request.
  if (!token) {
    throw new Error(
      "GITHUB_TOKEN is not set. Add it to your environment or .env file before running this sample."
    );
  }

  // Initialize the OpenAI client against GitHub's endpoint, authenticated with the token.
  const client = new OpenAI({ baseURL: endpoint, apiKey: token });

  // Build a multi-modal chat completion request: one text part and one image part.
  const response = await client.chat.completions.create({
    model: modelName,
    messages: [
      // System message defines the AI's role for image description.
      { role: "system", content: "You are a helpful assistant that describes images in details." },
      // User message carries both the text query and the inline image data.
      {
        role: "user",
        content: [
          { type: "text", text: "What's in this image?" },
          {
            type: "image_url",
            image_url: {
              // "jpeg" is the registered MIME subtype for .jpg files.
              url: getImageDataUrl("sample.jpg", "jpeg"),
              // The documented option name is `detail` (not `details`).
              detail: "low",
            },
          },
        ],
      },
    ],
  });

  // Output the AI's description of the image.
  console.log(response.choices[0].message.content);
}

/**
 * Convert an image stored in the local `Images/` folder into a base64 data URL,
 * the inline form this sample uses to embed image bytes in a chat request.
 *
 * @param {string} imageFile - File name of the image, relative to the `Images/` folder.
 * @param {string} imageFormat - Image format used as the MIME subtype, e.g. "jpeg" or "png".
 *   The alias "jpg" (any letter case) is accepted and emitted as "jpeg".
 * @returns {string} The data URL, in the form `data:image/<format>;base64,<payload>`.
 *   If the file cannot be read, the failure is logged and the process exits with code 1.
 */
function getImageDataUrl(imageFile, imageFormat) {
  // "jpg" is a file extension, not a registered media subtype; the media type is image/jpeg.
  const mimeSubtype = String(imageFormat).toLowerCase() === "jpg" ? "jpeg" : imageFormat;
  // Images are resolved relative to the current working directory.
  const imagePath = `Images/${imageFile}`;

  try {
    // Read the raw bytes and encode them as base64 for the data URL payload.
    const imageBase64 = readFileSync(imagePath).toString("base64");
    return `data:image/${mimeSubtype};base64,${imageBase64}`;
  } catch (error) {
    // Report the failure, including the underlying reason, then stop the sample.
    console.error(`Could not read 'Images/${imageFile}'.`);
    console.error("Reason:", error instanceof Error ? error.message : error);
    console.error('Set the correct path to the image file before running this sample.');
    process.exit(1);
  }
}

// Run the sample. On failure, report the error and mark the process as failed
// so that shells and CI see a non-zero exit status.
main().catch((err) => {
  console.error("The sample encountered an error:", err);
  process.exitCode = 1;
});