ntthanh2603 · Hieuslecong · Mar 12, 2026 · Mar 13, 2026 · Mar 13, 2026
diff --git a/demo_ask_image.py b/demo_ask_image.py
@@ -0,0 +1,95 @@
+import io
+import json
+import base64
+import requests
+import sys
+from PIL import Image as PILImage
+
+# Configuration: generateContent endpoint on localhost:4981 that this demo POSTs its payload to.
+API_URL = "http://localhost:4981/gemini/v1beta/models/gemini-advanced:generateContent"
+
+def encode_image(image_path, max_size=(1024, 1024), quality=80):
+    """Đọc file ảnh, nén/resize xuống và mã hóa thành Base64"""
+    try:
+        # Mở ảnh bằng Pillow
+        img = PILImage.open(image_path)
+
+        # Chuyển đổi sang RGB nếu là RGBA (tránh lỗi khi lưu JPEG)
+        if img.mode in ("RGBA", "P"):
+            img = img.convert("RGB")
+
+        # Resize nếu ảnh quá lớn (giữ tỉ lệ)
+        img.thumbnail(max_size, PILImage.Resampling.LANCZOS)
+
+        # Lưu vào bộ nhớ đệm dạng byte với định dạng JPEG để nén dung lượng cao
+        buffer = io.BytesIO()
+        img.save(buffer, format="JPEG", quality=quality, optimize=True)
+
+        return base64.b64encode(buffer.getvalue()).decode('utf-8')
+    except ImportError:
+        print("Lỗi: Bạn cần cài đặt thư viện Pillow để nén ảnh. Chạy lệnh: pip install Pillow")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Lỗi khi xử lý ảnh: {e}")
+        sys.exit(1)
+
+def main():
+    # Cần ít nhất 2 tham số: tên script, đường dẫn ảnh, và câu hỏi
+    if len(sys.argv) < 3:
+        print("Sử dụng: python3 demo_ask_image.py <đường_dẫn_tới_ảnh> \"<câu_hỏi_của_bạn>\"")
+        print("Ví dụ:   python3 demo_ask_image.py 5_3d_visualization.png \"Trục X đại diện cho cái gì?\"")
+        sys.exit(1)
+
+    image_path = sys.argv[1]
+    prompt_text = sys.argv[2] # Câu hỏi từ người dùng
+
+    print(f"Bức ảnh: {image_path}")
+    print(f"Câu hỏi: {prompt_text}")
+    print("Đang xủ lý và tải ảnh lên...")
+
+    base64_image = encode_image(image_path)
+
+    # Khởi tạo Payload gửi đến Go Server
+    payload = {
+        "contents": [
+            {
+                "parts": [
+                    {"text": prompt_text},
+                    {
+                        "inlineData": {
+                            "mimeType": "image/jpeg", # Định dạng ảnh chung
+                            "data": base64_image
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+
+    headers = {
+        "Content-Type": "application/json"
+    }
+
+    print(f"Đang chờ Gemini trả lời...\n")
+    try:
+        response = requests.post(API_URL, headers=headers, data=json.dumps(payload))
+        response.raise_for_status() 
+
+        result = response.json()
+
+        print("============== GEMINI TRẢ LỜI ==============")
+        try:
+            answer = result['candidates'][0]['content']['parts'][0]['text']
+            print(answer)
+        except (KeyError, IndexError) as e:
+            print("Cấu trúc phản hồi không khớp dự kiến. Dữ liệu gốc:")
+            print(json.dumps(result, indent=2))
+        print("===========================================\n")
+
+    except requests.exceptions.RequestException as e:
+        print(f"Lỗi gọi API: {e}")
+        if hasattr(e, 'response') and e.response is not None:
+            print(f"Chi tiết: {e.response.text}")
+
+if __name__ == "__main__":  # run the demo only when executed as a script, not when imported
+    main()
diff --git a/demo_upload.py b/demo_upload.py
@@ -0,0 +1,92 @@
+import io
+import json
+import base64
+import requests
+import sys
+from PIL import Image as PILImage
+
+# Configuration: generateContent endpoint on localhost:4981 that this demo POSTs its payload to.
+API_URL = "http://localhost:4981/gemini/v1beta/models/gemini-advanced:generateContent"
+
+def encode_image(image_path, max_size=(1024, 1024), quality=80):
+    """Đọc file ảnh, nén/resize xuống và mã hóa thành Base64"""
+    try:
+        # Mở ảnh bằng Pillow
+        img = PILImage.open(image_path)
+
+        # Chuyển đổi sang RGB nếu là RGBA (tránh lỗi khi lưu JPEG)
+        if img.mode in ("RGBA", "P"):
+            img = img.convert("RGB")
+
+        # Resize nếu ảnh quá lớn (giữ tỉ lệ)
+        img.thumbnail(max_size, PILImage.Resampling.LANCZOS)
+
+        # Lưu vào bộ nhớ đệm dạng byte với định dạng JPEG để nén dung lượng cao
+        buffer = io.BytesIO()
+        img.save(buffer, format="JPEG", quality=quality, optimize=True)
+
+        return base64.b64encode(buffer.getvalue()).decode('utf-8')
+    except ImportError:
+        print("Lỗi: Bạn cần cài đặt thư viện Pillow để nén ảnh. Chạy lệnh: pip install Pillow")
+        sys.exit(1)
+    except Exception as e:
+        print(f"Lỗi khi xử lý ảnh: {e}")
+        sys.exit(1)
+
+def main():
+    if len(sys.argv) < 2:
+        print("Sử dụng: python demo_upload.py <đường_dẫn_tới_ảnh>")
+        print("Ví dụ: python demo_upload.py 5_3d_visualization.png")
+        sys.exit(1)
+
+    image_path = sys.argv[1]
+    prompt_text = "Mô tả chi tiết bức ảnh này."
+
+    print(f"Đang chuẩn bị gửi ảnh: {image_path}")
+    base64_image = encode_image(image_path)
+
+    # Khởi tạo Payload gửi đến Go Server (chuẩn Gemini/Vertex AI)
+    payload = {
+        "contents": [
+            {
+                "parts": [
+                    {"text": prompt_text},
+                    {
+                        "inlineData": {
+                            "mimeType": "image/jpeg", # Ảnh luôn được chuyển đổi sang định dạng JPEG để nén.
+                            "data": base64_image
+                        }
+                    }
+                ]
+            }
+        ]
+    }
+
+    headers = {
+        "Content-Type": "application/json"
+    }
+
+    print(f"Đang gửi yêu cầu tới {API_URL}...")
+    try:
+        response = requests.post(API_URL, headers=headers, data=json.dumps(payload))
+        response.raise_for_status() # Báo lỗi nếu server trả về mã lỗi (500, 400...)
+
+        result = response.json()
+
+        print("\n--- Gemini Trả Lời ---")
+        # Trích xuất nội dung văn bản từ kết quả trả về
+        try:
+            answer = result['candidates'][0]['content']['parts'][0]['text']
+            print(answer)
+        except (KeyError, IndexError) as e:
+            print("Cấu trúc phản hồi không khớp dự kiến. Dữ liệu gốc:")
+            print(json.dumps(result, indent=2))
+        print("------------------------\n")
+
+    except requests.exceptions.RequestException as e:
+        print(f"Lỗi gọi API: {e}")
+        if hasattr(e, 'response') and e.response is not None:
+            print(f"Chi tiết response: {e.response.text}")
+
+if __name__ == "__main__":  # run the demo only when executed as a script, not when imported
+    main()
diff --git a/internal/modules/gemini/gemini_service.go b/internal/modules/gemini/gemini_service.go
@@ -2,6 +2,7 @@ package gemini
 
 import (
 	"context"
+	"encoding/base64"
 	"fmt"
 	"strings"
 
@@ -28,24 +29,40 @@ func (s *GeminiService) ListModels() []providers.ModelInfo {
 }
 
 func (s *GeminiService) GenerateContent(ctx context.Context, modelID string, req dto.GeminiGenerateRequest) (*dto.GeminiGenerateResponse, error) {
-	// Logic: Extract prompt
+	// Logic: Extract prompt and files
 	var promptBuilder strings.Builder
+	var files []providers.FileData
+
 	for _, content := range req.Contents {
 		for _, part := range content.Parts {
 			if part.Text != "" {
 				promptBuilder.WriteString(part.Text)
 				promptBuilder.WriteString("\n")
 			}
+			if part.InlineData != nil {
+				dataBytes, err := base64.StdEncoding.DecodeString(part.InlineData.Data)
+				if err != nil {
+					s.log.Warn("Failed to decode base64 inline data", zap.Error(err))
+					continue
+				}
+				files = append(files, providers.FileData{
+					MimeType: part.InlineData.MimeType,
+					Data:     dataBytes,
+				})
+			}
 		}
 	}
 
 	prompt := strings.TrimSpace(promptBuilder.String())
-	if prompt == "" {
+	if prompt == "" && len(files) == 0 {
 		return nil, fmt.Errorf("empty content")
 	}
 
 	// Logic: Call Provider
 	opts := []providers.GenerateOption{providers.WithModel(modelID)}
+	if len(files) > 0 {
+		opts = append(opts, providers.WithFiles(files))
+	}
 	response, err := s.client.GenerateContent(ctx, prompt, opts...)
 	if err != nil {
 		return nil, err

diff --git a/internal/modules/providers/gemini_service.go b/internal/modules/providers/gemini_service.go
@@ -1,6 +1,7 @@
 package providers
 
 import (
+	"bytes"
 	"compress/gzip"
 	"context"
 	"crypto/sha256"
@@ -494,10 +495,39 @@ func (c *Client) GenerateContent(ctx context.Context, prompt string, options ...
 		return nil, errors.New("client not initialized")
 	}
 
+	var reqFileData interface{} = nil
+	var fileDataArr []interface{}
+
+	if len(config.Files) > 0 {
+		for _, file := range config.Files {
+            filename := file.FileName
+			if filename == "" {
+				filename = fmt.Sprintf("input_%d", time.Now().UnixNano())
+				file.FileName = filename
+			}
+
+			url, err := c.UploadFile(ctx, file)
+			if err != nil {
+				return nil, fmt.Errorf("failed to upload file %s: %w", filename, err)
+			}
+
+			fileDataArr = append(fileDataArr, []interface{}{
+				[]interface{}{url}, filename,
+			})
+		}
+		reqFileData = fileDataArr
+	}
+
+	var messageContent []interface{}
+	if reqFileData != nil {
+		messageContent = []interface{}{prompt, 0, nil, reqFileData, nil, nil, 0}
+	} else {
+		messageContent = []interface{}{prompt}
+	}
+
 	// Build request payload
-	// The structure confirmed to work for model selection is [ [prompt], nil, nil, model ]
 	inner := []interface{}{
-		[]interface{}{prompt},
+		messageContent,
 		nil,
 		nil,
 		config.Model,
@@ -507,6 +537,7 @@ func (c *Client) GenerateContent(ctx context.Context, prompt string, options ...
 	outer := []interface{}{nil, string(innerJSON)}
 	outerJSON, _ := json.Marshal(outer)
 
+
 	formData := map[string]string{
 		"at":    at,
 		"f.req": string(outerJSON),
@@ -840,13 +871,42 @@ func (c *Client) ClearCookieCache() error {
 }
 
 const (
-EndpointGoogle        = "https://www.google.com"
-EndpointInit          = "https://gemini.google.com/app"
-EndpointGenerate      = "https://gemini.google.com/_/BardChatUi/data/assistant.lamda.BardFrontendService/StreamGenerate"
-EndpointRotateCookies = "https://accounts.google.com/RotateCookies"
-EndpointBatchExec     = "https://gemini.google.com/_/BardChatUi/data/batchexecute"
+	EndpointGoogle        = "https://www.google.com"
+	EndpointInit          = "https://gemini.google.com/app"
+	EndpointGenerate      = "https://gemini.google.com/_/BardChatUi/data/assistant.lamda.BardFrontendService/StreamGenerate"
+	EndpointRotateCookies = "https://accounts.google.com/RotateCookies"
+	EndpointBatchExec     = "https://gemini.google.com/_/BardChatUi/data/batchexecute"
+	EndpointUpload        = "https://content-push.googleapis.com/upload" // URL that UploadFile posts file contents to
 )
 
+// UploadFile uploads a file to Google content-push and returns its identifier
+func (c *Client) UploadFile(ctx context.Context, file FileData) (string, error) {
+	filename := file.FileName
+	if filename == "" {
+		filename = fmt.Sprintf("input_%d.jpg", time.Now().UnixNano())
+	}
+
+	headers := map[string]string{
+		"Push-ID": "feeds/mcudyrk2a4khkz",
+	}
+
+	resp, err := c.httpClient.R().
+		SetContext(ctx).
+		SetHeaders(headers).
+		SetFileReader("file", filename, bytes.NewReader(file.Data)).
+		Post(EndpointUpload)
+
+	if err != nil {
+		return "", err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("upload failed with status: %d", resp.StatusCode)
+	}
+
+	return resp.String(), nil
+}
+
 var DefaultHeaders = map[string]string{
 "Content-Type":  "application/x-www-form-urlencoded;charset=utf-8",
 "Origin":        "https://gemini.google.com",

diff --git a/internal/modules/providers/provider_interface.go b/internal/modules/providers/provider_interface.go
@@ -86,10 +86,17 @@ type SessionMetadata struct {
 // GenerateOption configures generation behavior
 type GenerateOption func(*GenerateConfig)
 
+// FileData represents a file to attach to the prompt
+type FileData struct {
+	MimeType string // MIME type as supplied by the caller (e.g. copied from a request's inline data)
+	Data     []byte // raw file bytes; this is the content UploadFile sends
+	FileName string // optional; a timestamp-based name is generated when empty
+}
+
 // GenerateConfig holds generation configuration
 type GenerateConfig struct {
 	Model       string
-	Files       []string
+	Files       []FileData // attachments; set via WithFiles and uploaded by Client.GenerateContent before the prompt is sent
 	Temperature float64
 	MaxTokens   int
 }
@@ -111,7 +118,7 @@ func WithModel(model string) GenerateOption {
 }
 
 // WithFiles adds files to the request
-func WithFiles(files []string) GenerateOption {
+func WithFiles(files []FileData) GenerateOption {
 	return func(c *GenerateConfig) {
 		c.Files = files
 	}

diff --git a/internal/server/server.go b/internal/server/server.go
@@ -19,7 +19,8 @@ import (
 // New creates a new Fiber app instance
 func NewGeminiWebToAPI(log *zap.Logger, cfg *configs.Config) *fiber.App {
 	app := fiber.New(fiber.Config{
-		AppName: "Gemini Web To API",
+		AppName:   "Gemini Web To API",
+		BodyLimit: 20 * 1024 * 1024, // 20 MB
 	})
 
 	app.Use(cors.New(cors.Config{