Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions demo_ask_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import io
import json
import base64
import requests
import sys
from PIL import Image as PILImage

# Configuration
API_URL = "http://localhost:4981/gemini/v1beta/models/gemini-advanced:generateContent"

def encode_image(image_path, max_size=(1024, 1024), quality=80):
    """Load an image, shrink it, re-encode it as JPEG and return it as Base64.

    Args:
        image_path: Path of the image file, as accepted by ``PIL.Image.open``.
        max_size: ``(width, height)`` bounding box. Larger images are scaled
            down to fit inside it while keeping their aspect ratio.
        quality: JPEG quality setting forwarded to ``Image.save``.

    Returns:
        The compressed JPEG bytes encoded as a Base64 ``str``.

    On any failure the error is printed and the process exits with status 1.
    """
    # Modes Pillow's JPEG writer accepts directly; anything else (RGBA, P,
    # LA, PA, I;16, F, ...) must be converted first or ``save`` raises.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    try:
        # The context manager releases the file handle that a bare
        # ``PILImage.open`` would otherwise keep open.
        with PILImage.open(image_path) as source:
            if source.mode in jpeg_modes:
                img = source
            else:
                img = source.convert("RGB")

            # Shrink in place if the image exceeds the bounding box
            # (aspect ratio is preserved; smaller images are left alone).
            img.thumbnail(max_size, PILImage.Resampling.LANCZOS)

            # Encode to an in-memory JPEG to keep the payload small.
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=quality, optimize=True)

        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except ImportError:
        # Kept for parity with the original script; Pillow is imported at
        # module level, so this is only a defensive fallback.
        print("Lỗi: Bạn cần cài đặt thư viện Pillow để nén ảnh. Chạy lệnh: pip install Pillow")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of a CLI script: report and stop.
        print(f"Lỗi khi xử lý ảnh: {e}")
        sys.exit(1)

def main():
    """Ask the local Gemini proxy a question about an image and print the reply.

    Usage: ``python3 demo_ask_image.py <image_path> "<question>"``.

    The image is compressed with ``encode_image`` and posted together with the
    question to ``API_URL``. Exits with status 1 on missing arguments or when
    the API call fails.
    """
    # argv[0] is the script name, so an image path plus a question means
    # at least three entries.
    if len(sys.argv) < 3:
        print("Sử dụng: python3 demo_ask_image.py <đường_dẫn_tới_ảnh> \"<câu_hỏi_của_bạn>\"")
        print("Ví dụ: python3 demo_ask_image.py 5_3d_visualization.png \"Trục X đại diện cho cái gì?\"")
        sys.exit(1)

    image_path = sys.argv[1]
    prompt_text = sys.argv[2]  # The user's question

    print(f"Bức ảnh: {image_path}")
    print(f"Câu hỏi: {prompt_text}")
    print("Đang xử lý và tải ảnh lên...")

    base64_image = encode_image(image_path)

    # Request body sent to the Go server.
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt_text},
                    {
                        "inlineData": {
                            # encode_image always produces JPEG bytes.
                            "mimeType": "image/jpeg",
                            "data": base64_image,
                        }
                    },
                ]
            }
        ]
    }

    headers = {
        "Content-Type": "application/json"
    }

    # (connect, read) timeouts in seconds. Without a timeout requests waits
    # forever if the server never answers; the read limit is generous
    # because generation can be slow.
    request_timeout = (10, 300)

    print("Đang chờ Gemini trả lời...\n")
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            data=json.dumps(payload),
            timeout=request_timeout,
        )
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        print(f"Lỗi gọi API: {e}")
        failed_response = getattr(e, 'response', None)
        if failed_response is not None:
            print(f"Chi tiết: {failed_response.text}")
        # Signal the failure to the calling shell.
        sys.exit(1)

    print("============== GEMINI TRẢ LỜI ==============")
    try:
        answer = result['candidates'][0]['content']['parts'][0]['text']
        print(answer)
    except (KeyError, IndexError, TypeError):
        # TypeError: a field was null / not the expected container type.
        print("Cấu trúc phản hồi không khớp dự kiến. Dữ liệu gốc:")
        print(json.dumps(result, indent=2))
    print("===========================================\n")

if __name__ == "__main__":
main()
92 changes: 92 additions & 0 deletions demo_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import io
import json
import base64
import requests
import sys
from PIL import Image as PILImage

# Configuration
API_URL = "http://localhost:4981/gemini/v1beta/models/gemini-advanced:generateContent"

def encode_image(image_path, max_size=(1024, 1024), quality=80):
    """Load an image, shrink it, re-encode it as JPEG and return it as Base64.

    Args:
        image_path: Path of the image file, as accepted by ``PIL.Image.open``.
        max_size: ``(width, height)`` bounding box. Larger images are scaled
            down to fit inside it while keeping their aspect ratio.
        quality: JPEG quality setting forwarded to ``Image.save``.

    Returns:
        The compressed JPEG bytes encoded as a Base64 ``str``.

    On any failure the error is printed and the process exits with status 1.
    """
    # Modes Pillow's JPEG writer accepts directly; anything else (RGBA, P,
    # LA, PA, I;16, F, ...) must be converted first or ``save`` raises.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    try:
        # The context manager releases the file handle that a bare
        # ``PILImage.open`` would otherwise keep open.
        with PILImage.open(image_path) as source:
            if source.mode in jpeg_modes:
                img = source
            else:
                img = source.convert("RGB")

            # Shrink in place if the image exceeds the bounding box
            # (aspect ratio is preserved; smaller images are left alone).
            img.thumbnail(max_size, PILImage.Resampling.LANCZOS)

            # Encode to an in-memory JPEG to keep the payload small.
            buffer = io.BytesIO()
            img.save(buffer, format="JPEG", quality=quality, optimize=True)

        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except ImportError:
        # Kept for parity with the original script; Pillow is imported at
        # module level, so this is only a defensive fallback.
        print("Lỗi: Bạn cần cài đặt thư viện Pillow để nén ảnh. Chạy lệnh: pip install Pillow")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary of a CLI script: report and stop.
        print(f"Lỗi khi xử lý ảnh: {e}")
        sys.exit(1)

def main():
    """Upload an image to the local Gemini proxy and print its description.

    Usage: ``python demo_upload.py <image_path>``.

    The image is compressed with ``encode_image`` and posted with a fixed
    "describe this picture" prompt to ``API_URL``. Exits with status 1 on
    missing arguments or when the API call fails.
    """
    if len(sys.argv) < 2:
        print("Sử dụng: python demo_upload.py <đường_dẫn_tới_ảnh>")
        print("Ví dụ: python demo_upload.py 5_3d_visualization.png")
        sys.exit(1)

    image_path = sys.argv[1]
    prompt_text = "Mô tả chi tiết bức ảnh này."

    print(f"Đang chuẩn bị gửi ảnh: {image_path}")
    base64_image = encode_image(image_path)

    # Request body sent to the Go server (Gemini / Vertex AI request shape).
    payload = {
        "contents": [
            {
                "parts": [
                    {"text": prompt_text},
                    {
                        "inlineData": {
                            # encode_image always re-encodes to JPEG.
                            "mimeType": "image/jpeg",
                            "data": base64_image,
                        }
                    },
                ]
            }
        ]
    }

    headers = {
        "Content-Type": "application/json"
    }

    # (connect, read) timeouts in seconds. Without a timeout requests waits
    # forever if the server never answers; the read limit is generous
    # because generation can be slow.
    request_timeout = (10, 300)

    print(f"Đang gửi yêu cầu tới {API_URL}...")
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            data=json.dumps(payload),
            timeout=request_timeout,
        )
        # Raise for HTTP error statuses (500, 400, ...).
        response.raise_for_status()
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        print(f"Lỗi gọi API: {e}")
        failed_response = getattr(e, 'response', None)
        if failed_response is not None:
            print(f"Chi tiết response: {failed_response.text}")
        # Signal the failure to the calling shell.
        sys.exit(1)

    print("\n--- Gemini Trả Lời ---")
    # Pull the text of the first part of the first candidate.
    try:
        answer = result['candidates'][0]['content']['parts'][0]['text']
        print(answer)
    except (KeyError, IndexError, TypeError):
        # TypeError: a field was null / not the expected container type.
        print("Cấu trúc phản hồi không khớp dự kiến. Dữ liệu gốc:")
        print(json.dumps(result, indent=2))
    print("------------------------\n")

if __name__ == "__main__":
main()
21 changes: 19 additions & 2 deletions internal/modules/gemini/gemini_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package gemini

import (
"context"
"encoding/base64"
"fmt"
"strings"

Expand All @@ -28,24 +29,40 @@ func (s *GeminiService) ListModels() []providers.ModelInfo {
}

func (s *GeminiService) GenerateContent(ctx context.Context, modelID string, req dto.GeminiGenerateRequest) (*dto.GeminiGenerateResponse, error) {
// Logic: Extract prompt
// Logic: Extract prompt and files
var promptBuilder strings.Builder
var files []providers.FileData

for _, content := range req.Contents {
for _, part := range content.Parts {
if part.Text != "" {
promptBuilder.WriteString(part.Text)
promptBuilder.WriteString("\n")
}
if part.InlineData != nil {
dataBytes, err := base64.StdEncoding.DecodeString(part.InlineData.Data)
if err != nil {
s.log.Warn("Failed to decode base64 inline data", zap.Error(err))
continue
}
files = append(files, providers.FileData{
MimeType: part.InlineData.MimeType,
Data: dataBytes,
})
}
}
}

prompt := strings.TrimSpace(promptBuilder.String())
if prompt == "" {
if prompt == "" && len(files) == 0 {
return nil, fmt.Errorf("empty content")
}

// Logic: Call Provider
opts := []providers.GenerateOption{providers.WithModel(modelID)}
if len(files) > 0 {
opts = append(opts, providers.WithFiles(files))
}
response, err := s.client.GenerateContent(ctx, prompt, opts...)
if err != nil {
return nil, err
Expand Down
74 changes: 67 additions & 7 deletions internal/modules/providers/gemini_service.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package providers

import (
"bytes"
"compress/gzip"
"context"
"crypto/sha256"
Expand Down Expand Up @@ -494,10 +495,39 @@ func (c *Client) GenerateContent(ctx context.Context, prompt string, options ...
return nil, errors.New("client not initialized")
}

var reqFileData interface{} = nil
var fileDataArr []interface{}

if len(config.Files) > 0 {
for _, file := range config.Files {
filename := file.FileName
if filename == "" {
filename = fmt.Sprintf("input_%d", time.Now().UnixNano())
file.FileName = filename
}

url, err := c.UploadFile(ctx, file)
if err != nil {
return nil, fmt.Errorf("failed to upload file %s: %w", filename, err)
}

fileDataArr = append(fileDataArr, []interface{}{
[]interface{}{url}, filename,
})
}
reqFileData = fileDataArr
}

var messageContent []interface{}
if reqFileData != nil {
messageContent = []interface{}{prompt, 0, nil, reqFileData, nil, nil, 0}
} else {
messageContent = []interface{}{prompt}
}

// Build request payload
// The structure confirmed to work for model selection is [ [prompt], nil, nil, model ]
inner := []interface{}{
[]interface{}{prompt},
messageContent,
nil,
nil,
config.Model,
Expand All @@ -507,6 +537,7 @@ func (c *Client) GenerateContent(ctx context.Context, prompt string, options ...
outer := []interface{}{nil, string(innerJSON)}
outerJSON, _ := json.Marshal(outer)


formData := map[string]string{
"at": at,
"f.req": string(outerJSON),
Expand Down Expand Up @@ -840,13 +871,42 @@ func (c *Client) ClearCookieCache() error {
}

const (
EndpointGoogle = "https://www.google.com"
EndpointInit = "https://gemini.google.com/app"
EndpointGenerate = "https://gemini.google.com/_/BardChatUi/data/assistant.lamda.BardFrontendService/StreamGenerate"
EndpointRotateCookies = "https://accounts.google.com/RotateCookies"
EndpointBatchExec = "https://gemini.google.com/_/BardChatUi/data/batchexecute"
EndpointGoogle = "https://www.google.com"
EndpointInit = "https://gemini.google.com/app"
EndpointGenerate = "https://gemini.google.com/_/BardChatUi/data/assistant.lamda.BardFrontendService/StreamGenerate"
EndpointRotateCookies = "https://accounts.google.com/RotateCookies"
EndpointBatchExec = "https://gemini.google.com/_/BardChatUi/data/batchexecute"
EndpointUpload = "https://content-push.googleapis.com/upload"
)

// UploadFile sends file.Data to EndpointUpload as the "file" upload field and
// returns the response rendered by resp.String(), which callers treat as the
// uploaded file's identifier.
//
// When file.FileName is empty a timestamp-based "input_<nanos>.jpg" name is
// used. A request error is returned unchanged; any status other than 200 OK
// is reported as an error carrying the status code.
func (c *Client) UploadFile(ctx context.Context, file FileData) (string, error) {
	name := file.FileName
	if name == "" {
		name = fmt.Sprintf("input_%d.jpg", time.Now().UnixNano())
	}

	uploadHeaders := map[string]string{"Push-ID": "feeds/mcudyrk2a4khkz"}

	request := c.httpClient.R().
		SetContext(ctx).
		SetHeaders(uploadHeaders).
		SetFileReader("file", name, bytes.NewReader(file.Data))

	resp, err := request.Post(EndpointUpload)
	switch {
	case err != nil:
		return "", err
	case resp.StatusCode != http.StatusOK:
		return "", fmt.Errorf("upload failed with status: %d", resp.StatusCode)
	}

	return resp.String(), nil
}

var DefaultHeaders = map[string]string{
"Content-Type": "application/x-www-form-urlencoded;charset=utf-8",
"Origin": "https://gemini.google.com",
Expand Down
11 changes: 9 additions & 2 deletions internal/modules/providers/provider_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,17 @@ type SessionMetadata struct {
// GenerateOption configures generation behavior
type GenerateOption func(*GenerateConfig)

// FileData represents a file to attach to the prompt.
// It carries the already-decoded file content together with the metadata
// needed to upload it.
type FileData struct {
// MimeType is the media type reported for Data (for example "image/jpeg").
MimeType string
// Data holds the raw file bytes (decoded, not Base64).
Data []byte
// FileName is optional; when it is empty the upload code generates a
// timestamp-based name.
FileName string
}

// GenerateConfig holds generation configuration
type GenerateConfig struct {
Model string
Files []string
Files []FileData
Temperature float64
MaxTokens int
}
Expand All @@ -111,7 +118,7 @@ func WithModel(model string) GenerateOption {
}

// WithFiles adds files to the request
func WithFiles(files []string) GenerateOption {
func WithFiles(files []FileData) GenerateOption {
return func(c *GenerateConfig) {
c.Files = files
}
Expand Down
3 changes: 2 additions & 1 deletion internal/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ import (
// New creates a new Fiber app instance
func NewGeminiWebToAPI(log *zap.Logger, cfg *configs.Config) *fiber.App {
app := fiber.New(fiber.Config{
AppName: "Gemini Web To API",
AppName: "Gemini Web To API",
BodyLimit: 20 * 1024 * 1024, // 20 MB
})

app.Use(cors.New(cors.Config{
Expand Down