110 changes: 69 additions & 41 deletions .claude/Skills/Art/tools/generate-ulart-image.ts
@@ -18,6 +18,16 @@ import { GoogleGenAI } from "@google/genai";
import { writeFile, readFile } from "node:fs/promises";
import { extname, resolve } from "node:path";

// ============================================================================
// Constants
// ============================================================================

/**
* PAI_DIR - Root directory for PAI configuration
* Reads from environment variable PAI_DIR, falls back to ~/.claude
*/
const PAI_DIR = process.env.PAI_DIR || resolve(process.env.HOME!, '.claude');

// ============================================================================
// Environment Loading
// ============================================================================
@@ -70,7 +80,7 @@ interface CLIArgs {
creativeVariations?: number;
aspectRatio?: ReplicateSize; // For Gemini models
transparent?: boolean; // Enable transparent background
referenceImage?: string; // Reference image path (Nano Banana Pro only)
referenceImages?: string[]; // Reference image paths (Nano Banana Pro only, up to 5 for humans, 14 total)
removeBg?: boolean; // Remove background after generation using remove.bg API
}

@@ -143,9 +153,10 @@ OPTIONS:
--aspect-ratio <ratio> Aspect ratio for Gemini nano-banana-pro (default: 16:9)
Options: 1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9
--output <path> Output file path (default: /tmp/ul-image.png)
--reference-image <path> Reference image for style/composition guidance (Nano Banana Pro only)
Accepts: PNG, JPEG, WebP images
Model will use this image as visual reference while following text prompt
--reference-image <path> Reference image(s) for character/style consistency (Nano Banana Pro only)
Can be specified multiple times: --reference-image a.jpg --reference-image b.jpg
Accepts: PNG, JPEG, WebP images (up to 5 for humans, 14 total)
Model uses these images to maintain likeness while following text prompt
--transparent Enable transparent background (adds transparency instructions to prompt)
Note: Not all models support transparency natively; may require post-processing
--remove-bg Remove background after generation using remove.bg API
@@ -175,10 +186,15 @@ EXAMPLES:
generate-ulart-image --model gpt-image-1 --prompt "..." --creative-variations 3 --output /tmp/essay.png
# Outputs: /tmp/essay-v1.png, /tmp/essay-v2.png, /tmp/essay-v3.png

# Generate with reference image for style guidance (Nano Banana Pro only)
# Generate with single reference image for style guidance (Nano Banana Pro only)
generate-ulart-image --model nano-banana-pro --prompt "Tokyo Night themed illustration..." \\
--reference-image /tmp/style-reference.png --size 2K --aspect-ratio 16:9

# Generate with multiple reference images for character consistency (up to 5 for humans)
generate-ulart-image --model nano-banana-pro --prompt "Person from references at a party..." \\
--reference-image face1.jpg --reference-image face2.jpg --reference-image face3.jpg \\
--size 2K --aspect-ratio 16:9

NOTE: For true creative diversity with different prompts, use the creative workflow in Kai which
integrates the be-creative skill. CLI creative mode generates multiple images with the SAME prompt.

@@ -268,7 +284,11 @@ function parseArgs(argv: string[]): CLIArgs {
i++; // Skip next arg (value)
break;
case "reference-image":
parsed.referenceImage = value;
// Collect multiple reference images into an array
if (!parsed.referenceImages) {
parsed.referenceImages = [];
}
parsed.referenceImages.push(value);
i++; // Skip next arg (value)
break;
case "creative-variations":
@@ -294,10 +314,15 @@
}

// Validate reference-image is only used with nano-banana-pro
if (parsed.referenceImage && parsed.model !== "nano-banana-pro") {
if (parsed.referenceImages && parsed.referenceImages.length > 0 && parsed.model !== "nano-banana-pro") {
throw new CLIError("--reference-image is only supported with --model nano-banana-pro");
}

// Validate reference image count (API limits: 5 human, 6 object, 14 total)
if (parsed.referenceImages && parsed.referenceImages.length > 14) {
throw new CLIError(`Too many reference images: ${parsed.referenceImages.length}. Maximum is 14 total (5 for humans, 6 for objects)`);
}

// Validate size based on model
if (parsed.model === "gpt-image-1") {
if (!OPENAI_SIZES.includes(parsed.size as OpenAISize)) {
@@ -450,7 +475,7 @@ async function generateWithNanoBananaPro(
size: GeminiSize,
aspectRatio: ReplicateSize,
output: string,
referenceImage?: string
referenceImages?: string[]
): Promise<void> {
const apiKey = process.env.GOOGLE_API_KEY;
if (!apiKey) {
@@ -459,45 +484,48 @@

const ai = new GoogleGenAI({ apiKey });

if (referenceImage) {
console.log(`🍌✨ Generating with Nano Banana Pro (Gemini 3 Pro) at ${size} ${aspectRatio} with reference image...`);
const refCount = referenceImages?.length || 0;
if (refCount > 0) {
console.log(`🍌✨ Generating with Nano Banana Pro (Gemini 3 Pro) at ${size} ${aspectRatio} with ${refCount} reference image(s)...`);
} else {
console.log(`🍌✨ Generating with Nano Banana Pro (Gemini 3 Pro) at ${size} ${aspectRatio}...`);
}

// Prepare content parts
const parts: Array<{ text?: string; inlineData?: { mimeType: string; data: string } }> = [];

// Add reference image if provided
if (referenceImage) {
// Read image file
const imageBuffer = await readFile(referenceImage);
const imageBase64 = imageBuffer.toString("base64");

// Determine MIME type from extension
const ext = extname(referenceImage).toLowerCase();
let mimeType: string;
switch (ext) {
case ".png":
mimeType = "image/png";
break;
case ".jpg":
case ".jpeg":
mimeType = "image/jpeg";
break;
case ".webp":
mimeType = "image/webp";
break;
default:
throw new CLIError(`Unsupported image format: ${ext}. Supported: .png, .jpg, .jpeg, .webp`);
}
// Add reference images if provided (up to 14 total, 5 for humans)
if (referenceImages && referenceImages.length > 0) {
for (const referenceImage of referenceImages) {
// Read image file
const imageBuffer = await readFile(referenceImage);
const imageBase64 = imageBuffer.toString("base64");

// Determine MIME type from extension
const ext = extname(referenceImage).toLowerCase();
let mimeType: string;
switch (ext) {
case ".png":
mimeType = "image/png";
break;
case ".jpg":
case ".jpeg":
mimeType = "image/jpeg";
break;
case ".webp":
mimeType = "image/webp";
break;
default:
throw new CLIError(`Unsupported image format: ${ext}. Supported: .png, .jpg, .jpeg, .webp`);
}

parts.push({
inlineData: {
mimeType,
data: imageBase64,
},
});
parts.push({
inlineData: {
mimeType,
data: imageBase64,
},
});
}
}

// Add text prompt
@@ -583,7 +611,7 @@ async function main(): Promise<void> {
args.size as GeminiSize,
args.aspectRatio!,
varOutput,
args.referenceImage
args.referenceImages
)
);
} else if (args.model === "gpt-image-1") {
@@ -607,7 +635,7 @@
args.size as GeminiSize,
args.aspectRatio!,
args.output,
args.referenceImage
args.referenceImages
);
} else if (args.model === "gpt-image-1") {
await generateWithGPTImage(finalPrompt, args.size as OpenAISize, args.output);
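
For context, the call that consumes the accumulated parts array is collapsed in this diff. The sketch below shows one way the flow likely continues downstream of the hunk above, assuming the standard @google/genai generateContent pattern already used elsewhere in the file. The model ID, the contents shape, and the response handling here are assumptions for illustration, not code from this PR; size and aspect-ratio configuration is omitted.

// Minimal sketch (not part of the PR): consuming the text + inlineData parts
// built by generateWithNanoBananaPro. Model ID and response handling are
// placeholders; the actual values are not visible in the diff.
import { GoogleGenAI } from "@google/genai";
import { writeFile } from "node:fs/promises";

async function sketchGenerate(
  apiKey: string,
  parts: Array<{ text?: string; inlineData?: { mimeType: string; data: string } }>,
  output: string
): Promise<void> {
  const ai = new GoogleGenAI({ apiKey });

  // "gemini-3-pro-image-preview" is a placeholder model ID.
  const response = await ai.models.generateContent({
    model: "gemini-3-pro-image-preview",
    contents: [{ role: "user", parts }],
  });

  // Find the first inline image part in the response and write it to disk.
  const imagePart = response.candidates?.[0]?.content?.parts?.find(
    (p) => p.inlineData?.data
  );
  if (!imagePart?.inlineData?.data) {
    throw new Error("No image data returned");
  }
  await writeFile(output, Buffer.from(imagePart.inlineData.data, "base64"));
}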