comapeo-docs/scripts/constants.ts at d1b5ff28f2732a771f0205949ff1529d18e7d030 · digidem/comapeo-docs · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
/**
 * Constants used across the Notion workflow scripts
 */
import dotenv from "dotenv";

// Load variables from the local .env file into process.env as a side effect
// of importing this module. `override: true` lets .env values replace
// variables that are already set in the environment, so local .env values
// take precedence. This runs before the constants below read process.env.
dotenv.config({ override: true });

// Main language configuration
// Name of the main language.
// NOTE(review): presumably matched against the Notion "Language" property
// value — confirm against callers.
export const MAIN_LANGUAGE = "English";

// Notion property names
// Maps internal keys to the property names used on the Notion side.
// `as const` keeps every value as a string-literal type and marks the object
// readonly at the type level.
export const NOTION_PROPERTIES = {
  TITLE: "Content elements",
  LANGUAGE: "Language",
  STATUS: "Publish Status",
  ORDER: "Order",
  TAGS: "Tags",
  ELEMENT_TYPE: "Element Type",
  READY_FOR_TRANSLATION: "Ready for translation",
  READY_TO_PUBLISH: "Ready to publish",
  PUBLISHED_DATE: "Date Published",
} as const;

// Translation language configurations
/** Describes one translation target language (see the LANGUAGES entries). */
export interface TranslationConfig {
  /** Language/locale code, e.g. "pt-BR" or "es". */
  language: string;
  /**
   * Language name on the Notion side, e.g. "Portuguese".
   * NOTE(review): presumably the value of the Notion "Language" property — confirm.
   */
  notionLangCode: string;
  /**
   * Directory path for this language, e.g.
   * "./i18n/pt/docusaurus-plugin-content-docs/current".
   * NOTE(review): presumably where translated docs are written — confirm.
   */
  outputDir: string;
}

// Notion page type
/**
 * Minimal shape of a Notion page object as used by these scripts.
 * Only `id`, `last_edited_time` and `properties` are typed explicitly; every
 * other field is `unknown` and must be narrowed before use.
 */
export interface NotionPage {
  /** Page identifier. */
  id: string;
  /**
   * Time of the last edit, as a string.
   * NOTE(review): presumably an ISO 8601 timestamp — confirm.
   */
  last_edited_time: string;
  /** Page properties keyed by property name; values are untyped. */
  properties: {
    [key: string]: unknown;
  };
  /** Any additional fields present on the page object. */
  [key: string]: unknown;
}

/** Configured translation target languages, one entry per language. */
export const LANGUAGES: TranslationConfig[] = [
  {
    language: "pt-BR",
    notionLangCode: "Portuguese",
    outputDir: "./i18n/pt/docusaurus-plugin-content-docs/current",
  },
  // Add more languages as needed, following the same shape as the
  // entries in this list.
  {
    language: "es",
    notionLangCode: "Spanish",
    outputDir: "./i18n/es/docusaurus-plugin-content-docs/current",
  },
];

// Maximum number of retries for API calls
export const MAX_RETRIES = 3;

// Notion API limits
// Number of blocks sent per request. The Notion API accepts at most 100
// blocks per request; 50 is used to stay safely below that limit.
export const NOTION_API_CHUNK_SIZE = 50; // Notion API has a limit of 100 blocks per request, using 50 to be safe

// Image processing constants
// NOTE(review): units and consumers are not visible in this file. The width
// is presumably in pixels and the quality values are presumably passed to the
// respective image encoders — confirm against the image-processing scripts.
export const IMAGE_MAX_WIDTH = 1280;
export const JPEG_QUALITY = 80;
export const PNG_COMPRESSION_LEVEL = 9;
export const WEBP_QUALITY = 80;
// Two-element range; presumably [min, max] on a 0–1 scale — confirm.
export const PNG_QUALITY_RANGE = [0.6, 0.8];

// OpenAI constants
// Value of the OPENAI_BASE_URL environment variable; undefined when unset.
export const OPENAI_BASE_URL = process.env.OPENAI_BASE_URL;
// OPENAI_MODEL when set to a non-empty value, otherwise "gpt-5-mini".
export const DEFAULT_OPENAI_MODEL = process.env.OPENAI_MODEL || "gpt-5-mini";
// Temperature returned by getModelParams for models that accept a custom one.
export const DEFAULT_OPENAI_TEMPERATURE = 0.3;
export const DEFAULT_OPENAI_MAX_TOKENS = 4096;

// True when OPENAI_BASE_URL is set to a non-empty value, i.e. a custom
// endpoint (such as DeepSeek) is configured instead of OpenAI's default API.
export const IS_CUSTOM_OPENAI_API = !!OPENAI_BASE_URL;

// Known model context limits (tokens), keyed by lower-case model name.
// NOTE(review): the original "gpt-4" model is commonly documented with a
// smaller window than 128k. The value is kept as-is because prefix matching
// also routes other "gpt-4…" names through this entry — confirm the intent.
const MODEL_CONTEXT_LIMITS: Record<string, number> = {
  // OpenAI models
  "gpt-5": 272000,
  "gpt-5-nano": 272000,
  "gpt-5-mini": 272000,
  "gpt-5.2": 272000,
  "gpt-4o": 128000,
  "gpt-4o-mini": 128000,
  "gpt-4-turbo": 128000,
  "gpt-4": 128000,
  "gpt-3.5-turbo": 16385,
  // DeepSeek models
  "deepseek-chat": 131072,
  "deepseek-coder": 131072,
};

/**
 * Gets the maximum context length (in tokens) for a given model.
 *
 * Resolution order, after trimming and lower-casing the name:
 * 1. The longest known model name that the input starts with. This covers
 *    exact matches and dated variants such as "gpt-5-mini-2025-01-01".
 * 2. The first known model name that starts with the input, which covers
 *    abbreviated names such as "deepseek".
 * 3. A conservative default for anything else, including an empty name.
 *
 * @param modelName Model name as configured; case and surrounding whitespace
 *   are ignored.
 * @returns The context limit in tokens.
 */
export function getModelContextLimit(modelName: string): number {
  // Conservative fallback for unknown models
  const fallbackLimit = 128000;
  const normalized = modelName.toLowerCase().trim();

  // An empty string is a prefix of every key, so without this guard an empty
  // or whitespace-only name would resolve to the first table entry instead of
  // the conservative fallback.
  if (normalized === "") {
    return fallbackLimit;
  }

  const knownModels = Object.entries(MODEL_CONTEXT_LIMITS);

  // Prefer the most specific known name: an exact match is simply the longest
  // possible prefix, and picking the longest prefix keeps the result
  // independent of the table's insertion order.
  let bestKeyLength = -1;
  let bestLimit = fallbackLimit;
  for (const [key, limit] of knownModels) {
    if (normalized.startsWith(key) && key.length > bestKeyLength) {
      bestKeyLength = key.length;
      bestLimit = limit;
    }
  }
  if (bestKeyLength >= 0) {
    return bestLimit;
  }

  // Abbreviated names (e.g. "deepseek"): first known model that extends the input.
  for (const [key, limit] of knownModels) {
    if (key.startsWith(normalized)) {
      return limit;
    }
  }

  return fallbackLimit;
}

/**
 * Estimates the largest chunk size, in characters, to send to a model.
 *
 * The model's context limit (tokens) is converted to characters at roughly
 * 3.5 characters per token and then divided by 1.5. About two thirds of the
 * estimated capacity is therefore used for the chunk, and the remainder is
 * left for prompt overhead, title, and response.
 *
 * @param modelName Model name, resolved through getModelContextLimit.
 * @returns The chunk size in characters, rounded down to an integer.
 */
export function getMaxChunkChars(modelName: string): number {
  const charsPerToken = 3.5;
  const headroomDivisor = 1.5;
  const estimatedContextChars = getModelContextLimit(modelName) * charsPerToken;
  return Math.floor(estimatedContextChars / headroomDivisor);
}

/**
 * Model name that accepts a custom temperature, but ONLY when the request
 * also sets reasoning_effort="none".
 * Based on: https://platform.openai.com/docs/guides/reasoning
 */
const GPT5_2_MODEL = "gpt-5.2";

/**
 * Builds the model-specific sampling parameters for an OpenAI API request.
 *
 * Temperature support differs between model families:
 * - "gpt-5.2" together with `useReasoningNone`: DEFAULT_OPENAI_TEMPERATURE
 *   plus reasoning_effort="none"
 * - "gpt-5", "gpt-5-nano", "gpt-5-mini" and their "-suffixed" variants:
 *   temperature fixed at 1
 * - everything else (including "gpt-5.2" without `useReasoningNone`):
 *   DEFAULT_OPENAI_TEMPERATURE
 *
 * @param modelName The OpenAI model name (e.g., "gpt-5-nano", "gpt-4o");
 *   case and surrounding whitespace are ignored
 * @param options Optional configuration for reasoning behavior
 * @returns Request params object with temperature and optionally reasoning_effort
 */
export function getModelParams(
  modelName: string,
  options: { useReasoningNone?: boolean } = {}
): { temperature: number; reasoning_effort?: "none" } {
  const model = modelName.trim().toLowerCase();

  // Only the exact gpt-5.2 name qualifies for the reasoning_effort="none" path.
  if (model === GPT5_2_MODEL && options.useReasoningNone) {
    return {
      temperature: DEFAULT_OPENAI_TEMPERATURE,
      reasoning_effort: "none",
    };
  }

  // Base GPT-5 family: match the bare name or a "-suffixed" variant of it.
  for (const base of ["gpt-5", "gpt-5-nano", "gpt-5-mini"]) {
    if (model === base || model.startsWith(`${base}-`)) {
      return { temperature: 1 };
    }
  }

  // Every remaining model takes the configured temperature.
  return { temperature: DEFAULT_OPENAI_TEMPERATURE };
}

// Safety messages
// Error text for attempts to create or update English pages.
export const ENGLISH_MODIFICATION_ERROR =
  "SAFETY ERROR: Cannot create or update English pages. This is a critical safety measure to prevent data loss.";
// Error text for attempts to save translated content into the English docs directory.
export const ENGLISH_DIR_SAVE_ERROR =
  "Safety check failed: Cannot save translated content to English docs directory";

// Translation retry configuration
/** Maximum number of retries for a translation request. */
export const TRANSLATION_MAX_RETRIES = 3;
/**
 * Base delay between translation retries, in milliseconds.
 * NOTE(review): how the delay grows between attempts is decided by the
 * caller and is not visible in this file — confirm.
 */
export const TRANSLATION_RETRY_BASE_DELAY_MS = 750;
/**
 * Reliability-oriented cap for proactive markdown translation chunking.
 * This keeps long-form docs away from the model's theoretical context ceiling,
 * even when the model advertises a much larger maximum context window.
 */
export const TRANSLATION_CHUNK_MAX_CHARS = 120_000;
/** Smallest total-budget chunk size used when retrying incomplete translations. */
export const TRANSLATION_MIN_CHUNK_MAX_CHARS = 8_000;
/**
 * Maximum times to retry with smaller chunks after completeness checks fail.
 * Each retry halves the chunk limit. Starting from 120 K chars:
 *   120k → 60k → 30k → 15k → 8k (floor)
 * Four halvings are needed to descend from the default cap to the 8k floor
 * (the last halving, 15k → 7.5k, is clamped to the 8k minimum),
 * so this must be at least 4.
 */
export const TRANSLATION_COMPLETENESS_MAX_RETRIES = 4;

// URL handling
// Placeholder URL: taken from the INVALID_URL_PLACEHOLDER environment
// variable when it is set to a non-empty value, otherwise the default below.
// NOTE(review): presumably substituted for invalid URLs found in content — confirm.
export const INVALID_URL_PLACEHOLDER =
  process.env.INVALID_URL_PLACEHOLDER ||
  "https://example.com/invalid-url-removed";

// Test environment configuration
// Branch patterns treated as safe in test mode (see isSafeTestBranch).
// A pattern ending in "/*" matches any branch starting with that prefix and
// slash (e.g. "test/*" matches "test/foo"); any other pattern must match the
// branch name exactly.
export const SAFE_BRANCH_PATTERNS = [
  "test/*",
  "fix/*",
  "feat/*",
  "chore/*",
  "refactor/*",
];

// Branch names that isSafeTestBranch never accepts in test mode (exact match).
export const PROTECTED_BRANCHES = ["main", "master", "content"];

/**
 * Reports whether the scripts are running in test mode.
 *
 * Test mode is on when TEST_MODE is exactly "true", or when either
 * TEST_DATABASE_ID or TEST_DATA_SOURCE_ID is set to a non-empty value.
 * The environment is read on every call.
 */
export function isTestMode(): boolean {
  const { TEST_MODE, TEST_DATABASE_ID, TEST_DATA_SOURCE_ID } = process.env;

  if (TEST_MODE === "true") {
    return true;
  }
  return Boolean(TEST_DATABASE_ID) || Boolean(TEST_DATA_SOURCE_ID);
}

/**
 * Returns the current value of the TEST_DATA_SOURCE_ID environment variable,
 * or undefined when it is not set.
 */
export function getTestDataSourceId(): string | undefined {
  const { TEST_DATA_SOURCE_ID: dataSourceId } = process.env;
  return dataSourceId;
}

/**
 * Returns the current value of the TEST_DATABASE_ID environment variable,
 * or undefined when it is not set.
 */
export function getTestDatabaseId(): string | undefined {
  const { TEST_DATABASE_ID: databaseId } = process.env;
  return databaseId;
}

/**
 * Decides whether `branch` may be used while running in test mode.
 *
 * Outside test mode every branch is accepted. In test mode a branch is
 * accepted only when it is not listed in PROTECTED_BRANCHES and either
 * - its name contains "test" (case-insensitive substring, so a name such as
 *   "latest" also qualifies), or
 * - it matches one of SAFE_BRANCH_PATTERNS, where a trailing "/*" means
 *   "starts with this prefix and slash" and any other pattern must be equal
 *   to the branch name.
 *
 * @param branch Branch name to check.
 * @returns true when the branch is allowed.
 */
export function isSafeTestBranch(branch: string): boolean {
  // No restrictions apply outside test mode.
  if (!isTestMode()) {
    return true;
  }

  // Protected branches are rejected regardless of any other match.
  if (PROTECTED_BRANCHES.includes(branch)) {
    return false;
  }

  if (/test/i.test(branch)) {
    return true;
  }

  // Plain string checks instead of building a RegExp from the pattern.
  for (const pattern of SAFE_BRANCH_PATTERNS) {
    const matches = pattern.endsWith("/*")
      ? branch.startsWith(pattern.slice(0, -1)) // drop "*", keep the "/"
      : branch === pattern;
    if (matches) {
      return true;
    }
  }

  return false;
}