Skip to content

Commit 52b7e36

Browse files
Add webpage translation functionality
1 parent 54250f2 commit 52b7e36

File tree

3 files changed

+122
-9
lines changed

3 files changed

+122
-9
lines changed

README.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,30 @@ directly:
230230
- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
231231
- `downloadDocument()`
232232

233+
#### Translating webpages
234+
235+
Webpages can also be translated by calling `translateWebpage()`. It has the same signature as `translateDocument()` except for the
236+
first parameter, which is the URL for the webpage you would like translated.
237+
238+
```javascript
239+
// Translate the English DeepL Wikipedia page into German:
240+
await translator.translateWebpage(
241+
'https://en.wikipedia.org/wiki/DeepL_Translator',
242+
'DeepLWiki.html',
243+
'en',
244+
'de'
245+
);
246+
```
247+
248+
Like `translateDocument()`, `translateWebpage()` wraps multiple steps: fetching the webpage, uploading its HTML, polling status until
249+
the translation is complete, and downloading. If your application needs to
250+
execute these steps individually, you can instead use the following functions
251+
directly:
252+
253+
- `uploadWebpage()`,
254+
- `getDocumentStatus()` (or `isDocumentTranslationComplete()`), and
255+
- `downloadDocument()`
256+
233257
#### Document translation options
234258

235259
- `formality`: same as in [Text translation options](#text-translation-options).

src/client.ts

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,16 @@ export class HttpClient {
106106
url: string,
107107
timeoutMs: number,
108108
responseAsStream: boolean,
109+
isDeepL: boolean,
109110
options: SendRequestOptions,
110111
): AxiosRequestConfig {
111112
const headers = Object.assign({}, this.headers, options.headers);
113+
console.log(`isDeepL: ${isDeepL}`);
112114

113115
const axiosRequestConfig: AxiosRequestConfig = {
114-
url,
116+
url: isDeepL ? url : undefined,
115117
method,
116-
baseURL: this.serverUrl,
118+
baseURL: isDeepL ? this.serverUrl : url,
117119
headers,
118120
responseType: responseAsStream ? 'stream' : 'text',
119121
timeout: timeoutMs,
@@ -147,7 +149,7 @@ export class HttpClient {
147149
/**
148150
* Makes API request retrying if necessary, and returns (as Promise) response.
149151
* @param method HTTP method, for example 'GET'
150-
* @param url Path to endpoint, excluding base server URL.
152+
* @param url For DeepL API requests, the path to the endpoint, excluding the base server URL; for webpage requests, the full URL of the webpage.
151153
* @param options Additional options controlling request.
152154
* @param responseAsStream Set to true if the return type is IncomingMessage.
153155
* @return Fulfills with status code and response (as text or stream).
@@ -157,9 +159,16 @@ export class HttpClient {
157159
url: string,
158160
options?: SendRequestOptions,
159161
responseAsStream = false,
160-
): Promise<{ statusCode: number; content: TContent }> {
162+
): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
163+
let isDeepLUrl: boolean;
164+
try {
165+
isDeepLUrl = !!new URL(url);
166+
} catch {
167+
isDeepLUrl = true;
168+
}
169+
161170
options = options === undefined ? {} : options;
162-
logInfo(`Request to DeepL API ${method} ${url}`);
171+
logInfo(`${isDeepLUrl ? 'Request to DeepL API' : 'Request to webpage'} ${method} ${url}`);
163172
logDebug(`Request details: ${options.data}`);
164173
const backoff = new BackoffTimer();
165174
let response, error;
@@ -170,6 +179,7 @@ export class HttpClient {
170179
url,
171180
timeoutMs,
172181
responseAsStream,
182+
isDeepLUrl,
173183
options,
174184
);
175185
try {
@@ -199,8 +209,12 @@ export class HttpClient {
199209
}
200210

201211
if (response !== undefined) {
202-
const { statusCode, content } = response;
203-
logInfo(`DeepL API response ${method} ${url} ${statusCode}`);
212+
const { statusCode, content, contentType } = response;
213+
logInfo(
214+
`${
215+
isDeepLUrl ? 'DeepL API response' : 'Webpage response'
216+
} ${method} ${url} ${statusCode}${!isDeepLUrl ? ` ${contentType}` : ''}`,
217+
);
204218
if (!responseAsStream) {
205219
logDebug('Response details:', { content: content });
206220
}
@@ -217,7 +231,7 @@ export class HttpClient {
217231
*/
218232
private static async sendAxiosRequest<TContent extends string | IncomingMessage>(
219233
axiosRequestConfig: AxiosRequestConfig,
220-
): Promise<{ statusCode: number; content: TContent }> {
234+
): Promise<{ statusCode: number; content: TContent; contentType?: string }> {
221235
try {
222236
const response = await axios.request(axiosRequestConfig);
223237

@@ -227,7 +241,17 @@ export class HttpClient {
227241
response.data = JSON.stringify(response.data);
228242
}
229243
}
230-
return { statusCode: response.status, content: response.data };
244+
245+
let contentType: string | undefined = undefined;
246+
if (response.headers.getContentType) {
247+
if (typeof response.headers.getContentType === 'string') {
248+
contentType = response.headers.getContentType;
249+
} else {
250+
contentType = response.headers.getContentType()?.toString() ?? undefined;
251+
}
252+
}
253+
254+
return { statusCode: response.status, content: response.data, contentType };
231255
} catch (axios_error_raw) {
232256
const axiosError = axios_error_raw as AxiosError;
233257
const message: string = axiosError.message || '';

src/index.ts

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,37 @@ export class Translator {
645645
}
646646
}
647647

648+
/**
649+
* Uploads the HTML of the specified webpage to DeepL to translate into given target language, waits for
650+
* translation to complete, then downloads translated webpage to specified output path.
651+
* @param webpageUrl String or URL containing the URL of the webpage to be translated.
652+
* @param outputFile String containing file path to create translated document, or Stream or
653+
* FileHandle to write translated document content.
654+
* @param sourceLang Language code of input document, or null to use auto-detection.
655+
* @param targetLang Language code of language to translate into.
656+
* @param options Optional DocumentTranslateOptions object containing additional options controlling translation.
657+
* @return Fulfills with a DocumentStatus object for the completed translation. You can use the
658+
* billedCharacters property to check how many characters were billed for the document.
659+
* @throws {Error} If the webpage URL does not return HTML, or a file already exists at the output file path.
660+
* @throws {DocumentTranslationError} If any error occurs during document upload, translation or
661+
* download. The `documentHandle` property of the error may be used to recover the document.
662+
*/
663+
async translateWebpage(
664+
webpageUrl: string | URL,
665+
outputFile: string | fs.WriteStream | fs.promises.FileHandle,
666+
sourceLang: SourceLanguageCode | null,
667+
targetLang: TargetLanguageCode,
668+
options?: DocumentTranslateOptions,
669+
): Promise<DocumentStatus> {
670+
return this.translateDocument(
671+
Buffer.from(await this.getContentFromWebpage(webpageUrl)),
672+
outputFile,
673+
sourceLang,
674+
targetLang,
675+
{ filename: 'webpage.html', ...options },
676+
);
677+
}
678+
648679
/**
649680
* Uploads specified document to DeepL to translate into target language, and returns handle associated with the document.
650681
* @param inputFile String containing file path, stream containing file data, or FileHandle.
@@ -709,6 +740,28 @@ export class Translator {
709740
}
710741
}
711742

743+
/**
744+
* Fetches the HTML of the specified webpage, uploads it to DeepL to translate into target language, and returns handle associated with the document.
745+
* @param webpageUrl String or URL containing the URL of the webpage to be translated.
746+
* @param sourceLang Language code of input document, or null to use auto-detection.
747+
* @param targetLang Language code of language to translate into.
748+
* @param options Optional DocumentTranslateOptions object containing additional options controlling translation.
749+
* @return Fulfills with DocumentHandle associated with the in-progress translation.
750+
*/
751+
async uploadWebpage(
752+
webpageUrl: string | URL,
753+
sourceLang: SourceLanguageCode | null,
754+
targetLang: TargetLanguageCode,
755+
options?: DocumentTranslateOptions,
756+
): Promise<DocumentHandle> {
757+
return this.uploadDocument(
758+
Buffer.from(await this.getContentFromWebpage(webpageUrl)),
759+
sourceLang,
760+
targetLang,
761+
{ filename: 'webpage.html', ...options },
762+
);
763+
}
764+
712765
/**
713766
* Retrieves the status of the document translation associated with the given document handle.
714767
* @param handle Document handle associated with document.
@@ -1003,6 +1056,18 @@ export class Translator {
10031056
return libraryInfoString;
10041057
}
10051058

1059+
private async getContentFromWebpage(webpageUrl: string | URL): Promise<string> {
1060+
const { statusCode, content, contentType } =
1061+
await this.httpClient.sendRequestWithBackoff<string>('GET', webpageUrl.toString());
1062+
await checkStatusCode(statusCode, content);
1063+
1064+
if (!contentType?.includes('text/html')) {
1065+
throw new Error('URL to translate must return HTML');
1066+
}
1067+
1068+
return content;
1069+
}
1070+
10061071
/**
10071072
* HttpClient implements all HTTP requests and retries.
10081073
* @private

0 commit comments

Comments
 (0)