Skip to content

Commit 6a0fc19

Browse files
committed
refactor(chatbot): integrate OpenAI service into chatbot implementations
- Updated various chatbot services (Typebot, Dify, EvolutionBot, Flowise, N8n) to include the OpenAI service for audio transcription capabilities.
- Modified constructors to accept OpenaiService as a dependency, enhancing the ability to transcribe audio messages directly within each service.
- Refactored the handling of `keywordFinish` in multiple controllers and services, changing its type from an array to a string for consistency and simplifying logic.
- Removed redundant audio transcription logic from the base service, centralizing it within the OpenAI service to improve maintainability and reduce code duplication.

This commit focuses on enhancing the chatbot services by integrating OpenAI's transcription capabilities, improving code structure, and ensuring consistent handling of session keywords.
1 parent 9cedf31 commit 6a0fc19

File tree

10 files changed

+124
-90
lines changed

10 files changed

+124
-90
lines changed

src/api/integrations/chatbot/base-chatbot.controller.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import { ChatbotController, ChatbotControllerInterface, EmitData } from './chatb
1313
// Common settings interface for all chatbot integrations
1414
export interface ChatbotSettings {
1515
expire: number;
16-
keywordFinish: string[];
16+
keywordFinish: string;
1717
delayMessage: number;
1818
unknownMessage: string;
1919
listeningFromMe: boolean;
@@ -344,7 +344,6 @@ export abstract class BaseChatbotController<BotType = any, BotData extends BaseC
344344
const settings = await this.settingsRepository.create({
345345
data: {
346346
...settingsData,
347-
instanceId: instanceId,
348347
Instance: {
349348
connect: {
350349
id: instanceId,
@@ -399,7 +398,7 @@ export abstract class BaseChatbotController<BotType = any, BotData extends BaseC
399398
if (!settings) {
400399
return {
401400
expire: 300,
402-
keywordFinish: ['bye', 'exit', 'quit', 'stop'],
401+
keywordFinish: 'bye',
403402
delayMessage: 1000,
404403
unknownMessage: 'Sorry, I dont understand',
405404
listeningFromMe: true,

src/api/integrations/chatbot/base-chatbot.service.ts

Lines changed: 3 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,9 @@ import { InstanceDto } from '@api/dto/instance.dto';
22
import { PrismaRepository } from '@api/repository/repository.service';
33
import { WAMonitoringService } from '@api/services/monitor.service';
44
import { Integration } from '@api/types/wa.types';
5-
import { ConfigService, Language } from '@config/env.config';
5+
import { ConfigService } from '@config/env.config';
66
import { Logger } from '@config/logger.config';
77
import { IntegrationSession } from '@prisma/client';
8-
import axios from 'axios';
9-
import FormData from 'form-data';
108

119
/**
1210
* Base class for all chatbot service implementations
@@ -73,46 +71,6 @@ export abstract class BaseChatbotService<BotType = any, SettingsType = any> {
7371
return null;
7472
}
7573

76-
/**
77-
* Transcribes audio to text using OpenAI's Whisper API
78-
*/
79-
protected async speechToText(audioBuffer: Buffer): Promise<string | null> {
80-
if (!this.configService) {
81-
this.logger.error('ConfigService not available for speech-to-text transcription');
82-
return null;
83-
}
84-
85-
try {
86-
// Try to get the API key from process.env directly since ConfigService might not access it correctly
87-
const apiKey = this.configService.get<any>('OPENAI')?.API_KEY || process.env.OPENAI_API_KEY;
88-
if (!apiKey) {
89-
this.logger.error('No OpenAI API key set for Whisper transcription');
90-
return null;
91-
}
92-
93-
const lang = this.configService.get<Language>('LANGUAGE').includes('pt')
94-
? 'pt'
95-
: this.configService.get<Language>('LANGUAGE');
96-
97-
const formData = new FormData();
98-
formData.append('file', audioBuffer, 'audio.ogg');
99-
formData.append('model', 'whisper-1');
100-
formData.append('language', lang);
101-
102-
const response = await axios.post('https://api.openai.com/v1/audio/transcriptions', formData, {
103-
headers: {
104-
...formData.getHeaders(),
105-
Authorization: `Bearer ${apiKey}`,
106-
},
107-
});
108-
109-
return response?.data?.text || null;
110-
} catch (err) {
111-
this.logger.error(`Whisper transcription failed: ${err}`);
112-
return null;
113-
}
114-
}
115-
11674
/**
11775
* Create a new chatbot session
11876
*/
@@ -174,12 +132,9 @@ export abstract class BaseChatbotService<BotType = any, SettingsType = any> {
174132
}
175133

176134
// For existing sessions, keywords might indicate the conversation should end
177-
const keywordFinish = (settings as any)?.keywordFinish || [];
135+
const keywordFinish = (settings as any)?.keywordFinish || '';
178136
const normalizedContent = content.toLowerCase().trim();
179-
if (
180-
keywordFinish.length > 0 &&
181-
keywordFinish.some((keyword: string) => normalizedContent === keyword.toLowerCase())
182-
) {
137+
if (keywordFinish.length > 0 && normalizedContent === keywordFinish.toLowerCase()) {
183138
// Update session to closed and return
184139
await this.prismaRepository.integrationSession.update({
185140
where: {

src/api/integrations/chatbot/dify/services/dify.service.ts

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,21 @@ import { Auth, ConfigService, HttpServer } from '@config/env.config';
66
import { Dify, DifySetting, IntegrationSession } from '@prisma/client';
77
import { sendTelemetry } from '@utils/sendTelemetry';
88
import axios from 'axios';
9-
import { downloadMediaMessage } from 'baileys';
109

1110
import { BaseChatbotService } from '../../base-chatbot.service';
11+
import { OpenaiService } from '../../openai/services/openai.service';
1212

1313
export class DifyService extends BaseChatbotService<Dify, DifySetting> {
14-
constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) {
14+
private openaiService: OpenaiService;
15+
16+
constructor(
17+
waMonitor: WAMonitoringService,
18+
configService: ConfigService,
19+
prismaRepository: PrismaRepository,
20+
openaiService: OpenaiService,
21+
) {
1522
super(waMonitor, prismaRepository, 'DifyService', configService);
23+
this.openaiService = openaiService;
1624
}
1725

1826
/**
@@ -73,10 +81,9 @@ export class DifyService extends BaseChatbotService<Dify, DifySetting> {
7381
if (this.isAudioMessage(content) && msg) {
7482
try {
7583
this.logger.debug(`[Dify] Downloading audio for Whisper transcription`);
76-
const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
77-
const transcribedText = await this.speechToText(mediaBuffer);
78-
if (transcribedText) {
79-
payload.query = transcribedText;
84+
const transcription = await this.openaiService.speechToText(msg);
85+
if (transcription) {
86+
payload.query = transcription;
8087
} else {
8188
payload.query = '[Audio message could not be transcribed]';
8289
}
@@ -151,10 +158,9 @@ export class DifyService extends BaseChatbotService<Dify, DifySetting> {
151158
if (this.isAudioMessage(content) && msg) {
152159
try {
153160
this.logger.debug(`[Dify] Downloading audio for Whisper transcription`);
154-
const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
155-
const transcribedText = await this.speechToText(mediaBuffer);
156-
if (transcribedText) {
157-
payload.inputs.query = transcribedText;
161+
const transcription = await this.openaiService.speechToText(msg);
162+
if (transcription) {
163+
payload.inputs.query = transcription;
158164
} else {
159165
payload.inputs.query = '[Audio message could not be transcribed]';
160166
}
@@ -229,10 +235,9 @@ export class DifyService extends BaseChatbotService<Dify, DifySetting> {
229235
if (this.isAudioMessage(content) && msg) {
230236
try {
231237
this.logger.debug(`[Dify] Downloading audio for Whisper transcription`);
232-
const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
233-
const transcribedText = await this.speechToText(mediaBuffer);
234-
if (transcribedText) {
235-
payload.query = transcribedText;
238+
const transcription = await this.openaiService.speechToText(msg);
239+
if (transcription) {
240+
payload.query = transcription;
236241
} else {
237242
payload.query = '[Audio message could not be transcribed]';
238243
}

src/api/integrations/chatbot/evolutionBot/services/evolutionBot.service.ts

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,19 @@ import { sendTelemetry } from '@utils/sendTelemetry';
88
import axios from 'axios';
99

1010
import { BaseChatbotService } from '../../base-chatbot.service';
11+
import { OpenaiService } from '../../openai/services/openai.service';
1112

1213
export class EvolutionBotService extends BaseChatbotService<EvolutionBot, EvolutionBotSetting> {
13-
constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) {
14+
private openaiService: OpenaiService;
15+
16+
constructor(
17+
waMonitor: WAMonitoringService,
18+
configService: ConfigService,
19+
prismaRepository: PrismaRepository,
20+
openaiService: OpenaiService,
21+
) {
1422
super(waMonitor, prismaRepository, 'EvolutionBotService', configService);
23+
this.openaiService = openaiService;
1524
}
1625

1726
/**
@@ -50,6 +59,21 @@ export class EvolutionBotService extends BaseChatbotService<EvolutionBot, Evolut
5059
user: remoteJid,
5160
};
5261

62+
if (this.isAudioMessage(content) && msg) {
63+
try {
64+
this.logger.debug(`[EvolutionBot] Downloading audio for Whisper transcription`);
65+
const transcription = await this.openaiService.speechToText(msg);
66+
if (transcription) {
67+
payload.query = transcription;
68+
} else {
69+
payload.query = '[Audio message could not be transcribed]';
70+
}
71+
} catch (err) {
72+
this.logger.error(`[EvolutionBot] Failed to transcribe audio: ${err}`);
73+
payload.query = '[Audio message could not be transcribed]';
74+
}
75+
}
76+
5377
if (this.isImageMessage(content)) {
5478
const contentSplit = content.split('|');
5579

src/api/integrations/chatbot/flowise/services/flowise.service.ts

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,18 @@ import { sendTelemetry } from '@utils/sendTelemetry';
88
import axios from 'axios';
99

1010
import { BaseChatbotService } from '../../base-chatbot.service';
11+
import { OpenaiService } from '../../openai/services/openai.service';
1112

1213
export class FlowiseService extends BaseChatbotService<Flowise, FlowiseSetting> {
13-
constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) {
14+
private openaiService: OpenaiService;
15+
constructor(
16+
waMonitor: WAMonitoringService,
17+
configService: ConfigService,
18+
prismaRepository: PrismaRepository,
19+
openaiService: OpenaiService,
20+
) {
1421
super(waMonitor, prismaRepository, 'FlowiseService', configService);
22+
this.openaiService = openaiService;
1523
}
1624

1725
/**
@@ -49,6 +57,21 @@ export class FlowiseService extends BaseChatbotService<Flowise, FlowiseSetting>
4957
},
5058
};
5159

60+
if (this.isAudioMessage(content) && msg) {
61+
try {
62+
this.logger.debug(`[Flowise] Downloading audio for Whisper transcription`);
63+
const transcription = await this.openaiService.speechToText(msg);
64+
if (transcription) {
65+
payload.query = transcription;
66+
} else {
67+
payload.query = '[Audio message could not be transcribed]';
68+
}
69+
} catch (err) {
70+
this.logger.error(`[Flowise] Failed to transcribe audio: ${err}`);
71+
payload.query = '[Audio message could not be transcribed]';
72+
}
73+
}
74+
5275
if (this.isImageMessage(content)) {
5376
const contentSplit = content.split('|');
5477

src/api/integrations/chatbot/n8n/services/n8n.service.ts

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,21 @@ import { Auth, ConfigService, HttpServer } from '@config/env.config';
55
import { IntegrationSession, N8n, N8nSetting } from '@prisma/client';
66
import { sendTelemetry } from '@utils/sendTelemetry';
77
import axios from 'axios';
8-
import { downloadMediaMessage } from 'baileys';
98

109
import { BaseChatbotService } from '../../base-chatbot.service';
10+
import { OpenaiService } from '../../openai/services/openai.service';
1111
import { N8nDto } from '../dto/n8n.dto';
12-
1312
export class N8nService extends BaseChatbotService<N8n, N8nSetting> {
14-
constructor(waMonitor: WAMonitoringService, prismaRepository: PrismaRepository, configService: ConfigService) {
13+
private openaiService: OpenaiService;
14+
15+
constructor(
16+
waMonitor: WAMonitoringService,
17+
prismaRepository: PrismaRepository,
18+
configService: ConfigService,
19+
openaiService: OpenaiService,
20+
) {
1521
super(waMonitor, prismaRepository, 'N8nService', configService);
22+
this.openaiService = openaiService;
1623
}
1724

1825
/**
@@ -135,10 +142,9 @@ export class N8nService extends BaseChatbotService<N8n, N8nSetting> {
135142
if (this.isAudioMessage(content) && msg) {
136143
try {
137144
this.logger.debug(`[N8n] Downloading audio for Whisper transcription`);
138-
const mediaBuffer = await downloadMediaMessage({ key: msg.key, message: msg.message }, 'buffer', {});
139-
const transcribedText = await this.speechToText(mediaBuffer);
140-
if (transcribedText) {
141-
payload.chatInput = transcribedText;
145+
const transcription = await this.openaiService.speechToText(msg);
146+
if (transcription) {
147+
payload.chatInput = transcription;
142148
} else {
143149
payload.chatInput = '[Audio message could not be transcribed]';
144150
}

src/api/integrations/chatbot/openai/controllers/openai.controller.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ export class OpenaiController extends BaseChatbotController<OpenaiBot, OpenaiDto
176176
await this.settings(instance, {
177177
openaiCredsId: data.openaiCredsId,
178178
expire: data.expire || 300,
179-
keywordFinish: data.keywordFinish || 'bye,exit,quit,stop',
179+
keywordFinish: data.keywordFinish || 'bye',
180180
delayMessage: data.delayMessage || 1000,
181181
unknownMessage: data.unknownMessage || 'Sorry, I dont understand',
182182
listeningFromMe: data.listeningFromMe !== undefined ? data.listeningFromMe : true,
@@ -385,7 +385,7 @@ export class OpenaiController extends BaseChatbotController<OpenaiBot, OpenaiDto
385385
});
386386

387387
// Convert keywordFinish to string if it's an array
388-
const keywordFinish = Array.isArray(data.keywordFinish) ? data.keywordFinish.join(',') : data.keywordFinish;
388+
const keywordFinish = data.keywordFinish;
389389

390390
// Additional OpenAI-specific fields
391391
const settingsData = {

src/api/integrations/chatbot/openai/services/openai.service.ts

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,12 +114,9 @@ export class OpenaiService extends BaseChatbotService<OpenaiBot, OpenaiSetting>
114114
}
115115

116116
// Handle keyword finish
117-
const keywordFinish = settings?.keywordFinish?.split(',') || [];
117+
const keywordFinish = settings?.keywordFinish || '';
118118
const normalizedContent = content.toLowerCase().trim();
119-
if (
120-
keywordFinish.length > 0 &&
121-
keywordFinish.some((keyword: string) => normalizedContent === keyword.toLowerCase().trim())
122-
) {
119+
if (keywordFinish.length > 0 && normalizedContent === keywordFinish.toLowerCase()) {
123120
if (settings?.keepOpen) {
124121
await this.prismaRepository.integrationSession.update({
125122
where: {

src/api/integrations/chatbot/typebot/services/typebot.service.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,19 @@ import { sendTelemetry } from '@utils/sendTelemetry';
66
import axios from 'axios';
77

88
import { BaseChatbotService } from '../../base-chatbot.service';
9+
import { OpenaiService } from '../../openai/services/openai.service';
910

1011
export class TypebotService extends BaseChatbotService<TypebotModel, any> {
11-
constructor(waMonitor: WAMonitoringService, configService: ConfigService, prismaRepository: PrismaRepository) {
12+
private openaiService: OpenaiService;
13+
14+
constructor(
15+
waMonitor: WAMonitoringService,
16+
configService: ConfigService,
17+
prismaRepository: PrismaRepository,
18+
openaiService: OpenaiService,
19+
) {
1220
super(waMonitor, prismaRepository, 'TypebotService', configService);
21+
this.openaiService = openaiService;
1322
}
1423

1524
/**
@@ -58,7 +67,7 @@ export class TypebotService extends BaseChatbotService<TypebotModel, any> {
5867
// Continue an existing chat
5968
const version = this.configService?.get<Typebot>('TYPEBOT').API_VERSION;
6069
let url: string;
61-
let reqData: {};
70+
let reqData: any;
6271

6372
if (version === 'latest') {
6473
url = `${bot.url}/api/v1/sessions/${session.sessionId.split('-')[1]}/continueChat`;
@@ -71,6 +80,21 @@ export class TypebotService extends BaseChatbotService<TypebotModel, any> {
7180
};
7281
}
7382

83+
if (this.isAudioMessage(content) && msg) {
84+
try {
85+
this.logger.debug(`[Typebot] Downloading audio for Whisper transcription`);
86+
const transcription = await this.openaiService.speechToText(msg);
87+
if (transcription) {
88+
reqData.message = transcription;
89+
} else {
90+
reqData.message = '[Audio message could not be transcribed]';
91+
}
92+
} catch (err) {
93+
this.logger.error(`[Typebot] Failed to transcribe audio: ${err}`);
94+
reqData.message = '[Audio message could not be transcribed]';
95+
}
96+
}
97+
7498
const response = await axios.post(url, reqData);
7599

76100
// Process the response and send the messages to WhatsApp

0 commit comments

Comments
 (0)