Skip to content

Commit de58a58

Browse files
authored
fix: try to update citations starting points when a code block is present (#731)
* fix: try to update citations start and end points when there is a code block present
* fix: decode data before generating the file
* fix: multiple blocks with multiple citations
1 parent d8c73f9 commit de58a58

File tree

3 files changed

+271
-21
lines changed

3 files changed

+271
-21
lines changed

src/interfaces/assistants_web/src/stores/slices/citationsSlice.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import { StateCreator } from 'zustand';
22

33
import { Document } from '@/cohere-client';
4-
import { mapExtensionToMimeType } from '@/utils';
4+
import { decodeBase64, mapExtensionToMimeType } from '@/utils';
55

66
import { StoreState } from '..';
77

@@ -87,7 +87,7 @@ export const createCitationsSlice: StateCreator<StoreState, [], [], CitationsSto
8787
if (file.downloadUrl) {
8888
continue;
8989
}
90-
const data = file.data;
90+
const data = decodeBase64(file.data);
9191
const fileExtension = file.name.split('.').pop() || '.txt';
9292
const mimeType = mapExtensionToMimeType(fileExtension);
9393
const blob = new Blob([data], { type: mimeType });

src/interfaces/assistants_web/src/utils/citations.test.ts

Lines changed: 195 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@ import { describe, expect, test } from 'vitest';
22

33
import { Citation } from '@/cohere-client';
44
import {
5+
CODE_BLOCK_REGEX_EXP,
6+
IFRAME_REGEX_EXP,
57
fixInlineCitationsForMarkdown,
68
isReferenceBetweenSpecialTags,
79
replaceTextWithCitations,
@@ -214,55 +216,240 @@ describe('fixInlineCitationsForMarkdown', () => {
214216
},
215217
]);
216218
});
219+
220+
test('should fix citations position for to get the correct markdown', () => {
221+
const citations = [
222+
{
223+
text: '(fn',
224+
start: 71,
225+
end: 74,
226+
document_ids: ['12345'],
227+
},
228+
{
229+
text: 'delay',
230+
start: 76,
231+
end: 81,
232+
document_ids: ['12345'],
233+
},
234+
{
235+
text: 'let debounceTimer',
236+
start: 86,
237+
end: 103,
238+
document_ids: ['12345'],
239+
},
240+
{
241+
text: 'return function',
242+
start: 107,
243+
end: 122,
244+
document_ids: ['12345'],
245+
},
246+
{
247+
text: 'args',
248+
start: 126,
249+
end: 130,
250+
document_ids: ['12345'],
251+
},
252+
{
253+
text: 'clearTimeout',
254+
start: 135,
255+
end: 147,
256+
document_ids: ['12345'],
257+
},
258+
{
259+
text: '(debounceTimer',
260+
start: 147,
261+
end: 161,
262+
document_ids: ['12345'],
263+
},
264+
{
265+
text: 'debounceTimer = setTimeout',
266+
start: 165,
267+
end: 191,
268+
document_ids: ['12345'],
269+
},
270+
{
271+
text: 'fn.apply(this, args',
272+
start: 201,
273+
end: 220,
274+
document_ids: ['12345'],
275+
},
276+
{
277+
text: 'delay',
278+
start: 227,
279+
end: 232,
280+
document_ids: ['12345'],
281+
},
282+
{
283+
text: 'code is only executed once per user input',
284+
start: 285,
285+
end: 326,
286+
document_ids: ['12345'],
287+
},
288+
{
289+
text: 'takes two parameters',
290+
start: 331,
291+
end: 351,
292+
document_ids: ['12345'],
293+
},
294+
{
295+
text: 'the function to be debounced',
296+
start: 353,
297+
end: 381,
298+
document_ids: ['12345'],
299+
},
300+
{
301+
text: 'the delay in milliseconds',
302+
start: 386,
303+
end: 411,
304+
document_ids: ['12345'],
305+
},
306+
];
307+
const text =
308+
"Here's a JavaScript debounce function: ```javascript function debounce(fn, delay) { let debounceTimer; return function(...args) { clearTimeout(debounceTimer); debounceTimer = setTimeout(() => { fn.apply(this, args); }, delay); }; } ``` The debounce function ensures that the code is only executed once per user input. It takes two parameters: the function to be debounced and the delay in milliseconds.";
309+
const result = fixInlineCitationsForMarkdown(citations, text);
310+
expect(result).toStrictEqual([
311+
{
312+
text: '(fn',
313+
start: 71,
314+
end: 74,
315+
document_ids: ['12345'],
316+
},
317+
{
318+
text: 'delay',
319+
start: 76,
320+
end: 81,
321+
document_ids: ['12345'],
322+
},
323+
{
324+
text: 'let debounceTimer',
325+
start: 86,
326+
end: 103,
327+
document_ids: ['12345'],
328+
},
329+
{
330+
text: 'return function',
331+
start: 107,
332+
end: 122,
333+
document_ids: ['12345'],
334+
},
335+
{
336+
text: 'args',
337+
start: 126,
338+
end: 130,
339+
document_ids: ['12345'],
340+
},
341+
{
342+
text: 'clearTimeout',
343+
start: 135,
344+
end: 147,
345+
document_ids: ['12345'],
346+
},
347+
{
348+
text: '(debounceTimer',
349+
start: 147,
350+
end: 161,
351+
document_ids: ['12345'],
352+
},
353+
{
354+
text: 'debounceTimer = setTimeout',
355+
start: 165,
356+
end: 191,
357+
document_ids: ['12345'],
358+
},
359+
{
360+
text: 'fn.apply(this, args',
361+
start: 201,
362+
end: 220,
363+
document_ids: ['12345'],
364+
},
365+
{
366+
text: 'delay',
367+
start: 227,
368+
end: 232,
369+
document_ids: ['12345'],
370+
},
371+
{
372+
text: 'code is only executed once per user input',
373+
start: 301,
374+
end: 342,
375+
document_ids: ['12345'],
376+
},
377+
{
378+
text: 'takes two parameters',
379+
start: 347,
380+
end: 367,
381+
document_ids: ['12345'],
382+
},
383+
{
384+
text: 'the function to be debounced',
385+
start: 369,
386+
end: 397,
387+
document_ids: ['12345'],
388+
},
389+
{
390+
text: 'the delay in milliseconds',
391+
start: 402,
392+
end: 427,
393+
document_ids: ['12345'],
394+
},
395+
]);
396+
});
217397
});
218398

219399
describe('isReferenceBetweenSpecialTags', () => {
220400
test('should return true if the citation is between <iframe> tags', () => {
221-
const matchRegex = /<iframe>.*<\/iframe>/;
222401
const text = '<iframe> This is a citation </iframe>';
223402
const citation: Citation = {
224403
start: 19,
225404
end: 27,
226405
text: 'citation',
227406
document_ids: ['12345'],
228407
};
229-
const result = isReferenceBetweenSpecialTags(matchRegex, text, citation.start);
408+
const result = isReferenceBetweenSpecialTags(IFRAME_REGEX_EXP, text, citation.start);
230409
expect(result).toBe(true);
231410
});
232411
test('should return false if the citation is not between <iframe> tags', () => {
233-
const matchRegex = /<iframe>.*<\/iframe>/;
234412
const text = 'This is a citation <iframe> another test citaiton </iframe>';
235413
const citation: Citation = {
236414
start: 10,
237415
end: 18,
238416
text: 'citation',
239417
document_ids: ['12345'],
240418
};
241-
const result = isReferenceBetweenSpecialTags(matchRegex, text, citation.start);
419+
const result = isReferenceBetweenSpecialTags(IFRAME_REGEX_EXP, text, citation.start);
242420
expect(result).toBe(false);
243421
});
244422
test('should return true if the citation is between ``` tags', () => {
245-
const matchRegex = /```[\s\S]*?```/g;
246423
const text = '``` This is a citation ```';
247424
const citation: Citation = {
248425
start: 14,
249426
end: 22,
250427
text: 'citation',
251428
document_ids: ['12345'],
252429
};
253-
const result = isReferenceBetweenSpecialTags(matchRegex, text, citation.start);
430+
const result = isReferenceBetweenSpecialTags(CODE_BLOCK_REGEX_EXP, text, citation.start);
254431
expect(result).toBe(true);
255432
});
256433
test('should return false if the citation is not between ``` tags', () => {
257-
const matchRegex = /```[\s\S]*?```/g;
258434
const text = '``` This is a citation ``` another test citaiton';
259435
const citation: Citation = {
260436
start: 40,
261437
end: 48,
262438
text: 'citation',
263439
document_ids: ['12345'],
264440
};
265-
const result = isReferenceBetweenSpecialTags(matchRegex, text, citation.start);
441+
const result = isReferenceBetweenSpecialTags(CODE_BLOCK_REGEX_EXP, text, citation.start);
266442
expect(result).toBe(false);
267443
});
444+
test('should be able to check if there are more than one match', () => {
445+
const text = '``` This is a citation ``` another test citaiton ``` final citation ``` ';
446+
const citation: Citation = {
447+
start: 59,
448+
end: 67,
449+
text: 'citation',
450+
document_ids: ['12345'],
451+
};
452+
const result = isReferenceBetweenSpecialTags(CODE_BLOCK_REGEX_EXP, text, citation.start);
453+
expect(result).toBe(true);
454+
});
268455
});

0 commit comments

Comments
 (0)