Commit ec6b164
committed
feat: refactoring of the tokenization function
1 parent 4ab26a2 commit ec6b164
File tree
4 files changed
+53
-13
lines changed- scrapegraphai/utils
- tokenizers
4 files changed
+53
-13
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
71 | 71 | | |
72 | 72 | | |
73 | 73 | | |
| 74 | + | |
74 | 75 | | |
75 | 76 | | |
76 | 77 | | |
| |||
87 | 88 | | |
88 | 89 | | |
89 | 90 | | |
| 91 | + | |
90 | 92 | | |
91 | 93 | | |
92 | 94 | | |
| |||
152 | 154 | | |
153 | 155 | | |
154 | 156 | | |
| 157 | + | |
155 | 158 | | |
156 | 159 | | |
157 | 160 | | |
| |||
235 | 238 | | |
236 | 239 | | |
237 | 240 | | |
| 241 | + | |
| 242 | + | |
238 | 243 | | |
239 | 244 | | |
240 | 245 | | |
| 246 | + | |
| 247 | + | |
241 | 248 | | |
242 | 249 | | |
243 | 250 | | |
| |||
254 | 261 | | |
255 | 262 | | |
256 | 263 | | |
| 264 | + | |
257 | 265 | | |
258 | 266 | | |
259 | 267 | | |
| |||
271 | 279 | | |
272 | 280 | | |
273 | 281 | | |
| 282 | + | |
274 | 283 | | |
275 | 284 | | |
276 | 285 | | |
| |||
320 | 329 | | |
321 | 330 | | |
322 | 331 | | |
| 332 | + | |
323 | 333 | | |
324 | 334 | | |
325 | 335 | | |
| |||
342 | 352 | | |
343 | 353 | | |
344 | 354 | | |
| 355 | + | |
345 | 356 | | |
346 | 357 | | |
347 | 358 | | |
348 | 359 | | |
349 | 360 | | |
| 361 | + | |
350 | 362 | | |
351 | 363 | | |
352 | 364 | | |
| |||
358 | 370 | | |
359 | 371 | | |
360 | 372 | | |
| 373 | + | |
361 | 374 | | |
362 | 375 | | |
363 | 376 | | |
| |||
367 | 380 | | |
368 | 381 | | |
369 | 382 | | |
| 383 | + | |
| 384 | + | |
| 385 | + | |
| 386 | + | |
370 | 387 | | |
371 | 388 | | |
372 | 389 | | |
| |||
416 | 433 | | |
417 | 434 | | |
418 | 435 | | |
| 436 | + | |
419 | 437 | | |
420 | 438 | | |
421 | 439 | | |
| |||
428 | 446 | | |
429 | 447 | | |
430 | 448 | | |
| 449 | + | |
431 | 450 | | |
432 | 451 | | |
| 452 | + | |
| 453 | + | |
| 454 | + | |
| 455 | + | |
433 | 456 | | |
434 | 457 | | |
435 | 458 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
41 | 41 | | |
42 | 42 | | |
43 | 43 | | |
| 44 | + | |
| 45 | + | |
44 | 46 | | |
45 | 47 | | |
46 | 48 | | |
| |||
49 | 51 | | |
50 | 52 | | |
51 | 53 | | |
| 54 | + | |
52 | 55 | | |
53 | 56 | | |
54 | 57 | | |
| |||
103 | 106 | | |
104 | 107 | | |
105 | 108 | | |
| 109 | + | |
106 | 110 | | |
107 | 111 | | |
108 | 112 | | |
| |||
153 | 157 | | |
154 | 158 | | |
155 | 159 | | |
| 160 | + | |
| 161 | + | |
156 | 162 | | |
157 | 163 | | |
158 | 164 | | |
| 165 | + | |
| 166 | + | |
159 | 167 | | |
160 | 168 | | |
161 | 169 | | |
| |||
164 | 172 | | |
165 | 173 | | |
166 | 174 | | |
| 175 | + | |
167 | 176 | | |
168 | 177 | | |
169 | 178 | | |
| |||
175 | 184 | | |
176 | 185 | | |
177 | 186 | | |
| 187 | + | |
178 | 188 | | |
179 | 189 | | |
180 | 190 | | |
| |||
205 | 215 | | |
206 | 216 | | |
207 | 217 | | |
| 218 | + | |
| 219 | + | |
208 | 220 | | |
209 | 221 | | |
210 | 222 | | |
| |||
219 | 231 | | |
220 | 232 | | |
221 | 233 | | |
| 234 | + | |
222 | 235 | | |
223 | 236 | | |
| 237 | + | |
224 | 238 | | |
225 | 239 | | |
226 | 240 | | |
| |||
229 | 243 | | |
230 | 244 | | |
231 | 245 | | |
| 246 | + | |
232 | 247 | | |
233 | 248 | | |
234 | 249 | | |
235 | 250 | | |
| 251 | + | |
| 252 | + | |
| 253 | + | |
| 254 | + | |
236 | 255 | | |
237 | 256 | | |
238 | 257 | | |
| |||
253 | 272 | | |
254 | 273 | | |
255 | 274 | | |
| 275 | + | |
256 | 276 | | |
257 | 277 | | |
258 | 278 | | |
| 279 | + | |
259 | 280 | | |
260 | 281 | | |
| 282 | + | |
| 283 | + | |
| 284 | + | |
| 285 | + | |
261 | 286 | | |
262 | 287 | | |
263 | 288 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
23 | 23 | | |
24 | 24 | | |
25 | 25 | | |
26 | | - | |
27 | | - | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
28 | 29 | | |
29 | 30 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
21 | 21 | | |
22 | 22 | | |
23 | 23 | | |
24 | | - | |
25 | | - | |
26 | | - | |
27 | | - | |
28 | | - | |
29 | 24 | | |
30 | | - | |
31 | | - | |
32 | | - | |
33 | | - | |
34 | | - | |
35 | | - | |
| 25 | + | |
| 26 | + | |
36 | 27 | | |
37 | 28 | | |
0 commit comments