Skip to content

Commit 171da0f

Browse files
committed
added path-using-slug, added the astro stripped header as a JSON Object in the options column
1 parent e2579a5 commit 171da0f

File tree

3 files changed

+174
-21
lines changed

3 files changed

+174
-21
lines changed

README.md

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
uses: actions/checkout@v4
4040

4141
- name: Build SQLite Cloud database
42-
uses: sqlitecloud/docsearch-action@v2
42+
uses: sqlitecloud/docsearch-action@v3
4343
with:
4444
project-string: ${{ secrets.PROJECT_STRING }}
4545
base-url: https://your-website.com/docs/
@@ -56,7 +56,8 @@ jobs:
5656
* Set the `strip-html` input to `true` if you want to remove HTML elements.
5757
* Set the `strip-jsx` input to `true` if you want to remove JSX elements.
5858
* Set the `strip-md-titles` input to `true` if you want to remove markdown titles to avoid redundancy in the search.
59-
* Set the `strip-astro-header` input to `true` if you want to remove the Astro header from every file.
59+
* Set the `strip-astro-header` input to `true` if you want to remove the Astro header from every file and store it as a JSON object in the `options` column of the `documentation` table.
60+
* Set the `path-using-slug` input to `true` if you want each page's URL path to come from the `slug` field in its Astro header instead of the file's relative path.
6061
7. Commit and push the workflow file to your repository.
6162

6263

action.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ inputs:
3232
description: If you use Astro and you want to remove its docs header make this true.
3333
required: false
3434
default: false
35+
path-using-slug:
36+
description: Use the slug in the header as the path instead of the relative one.
37+
required: false
38+
default: false
3539

3640
branding:
3741
icon: "search"
@@ -59,6 +63,7 @@ runs:
5963
[[ ${{ inputs.strip-jsx }} == true ]] && args+=" --strip-jsx"
6064
[[ ${{ inputs.strip-md-titles }} == true ]] && args+=" --strip-md-titles"
6165
[[ ${{ inputs.strip-astro-header }} == true ]] && args+=" --strip-astro-header"
66+
[[ ${{ inputs.path-using-slug }} == true ]] && args+=" --path-using-slug"
6267
echo $(main --input=${{ inputs.path }} --output=search.sql --base-url=${{ inputs.base-url }} $args)
6368
shell: bash
6469

src/main.c

Lines changed: 166 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@
3535
#define SKIP_UNTIL(_c) do{i++;} while(PEEK == _c)
3636
#define RESET_SKIP() do {toskip = NO_SKIP; nskip = 1;} while(0)
3737

38+
// True when the extra `options` column is in use: both JSON mode and Astro header stripping are enabled.
// The expansion is parenthesized so it stays a single operand in expressions such as !OPTIONS_COL.
#define OPTIONS_COL (json_mode && strip_astro_header)
39+
3840
#if GENERATE_SQLITE_DATABASE
3941
sqlite3 *db = NULL;
4042
#endif
@@ -49,6 +51,7 @@ bool strip_md_title = false;
4951
bool strip_astro_header = false;
5052
bool use_transaction = false;
5153
bool json_mode = false;
54+
bool path_using_slug = false;
5255
bool use_database = false;
5356
bool create_db = false;
5457

@@ -191,17 +194,30 @@ static bool check_line (const char *current, const char *begin_with, const char
191194
return ((found1 != NULL) && (found2 != NULL));
192195
}
193196

194-
static char *process_md (const char *input, size_t *len) {
195-
char *buffer = (char *)malloc(*len);
196-
if (!buffer) {
197-
printf("Not enough memory to allocate %zu bytes.", *len);
198-
exit(-3);
197+
/**
 * Duplicate the part of `str` that precedes the first occurrence of `match`.
 *
 * @param str   NUL-terminated string to scan; NULL is treated as "no match".
 * @param match Delimiter character; it is not included in the copy.
 * @return A newly allocated, NUL-terminated copy of the prefix (the caller
 *         must free it), or NULL when `str` is NULL, `match` does not occur
 *         in `str`, or the allocation fails.
 */
static char *match_copy(const char *str, const char match) {
    // strchr on a NULL pointer is undefined behaviour, so reject it up front
    if (str == NULL) return NULL;

    const char *pos = strchr(str, match);
    if (pos == NULL) return NULL;

    size_t length = (size_t)(pos - str);

    char *cp_str = malloc(length + 1);
    if (cp_str == NULL) return NULL;

    // the prefix length is known exactly, so a plain byte copy is enough
    memcpy(cp_str, str, length);
    cp_str[length] = '\0';

    return cp_str;
}
214+
215+
static char *process_md (const char *input, char *buffer, size_t *len, char *astro_header, size_t *header_len) {
216+
201217
bool is_code = false;
202218
int toskip = NO_SKIP;
203219
int nskip = 1;
204-
int i = 0, j = 0;
220+
int i = 0, j = 0, h = 0, slug_index = 6; // 6 is the length of the string "slug: "
205221

206222
while (input[i]) {
207223
int c = NEXT;
@@ -216,6 +232,9 @@ static char *process_md (const char *input, size_t *len) {
216232
NEXT; // \n
217233
RESET_SKIP();
218234
}
235+
} else if(toskip == '-' && nskip == 3) {
236+
if(path_using_slug && c == '\n' && PEEK == 's' && PEEK2 == 'l' && check_line(&input[i-1], "\nslug: ", "\n")) slug_index += i;
237+
astro_header[h++] = c;
219238
}
220239
continue;
221240
}
@@ -337,7 +356,93 @@ static char *process_md (const char *input, size_t *len) {
337356

338357
*len = j;
339358
buffer[j] = 0;
340-
return buffer;
359+
*header_len = h;
360+
astro_header[h] = 0;
361+
if(slug_index == 6) return NULL; // no slug found
362+
return match_copy(&input[slug_index], '\n');
363+
}
364+
365+
/* Append the NUL-terminated literal `s` to `dst` at offset `*pos` and advance `*pos`. */
static void process_json_append (char *dst, size_t *pos, const char *s) {
    size_t n = strlen(s);
    memcpy(dst + *pos, s, n);
    *pos += n;
}

/**
 * Rewrite the `key: value` lines held in `input` as the text of a JSON object,
 * in place.
 *
 * Every double quote that delimits a key or a value is written as a backslash
 * followed by a quote, and every single quote in the input is doubled
 * (presumably escaping for the JSON/SQL text this is later embedded in --
 * confirm against the output writer). Double quotes, '[', ']' and tab
 * characters found in the input are dropped; a backslash is doubled.
 *
 * @param input      NUL-terminated header text; overwritten with the result.
 *                   NOTE(review): the result can be longer than the input
 *                   (up to 6 * strlen(input) + 11 bytes including the NUL) and
 *                   this function cannot see the capacity of `input`, so the
 *                   caller must provide a buffer that is large enough.
 * @param header_len Out: set to the length of the rewritten text (excluding
 *                   the NUL). The incoming value is not needed because the
 *                   scan stops at the terminating NUL of `input`.
 */
static void process_json (char *input, size_t *header_len) {
    size_t in_len = strlen(input);

    // Worst case: every input byte is a '\n' that expands to 6 output bytes,
    // plus 5 bytes of opening text, 5 bytes of closing text and the NUL.
    // (The previous 2x estimate overflowed even for a one-line header.)
    if (in_len > ((size_t)-1 - 11) / 6) {
        printf("Not enough memory to allocate %zu bytes.", in_len);
        exit(-3);
    }
    size_t capacity = in_len * 6 + 11;

    char *out = malloc(capacity);
    if (!out) {
        printf("Not enough memory to allocate %zu bytes.", capacity);
        exit(-3);
    }

    size_t i = 0, j = 0;
    int quotes = 0;     // number of quote characters seen on the current line

    // open the object and the first key
    process_json_append(out, &j, "\n{\n\\\"");

    while (input[i]) {
        int c = input[i++];
        int next = input[i];    // character following c (0 at end of input)

        switch (c) {
            case ':': {
                // an unquoted ": " separates key and value
                if (next == ' ' && quotes == 0) {
                    process_json_append(out, &j, "\\\": \\\"");
                    i++; // skip the space
                    continue;
                }
                break;
            }
            case '\n': {
                // a newline followed by a new, non-indented line starts the next pair
                if (next && next != ' ' && next != '\n' && i > 1) {
                    process_json_append(out, &j, "\\\",\n\\\"");
                }
                quotes = 0;
                continue;
            }
            case '\\': {
                // emitted once here and once below: the backslash is doubled
                out[j++] = '\\';
                break;
            }
            case '\'': {
                // emitted once here and once below: the single quote is doubled
                quotes++;
                out[j++] = (char)c;
                break;
            }
            case '\"': {
                quotes++;
                continue;
            }
            case '[':
            case ']':
            case '\t': {
                // skip character
                continue;
            }
            default:
                break;
        }

        // copy character as-is
        out[j++] = (char)c;
    }

    // close the last value and the object
    process_json_append(out, &j, "\\\"\n}\n");
    out[j] = 0;
    *header_len = j;

    memcpy(input, out, j + 1);
    free(out);
}
342447

343448
// MARK: -
@@ -394,7 +499,11 @@ static void create_file (const char *path) {
394499
}
395500

396501
write_line("DROP TABLE IF EXISTS documentation;", -1, 1);
397-
write_line("CREATE VIRTUAL TABLE IF NOT EXISTS documentation USING fts5 (url, content);", -1, 1);
502+
if(OPTIONS_COL){
503+
write_line("CREATE VIRTUAL TABLE IF NOT EXISTS documentation USING fts5 (url, content, options);", -1, 1);
504+
} else {
505+
write_line("CREATE VIRTUAL TABLE IF NOT EXISTS documentation USING fts5 (url, content);", -1, 1);
506+
}
398507
}
399508

400509
static void create_output (const char *path) {
@@ -443,29 +552,39 @@ static void add_database_entry(const char *url, char *buffer, size_t size) {
443552
}
444553
#endif
445554

446-
static void add_file_entry(const char *url, char *buffer, size_t bsize) {
555+
static void add_file_entry(const char *url, char *buffer, size_t bsize, char *astro_header, size_t header_size) {
447556
if (bsize == -1) bsize = strlen(buffer);
557+
if (header_size == -1) header_size = strlen(astro_header);
448558

449559
size_t url_size = strlen(url);
450560

451-
size_t blen = url_size + bsize + 1024;
561+
size_t blen;
562+
if(OPTIONS_COL){
563+
blen = url_size + bsize + header_size + 1024;
564+
} else {
565+
blen = url_size + bsize + 1024;
566+
}
452567
char *b = malloc (blen);
453568
if (!b) {
454569
exit(-11);
455570
}
456571

457-
// INSERT INTO documentation (url, content) VALUES (?1, ?2);
458-
size_t nwrote = snprintf(b, blen, "INSERT INTO documentation (url, content) VALUES ('%s', '%s');", url, buffer);
572+
size_t nwrote;
573+
if(OPTIONS_COL){
574+
nwrote = snprintf(b, blen, "INSERT INTO documentation (url, content, options) VALUES ('%s', '%s', json('%s'));", url, buffer, astro_header);
575+
} else {
576+
nwrote = snprintf(b, blen, "INSERT INTO documentation (url, content) VALUES ('%s', '%s');", url, buffer);
577+
}
459578
write_line(b, nwrote, 1);
460579

461580
free(b);
462581
}
463582

464-
static void add_entry(const char *url, char *buffer, size_t size) {
583+
static void add_entry(const char *url, char *buffer, size_t size, char *astro_header, size_t header_size) {
465584
#if GENERATE_SQLITE_DATABASE
466585
add_database_entry(url, buffer, size);
467586
#else
468-
add_file_entry(url, buffer, size);
587+
add_file_entry(url, buffer, size, astro_header, header_size);
469588
#endif
470589
}
471590

@@ -486,16 +605,33 @@ static void scan_docs (const char *base_url, const char *dir_path) {
486605
// test only files with a .md or mdx extension
487606
if ((strstr(full_path, ".md") == NULL) && (strstr(full_path, ".mdx") == NULL)) continue;
488607

489-
// build url and title
490-
const char *url = file_buildurl(base_url, full_path);
491-
492608
// load md source code
493609
size_t size = 0;
494610
char *source_code = file_read(full_path, &size);
495611

496-
char *buffer = process_md(source_code, &size);
612+
size_t header_size = size;
613+
char *buffer = (char *)malloc(size);
614+
char *astro_header = (char *)malloc(header_size);
615+
if (!buffer || !astro_header) {
616+
printf("Not enough memory to allocate %zu bytes.", size > header_size ? size : header_size);
617+
exit(-3);
618+
}
619+
620+
const char *slug_path = process_md(source_code, buffer, &size, astro_header, &header_size);
621+
622+
if(OPTIONS_COL) process_json(astro_header, &header_size);
623+
624+
// build url and title
625+
char *url;
626+
if(path_using_slug && slug_path != NULL){
627+
url = malloc(strlen(base_url) + strlen(slug_path) + 1);
628+
strcpy(url, base_url);
629+
strcat(url, slug_path);
630+
} else {
631+
url = file_buildurl(base_url, full_path);
632+
}
497633

498-
add_entry(url, buffer, size);
634+
add_entry(url, buffer, size, astro_header, header_size);
499635

500636
//DEBUG
501637
//printf("title: %s\n", title);
@@ -507,8 +643,10 @@ static void scan_docs (const char *base_url, const char *dir_path) {
507643

508644
free((void *)url);
509645
free((void *)full_path);
646+
free((void *)slug_path);
510647
free(source_code);
511648
free(buffer);
649+
free(astro_header);
512650
}
513651
}
514652

@@ -596,6 +734,14 @@ int main (int argc, char * argv[]) {
596734
.value_name = NULL,
597735
.description = "Use transactions"
598736
},
737+
738+
{
739+
.identifier = 'g',
740+
.access_letters = "g",
741+
.access_name = "path-using-slug",
742+
.value_name = NULL,
743+
.description = "Use the slug in the header as the path instead of the relative one"
744+
},
599745

600746
{
601747
.identifier = 's',
@@ -630,6 +776,7 @@ int main (int argc, char * argv[]) {
630776
case 't': use_transaction = true; break;
631777
case 'u': use_database = true; break;
632778
case 's': json_mode = true; break;
779+
case 'g': path_using_slug = true; break;
633780
case 'c': create_db = true; break;
634781

635782
case 'h':

0 commit comments

Comments
 (0)