Skip to content

Commit af33899

Browse files
committed
fbdoc: move load index code from getindex tool to CWikiConUrl.LoadIndex()
1 parent d4e6b51 commit af33899

File tree

2 files changed

+163
-156
lines changed

2 files changed

+163
-156
lines changed

doc/fbchkdoc/getindex.bas

Lines changed: 5 additions & 155 deletions
Original file line numberDiff line numberDiff line change
@@ -39,154 +39,8 @@ using fbdoc
3939

4040
const def_index_file = hardcoded.default_index_file
4141

42-
'' temporary files
43-
const def_html_file = "PageIndex.html"
44-
const def_text_file = "PageIndex0.txt"
45-
4642
'' --------------------------------------------------------
4743

48-
''
49-
'' Strip the HTML markup from the downloaded page text and write the
'' resulting plain text to the temporary file def_text_file.
''
''  - The entities &lt; &gt; &amp; &nbsp; are decoded (case-insensitively).
''  - <br> / <br /> become a line break, <hr> / <hr /> become "----".
''  - The text "All" found directly after an opening <a ...> tag gets a
''    "----" marker line appended after it.
''  - Every other tag is dropped; all other characters are copied as-is.
''
'' sBody: the HTML text to convert (read only, never modified here).
sub RemoveHTMLtags _
	( _
		byref sBody as string _
	)

	dim as string txt, html
	dim as integer n = len(sBody)

	'' Mid() positions are 1-based, so the scan starts at 1
	dim as integer i = 1, j, b

	'' atag: 2 = an opening <a ...> tag was parsed in this pass,
	''       1 = the previous pass parsed an opening <a ...> tag,
	''       0 = anything else
	dim as integer atag = 0

	print "Removing html tags"
	while( i <= n )

		if( lcase(mid( sBody, i, 4 )) = "&lt;" ) then
			txt += "<"
			i += 4
		elseif( lcase(mid( sBody, i, 4 )) = "&gt;" ) then
			txt += ">"
			i += 4
		elseif( lcase(mid( sBody, i, 5 )) = "&amp;" ) then
			txt += "&"
			i += 5
		elseif( lcase(mid( sBody, i, 6 )) = "&nbsp;" ) then
			txt += " "
			i += 6
		elseif( mid( sBody, i, 4 ) = "All<" and atag = 1 ) then
			txt += "All" + crlf + "----" + crlf
			i += 3
		elseif( mid( sBody, i, 5 ) = "All <" and atag = 1 ) then
			'' NOTE(review): only 3 characters are consumed here, so the
			'' blank after "All" is copied a second time by the next pass
			'' - confirm whether that is intended.
			txt += "All " + crlf + "----" + crlf
			i += 3
		elseif( mid( sBody, i, 1 ) = "<" ) then
			atag = 0

			'' find the end of the tag: b counts unbalanced '<',
			'' double-quoted text inside the tag is skipped unexamined
			b = 1
			j = i + 1
			while( j <= n and b > 0 )
				select case ( mid( sBody, j, 1 ))
				case "<"
					b += 1
					j += 1
				case ">"
					b -= 1
					j += 1
				case chr(34)
					j += 1
					while( j <= n )
						select case ( mid( sBody, j, 1 ))
						case chr(34)
							j += 1
							exit while
						case else
							j += 1
						end select
					wend
				case else
					j += 1
				end select
			wend

			html = mid( sBody, i, j - i )
			select case lcase( html )
			case "<br>","<br />"
				txt += crlf
			case "<hr>","<hr />"
				txt += "----"
			case else
				'' tag names are case-insensitive, compare like the
				'' <br>/<hr> tests above
				if lcase( left( html, 3 ) ) = "<a " then
					atag = 2
				end if
			end select
			i = j

		else
			txt += mid( sBody, i, 1 )
			i += 1
		end if

		'' age the anchor state: it is only visible to the very next pass
		if( atag = 2 ) then
			atag = 1
		else
			atag = 0
		end if

	wend

	dim as integer h = freefile
	if( open( def_text_file for output as #h ) <> 0 ) then
		print "Error: unable to write '" + def_text_file + "'"
		exit sub
	end if
	print #h, txt
	close #h

end sub
142-
143-
''
144-
'' Extract the page names from the temporary text file def_text_file and
'' write them, one per line, to def_index_file.
''
'' Only the lines between the first and the second "----" marker line are
'' examined.  From each such line longer than 2 characters the leading run
'' of A-Z, a-z, 0-9 and "_" is taken as the page name; lines that do not
'' start with such a character are ignored.
''
'' The temporary file is deleted afterwards.  If it can not be opened the
'' index file is left untouched; if the index file can not be opened the
'' temporary file is left in place.
sub ExtractPageNames _
	( _
	)

	dim as integer b = 0, i, h1, h2
	dim as string x

	print "Writing '" + def_index_file + "'"

	'' open the input first, so that a missing temporary file does not
	'' truncate an existing index file
	h1 = freefile
	if( open( def_text_file for input as #h1 ) <> 0 ) then
		print "Error: unable to read '" + def_text_file + "'"
		exit sub
	end if

	'' freefile must be queried again only after h1 is in use
	h2 = freefile
	if( open( def_index_file for output as #h2 ) <> 0 ) then
		print "Error: unable to write '" + def_index_file + "'"
		close #h1
		exit sub
	end if

	while( eof(h1) = 0 )
		line input #h1, x
		if( b ) then
			if x = "----" then
				'' second marker: end of the page list
				b = 0
				exit while
			elseif( len(x) > 2 ) then
				'' i stops on the first character that can not be part
				'' of a page name (or at len(x) + 1)
				for i = 1 to len(x)
					select case mid(x,i,1)
					case "A" to "Z", "a" to "z", "0" to "9", "_"
					case else
						exit for
					end select
				next
				if i > 1 then
					print #h2, left(x, i - 1)
				end if
			end if
		else
			if x = "----" then
				'' first marker: the page list starts on the next line
				b = 1
			end if
		end if
	wend
	close #h2
	close #h1

	kill def_text_file

end sub
189-
19044
#ifdef __FB_LINUX__
19145
extern "c"
19246
declare function strcasecmp(byval as const zstring ptr, byval as const zstring ptr) as long
@@ -330,22 +184,18 @@ else
330184
print "Error"
331185
else
332186
print "OK"
333-
/'
334-
'' DEBUG
187+
print "Writing '" & def_index_file & "'"
188+
335189
dim as integer h = freefile
336-
open def_html_file for output as #h
337-
print #h, sBody
190+
open def_index_file for output as #h
191+
print #h, sBody;
338192
close #h
339-
'/
193+
340194
end if
341195

342196
delete wikicon
343197
end scope
344198

345-
RemoveHTMLtags( sBody )
346-
347-
ExtractPageNames( )
348-
349199
end if
350200

351201
print "Done."

doc/libfbdoc/CWikiConUrl.bas

Lines changed: 158 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,162 @@ namespace fb.fbdoc
339339

340340
end sub
341341

342+
''
343+
'' Strip the HTML markup from the PageIndex page text, in place.
''
''  - The entities &lt; &gt; &amp; &nbsp; are decoded (case-insensitively).
''  - <br> / <br /> become a line break, <hr> / <hr /> become "----".
''  - The text "All" found directly after an opening <a ...> tag gets a
''    "----" marker line appended after it.
''  - Every other tag is dropped; all other characters are copied as-is.
''
'' sBody: in  - the HTML text
''        out - the plain text that replaces it
private sub remove_html_tags _
	( _
		byref sBody as string _
	)

	dim as string txt, html
	dim as integer n = len(sBody)

	'' Mid() positions are 1-based, so the scan starts at 1
	dim as integer i = 1, j, b

	'' atag: 2 = an opening <a ...> tag was parsed in this pass,
	''       1 = the previous pass parsed an opening <a ...> tag,
	''       0 = anything else
	dim as integer atag = 0

	while( i <= n )

		if( lcase(mid( sBody, i, 4 )) = "&lt;" ) then
			txt += "<"
			i += 4
		elseif( lcase(mid( sBody, i, 4 )) = "&gt;" ) then
			txt += ">"
			i += 4
		elseif( lcase(mid( sBody, i, 5 )) = "&amp;" ) then
			txt += "&"
			i += 5
		elseif( lcase(mid( sBody, i, 6 )) = "&nbsp;" ) then
			txt += " "
			i += 6
		elseif( mid( sBody, i, 4 ) = "All<" and atag = 1 ) then
			txt += "All" + crlf + "----" + crlf
			i += 3
		elseif( mid( sBody, i, 5 ) = "All <" and atag = 1 ) then
			'' NOTE(review): only 3 characters are consumed here, so the
			'' blank after "All" is copied a second time by the next pass
			'' - confirm whether that is intended.
			txt += "All " + crlf + "----" + crlf
			i += 3
		elseif( mid( sBody, i, 1 ) = "<" ) then
			atag = 0

			'' find the end of the tag: b counts unbalanced '<',
			'' double-quoted text inside the tag is skipped unexamined
			b = 1
			j = i + 1
			while( j <= n and b > 0 )
				select case ( mid( sBody, j, 1 ))
				case "<"
					b += 1
					j += 1
				case ">"
					b -= 1
					j += 1
				case chr(34)
					j += 1
					while( j <= n )
						select case ( mid( sBody, j, 1 ))
						case chr(34)
							j += 1
							exit while
						case else
							j += 1
						end select
					wend
				case else
					j += 1
				end select
			wend

			html = mid( sBody, i, j - i )
			select case lcase( html )
			case "<br>","<br />"
				txt += crlf
			case "<hr>","<hr />"
				txt += "----"
			case else
				'' tag names are case-insensitive, compare like the
				'' <br>/<hr> tests above
				if lcase( left( html, 3 ) ) = "<a " then
					atag = 2
				end if
			end select
			i = j

		else
			txt += mid( sBody, i, 1 )
			i += 1
		end if

		'' age the anchor state: it is only visible to the very next pass
		if( atag = 2 ) then
			atag = 1
		else
			atag = 0
		end if

	wend

	sBody = txt

end sub
432+
433+
''
434+
'' Reduce the plain-text PageIndex to a list of page names, in place.
''
'' The text is split into lines at any run of CR / LF characters.  Only the
'' lines between the first and the second "----" marker line are examined.
'' From each such line longer than 2 characters the leading run of A-Z,
'' a-z, 0-9 and "_" is taken as the page name; lines that do not start
'' with such a character are ignored.
''
'' sBody: in  - the plain text (HTML tags already removed)
''        out - the page names, each one followed by nl
private sub extract_page_names _
	( _
		byref sBody as string _
	)

	dim as string txt = ""
	dim as string x
	dim as integer n = len(sBody)

	'' i and i0 are 0-based offsets for the sBody[] index operator;
	'' valid offsets are 0 .. n - 1
	dim as integer i = 0, i0 = 0

	'' k is a 1-based position in the current line; it is kept separate
	'' from i so that the page name scan can not disturb the line scan
	dim as integer k

	dim as boolean bFirstMark = false

	while( i0 < n )

		'' find end of line
		i = i0
		while( i < n )
			select case sBody[i]
			case 10, 13
				exit while
			end select
			i += 1
		wend
		x = mid( sBody, i0 + 1, i - i0 )

		'' skip any LF and CR's
		while( i < n )
			select case sBody[i]
			case 10, 13
				i += 1
			case else
				exit while
			end select
		wend
		i0 = i

		if( bFirstMark ) then
			if x = "----" then
				'' second marker: end of the page list
				bFirstMark = FALSE
				exit while
			elseif( len(x) > 2 ) then
				'' find the page name: k stops on the first character
				'' that can not be part of it (or at len(x) + 1)
				for k = 1 to len(x)
					select case mid( x, k, 1 )
					case "A" to "Z", "a" to "z", "0" to "9", "_"
					case else
						exit for
					end select
				next
				if k > 1 then
					txt &= left(x, k - 1) & nl
				end if
			end if
		else
			if x = "----" then
				'' first marker: the page list starts on the next line
				bFirstMark = TRUE
			end if
		end if

	wend

	sBody = txt

end sub
497+
342498
'':::::
343499
private function get_pageid _
344500
( _
@@ -442,6 +598,8 @@ namespace fb.fbdoc
442598
if( stream->Receive( URL, TRUE, ctx->ca_file ) ) then
443599
body = stream->Read()
444600
remove_http_headers( body )
601+
remove_html_tags( body )
602+
extract_page_names( body )
445603
end if
446604

447605
delete stream
@@ -562,4 +720,3 @@ namespace fb.fbdoc
562720
end function
563721

564722
end namespace
565-

0 commit comments

Comments
 (0)