Skip to content

Commit 8df011d

Browse files
cnst authored and Lubos Kosco committed
introducing uuencode(5) support: makes Xref files much smaller (no more
random huge search-path links), plus should significantly offload the index (all the uu and base64 stuff is ignored)
1 parent 906c195 commit 8df011d

File tree

4 files changed

+445
-0
lines changed

4 files changed

+445
-0
lines changed
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
22+
*/
23+
package org.opensolaris.opengrok.analysis.uue;
24+
25+
import java.io.IOException;
26+
import java.io.Reader;
27+
import java.io.Writer;
28+
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
29+
import org.apache.lucene.document.Document;
30+
import org.apache.lucene.document.TextField;
31+
import org.opensolaris.opengrok.analysis.Definitions;
32+
import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
33+
import org.opensolaris.opengrok.analysis.StreamSource;
34+
import org.opensolaris.opengrok.analysis.TextAnalyzer;
35+
import org.opensolaris.opengrok.configuration.Project;
36+
import org.opensolaris.opengrok.history.Annotation;
37+
38+
/**
39+
* Analyzes uuencoded and base64-encoded files (see uuencode(5))
40+
* Created on September 30, 2005
41+
*
42+
* @author Chandan
43+
*/
44+
public class UuencodeAnalyzer extends TextAnalyzer {
45+
private UuencodeXref xref;
46+
/**
47+
* Creates a new instance of UuencodeAnalyzer
48+
*/
49+
protected UuencodeAnalyzer(FileAnalyzerFactory factory) {
50+
super(factory);
51+
}
52+
53+
@Override
54+
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
55+
doc.add(new TextField("full", getReader(src.getStream())));
56+
57+
if (xrefOut != null) {
58+
try (Reader in = getReader(src.getStream())) {
59+
writeXref(in, xrefOut);
60+
}
61+
}
62+
}
63+
64+
@Override
65+
public TokenStreamComponents createComponents(String fieldName, Reader reader) {
66+
if ("full".equals(fieldName)) {
67+
return new TokenStreamComponents(new UuencodeFullTokenizer(reader));
68+
}
69+
return super.createComponents(fieldName, reader);
70+
}
71+
72+
/**
73+
* Write a cross referenced HTML file.
74+
*
75+
* @param in Input source
76+
* @param out Writer to write HTML cross-reference
77+
*/
78+
private void writeXref(Reader in, Writer out) throws IOException {
79+
if (xref == null) {
80+
xref = new UuencodeXref(in);
81+
} else {
82+
xref.reInit(in);
83+
}
84+
xref.project = project;
85+
xref.write(out);
86+
}
87+
88+
/**
89+
* Write a cross referenced HTML file reads the source from in
90+
*
91+
* @param in Input source
92+
* @param out Output xref writer
93+
* @param defs definitions for the file (could be null)
94+
* @param annotation annotation for the file (could be null)
95+
*/
96+
static void writeXref(Reader in, Writer out, Definitions defs, Annotation annotation, Project project) throws IOException {
97+
UuencodeXref xref = new UuencodeXref(in);
98+
xref.annotation = annotation;
99+
xref.project = project;
100+
xref.setDefs(defs);
101+
xref.write(out);
102+
}
103+
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
22+
* Copyright (c) 2012, 2013 Constantine A. Murenin <[email protected]>
23+
*/
24+
25+
package org.opensolaris.opengrok.analysis.uue;
26+
27+
import java.io.IOException;
28+
import java.io.Reader;
29+
import java.io.Writer;
30+
import org.opensolaris.opengrok.analysis.Definitions;
31+
import org.opensolaris.opengrok.analysis.FileAnalyzer;
32+
import org.opensolaris.opengrok.analysis.FileAnalyzer.Genre;
33+
import org.opensolaris.opengrok.analysis.FileAnalyzerFactory;
34+
import org.opensolaris.opengrok.configuration.Project;
35+
import org.opensolaris.opengrok.history.Annotation;
36+
37+
/**
38+
* @author Constantine A. Murenin <http://cnst.su/>
39+
*/
40+
41+
public class UuencodeAnalyzerFactory extends FileAnalyzerFactory {
42+
private static final String[] SUFFIXES = {
43+
/**
44+
* XXX:
45+
* FreeBSD and DragonFly .fnt files are uuencoded;
46+
* Minix3 .fnt files are binary. -- 2013-04 cnst
47+
*/
48+
"UU", "UUE", /*"FNT",*/ "BASE64"
49+
};
50+
51+
private static final String[] MAGICS = {
52+
"begin 4",
53+
"begin 6",
54+
"begin 7",
55+
"begin-b" /* XXX: Should be "begin-base64 ", but there seems to be a bug somewhere... */
56+
};
57+
// http://bxr.su/s?q=-"begin+644"+-"begin+755"+-"begin+744"+-"begin+444"+-"begin+666"+-"begin+664"+-"begin+600"+-"begin-base64"&path=fnt+OR+uu+OR+uue
58+
// http://bxr.su/s?q="begin+644"+OR+"begin+755"+OR+"begin+744"+OR+"begin+444"+OR+"begin+666"+OR+"begin+664"+OR+"begin+600"+OR+"begin-base64"&path=-fnt+-uu+-uue
59+
60+
public UuencodeAnalyzerFactory() {
61+
super(null, SUFFIXES, MAGICS, null, "text/plain", Genre.PLAIN);
62+
}
63+
64+
@Override
65+
protected FileAnalyzer newAnalyzer() {
66+
return new UuencodeAnalyzer(this);
67+
}
68+
69+
@Override
70+
public void writeXref(Reader in, Writer out, Definitions defs, Annotation annotation, Project project)
71+
throws IOException
72+
{
73+
UuencodeAnalyzer.writeXref(in, out, defs, annotation, project);
74+
}
75+
76+
}
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
/*
2+
* CDDL HEADER START
3+
*
4+
* The contents of this file are subject to the terms of the
5+
* Common Development and Distribution License (the "License").
6+
* You may not use this file except in compliance with the License.
7+
*
8+
* See LICENSE.txt included in this distribution for the specific
9+
* language governing permissions and limitations under the License.
10+
*
11+
* When distributing Covered Code, include this CDDL HEADER in each
12+
* file and include the License file at LICENSE.txt.
13+
* If applicable, add the following below this CDDL HEADER, with the
14+
* fields enclosed by brackets "[]" replaced with your own identifying
15+
* information: Portions Copyright [yyyy] [name of copyright owner]
16+
*
17+
* CDDL HEADER END
18+
*/
19+
20+
/*
21+
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
22+
* Copyright (c) 2012, 2013 Constantine A. Murenin <[email protected]>
23+
*/
24+
25+
package org.opensolaris.opengrok.analysis.uue;
26+
import java.io.IOException;
27+
import java.io.Reader;
28+
import org.opensolaris.opengrok.analysis.JFlexTokenizer;
29+
30+
31+
%%
32+
33+
// Scanner options: generate a public class UuencodeFullTokenizer that extends JFlexTokenizer.
%public
34+
%class UuencodeFullTokenizer
35+
%extends JFlexTokenizer
36+
%unicode
37+
// Constructor code: pass the constructor's `in` argument on to the superclass.
%init{
38+
super(in);
39+
%init}
40+
// The scanning method returns boolean: rule actions below return true after
// calling setAttribs(...) for a token; false is returned at end of input.
%type boolean
41+
%eofval{
42+
return false;
43+
%eofval}
44+
// %caseless: patterns match regardless of letter case.
// %char: maintain yychar, which the actions use to compute token offsets.
%caseless
45+
%char
46+
%{
47+
// Header-parsing flags; all three are reset when YYINITIAL matches a new "begin" line.
// b64: set once a "-" has been seen in the BEGIN state (a "begin-..." header).
boolean b64;
48+
// modeFound: set once a token has been read in the MODE state.
boolean modeFound;
49+
// nameFound: set once a token has been read in the NAME state.
boolean nameFound;
50+
%}
51+
52+
// Macro definitions used by the rules below (WhiteSpace is disabled).
//WhiteSpace = [ \t\f\r]+|\n
53+
// Identifier: a letter or underscore followed by letters, digits or underscores.
Identifier = [a-zA-Z_] [a-zA-Z0-9_]*
54+
// Number: an integer, a decimal with a single dot, or the third alternative below.
// NOTE(review): JFlex matches double-quoted text literally, so "0[xX]" appears to
// match the five characters 0[xX] rather than a 0x/0X hex prefix -- confirm the
// intent before changing it; query-side tokenization (not shown here) may need to agree.
Number = [0-9]+|[0-9]+\.[0-9]+| "0[xX]" [0-9a-fA-F]+
55+
// Printable: one of the punctuation characters @ $ % ^ & - + = ? . :
Printable = [\@\$\%\^\&\-+=\?\.\:]
56+
57+
// Lexical states: BEGIN, MODE and NAME walk through a header line; UUE covers the encoded body.
%state BEGIN MODE NAME UUE
58+
59+
%%
60+
// End of input in any state: report that no further token is available.
<<EOF>> { return false; }
61+
62+
// Default state: ordinary text is tokenized; a line starting with "begin " or
// "begin-" starts header parsing.
<YYINITIAL> {
63+
^ ( "begin " | "begin-" ) {
64+
// Start of a possible header: clear the per-header flags.
b64 = false;
65+
modeFound = false;
66+
nameFound = false;
67+
yybegin(BEGIN);
68+
// Give back the trailing ' ' or '-' so the BEGIN state can inspect it;
// the token emitted here is therefore the matched text without that character.
yypushback(1);
69+
setAttribs(yytext().toLowerCase(), yychar, yychar + yylength());
70+
return true;
71+
}
72+
73+
{Identifier}|{Number}|{Printable} {
74+
// Emit the lower-cased match with its start and end character offsets.
setAttribs(yytext().toLowerCase(), yychar, yychar + yylength());
75+
return true;
76+
}
77+
78+
// Any other character (including newline) is skipped without producing a token.
.|\n {}
79+
}
80+
81+
// BEGIN: entered right after "begin" with the following ' ' or '-' pushed back.
// Decides between the "begin <mode> <name>" and "begin-base64 <mode> <name>" forms.
<BEGIN> {
82+
" " {
83+
// Space while no '-' has been seen: proceed to the mode field.
// Space after a '-' has been seen: not a header, resume normal scanning.
if (!b64)
84+
yybegin(MODE);
85+
else
86+
yybegin(YYINITIAL);
87+
}
88+
"-" {
89+
// A second '-' sends the scanner back to YYINITIAL; in either case the
// flag is set and the '-' itself is emitted as a token.
if (b64)
90+
yybegin(YYINITIAL);
91+
b64 = true;
92+
setAttribs(yytext(), yychar, yychar + yylength());
93+
return true;
94+
}
95+
"base64 " {
96+
// "base64 " continues to the mode field only when it follows a '-';
// otherwise scanning returns to YYINITIAL.
if (b64)
97+
yybegin(MODE);
98+
else
99+
yybegin(YYINITIAL);
100+
// Give back the trailing space; the emitted token is the match without it.
yypushback(1);
101+
setAttribs(yytext().toLowerCase(), yychar, yychar + yylength());
102+
return true;
103+
}
104+
// Anything else: not a header after all; re-scan this character in YYINITIAL.
.|\n { yybegin(YYINITIAL); yypushback(1); }
105+
}
106+
107+
// MODE: reads the mode field of the header line.
<MODE> {
108+
// A space before any mode token is skipped; a space after one moves on to NAME.
" " { if (modeFound) yybegin(NAME); }
109+
{Identifier}|{Number}|{Printable} {
110+
// Record that a mode token was seen and emit it lower-cased with its offsets.
modeFound = true;
111+
setAttribs(yytext().toLowerCase(), yychar, yychar + yylength());
112+
return true;
113+
}
114+
// Anything else (including newline): abandon the header; re-scan this character in YYINITIAL.
.|\n { yybegin(YYINITIAL); yypushback(1); }
115+
}
116+
117+
// NAME: reads the file-name field of the header line up to the newline.
<NAME>{
118+
\n {
119+
// End of the header line: enter the encoded body only if a name token was
// seen; otherwise resume normal scanning.
if (nameFound)
120+
yybegin(UUE);
121+
else
122+
yybegin(YYINITIAL);
123+
}
124+
{Identifier}|{Number}|{Printable} {
125+
// Record that a name token was seen and emit it lower-cased with its offsets.
nameFound = true;
126+
setAttribs(yytext().toLowerCase(), yychar, yychar + yylength());
127+
return true;
128+
}
129+
// Any other non-newline character not matched by the token rule above (for
// instance a space): abandon the header and re-scan this character in YYINITIAL.
. { yybegin(YYINITIAL); yypushback(1); }
130+
}
131+
132+
// UUE: inside the encoded body. Content is discarded until the terminator line
// that matches the header form ("end" when b64 is false, "====" when b64 is true).
<UUE> {
133+
^ ( "end" | "====" ) \n {
134+
// Give back the newline so that t holds only the terminator text.
yypushback(1);
135+
String t = yytext();
136+
// NOTE(review): with %caseless the pattern can also match e.g. "END", but
// equals() is case-sensitive, so such a line does not end the body -- confirm intended.
if (t.equals("end") && !b64) {
137+
// "end" closes the body of a "begin " header and is emitted as a token.
yybegin(YYINITIAL);
138+
setAttribs(yytext().toLowerCase(), yychar, yychar + yylength());
139+
return true;
140+
} else if (t.equals("====") && b64)
141+
yybegin(YYINITIAL);
142+
// "====" closes the body of a "begin-" header without emitting a token; a
// terminator that does not match the header form leaves the state unchanged.
}
143+
// Runs of printable ASCII (space through '~') are discarded.
[ -~]* {}
144+
// Any other single character, including newline, is discarded as well.
.|\n {}
145+
}

0 commit comments

Comments
 (0)