
Commit 108bd6e

Add PythonXrefTest with one non-passing for dangling span
1 parent 4c99a20 commit 108bd6e
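The "non-passing" case referred to in the message is shouldCloseTruncatedStringSpan below: presumably a Python source that ends inside an unterminated string literal, for which the expected truncated_xref.html closes the highlighting span at end of file while the current PythonXref output leaves it dangling. The committed truncated.py fixture is not shown in this excerpt; a hypothetical input of that shape would be roughly:

# hypothetical sketch only -- not the committed truncated.py fixture
# the file deliberately ends mid-string, so it is not valid, runnable Python
def greet():
    return 'a string literal that reaches end-of-file without its closing quote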

7 files changed: +547 -0 lines changed

opengrok-indexer/pom.xml

Lines changed: 7 additions & 0 deletions
@@ -123,6 +123,13 @@ Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
                     <exclude>*.java</exclude>
                 </excludes>
             </testResource>
+            <testResource>
+                <targetPath>org/opensolaris/opengrok/analysis/python/</targetPath>
+                <directory>../test/org/opensolaris/opengrok/analysis/python/</directory>
+                <excludes>
+                    <exclude>*.java</exclude>
+                </excludes>
+            </testResource>
             <testResource>
                 <targetPath>org/opensolaris/opengrok/analysis/ruby/</targetPath>
                 <directory>../test/org/opensolaris/opengrok/analysis/ruby/</directory>
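Presumably this new <testResource> entry simply mirrors the existing ones for the other analyzers: it copies the non-Java fixtures under ../test/org/opensolaris/opengrok/analysis/python/ (sample.py, sample_xref.html, sampletags, truncated.py, truncated_xref.html) onto the test classpath at org/opensolaris/opengrok/analysis/python/, which is where the test below loads them from via getResourceAsStream.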
Lines changed: 159 additions & 0 deletions
@@ -0,0 +1,159 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
+ */
+
+package org.opensolaris.opengrok.analysis.python;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.StringWriter;
+import java.io.Writer;
+
+import org.opensolaris.opengrok.analysis.CtagsReader;
+import org.opensolaris.opengrok.analysis.Definitions;
+import org.opensolaris.opengrok.analysis.FileAnalyzer;
+import org.opensolaris.opengrok.analysis.WriteXrefArgs;
+import org.junit.Test;
+import static org.junit.Assert.assertNotNull;
+import static org.opensolaris.opengrok.util.CustomAssertions.assertLinesEqual;
+
+/**
+ * Tests the {@link PythonXref} class.
+ */
+public class PythonXrefTest {
+
+    @Test
+    public void sampleTest() throws IOException {
+        writeAndCompare("org/opensolaris/opengrok/analysis/python/sample.py",
+            "org/opensolaris/opengrok/analysis/python/sample_xref.html",
+            getTagsDefinitions());
+    }
+
+    @Test
+    public void shouldCloseTruncatedStringSpan() throws IOException {
+        writeAndCompare("org/opensolaris/opengrok/analysis/python/truncated.py",
+            "org/opensolaris/opengrok/analysis/python/truncated_xref.html",
+            null);
+    }
+
+    private void writeAndCompare(String sourceResource, String resultResource,
+        Definitions defs)
+        throws IOException {
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        ByteArrayOutputStream baosExp = new ByteArrayOutputStream();
+
+        InputStream res = getClass().getClassLoader().getResourceAsStream(
+            sourceResource);
+        assertNotNull(sourceResource + " should get-as-stream", res);
+        writePythonXref(new PrintStream(baos), res, defs);
+        res.close();
+
+        InputStream exp = getClass().getClassLoader().getResourceAsStream(
+            resultResource);
+        assertNotNull(resultResource + " should get-as-stream", exp);
+        copyStream(exp, baosExp);
+        exp.close();
+        baosExp.close();
+        baos.close();
+
+        String ostr = new String(baos.toByteArray(), "UTF-8");
+        String gotten[] = ostr.split("\n");
+
+        String estr = new String(baosExp.toByteArray(), "UTF-8");
+        String expected[] = estr.split("\n");
+
+        assertLinesEqual("Python xref", expected, gotten);
+    }
+
+    private void writePythonXref(PrintStream oss, InputStream iss,
+        Definitions defs)
+        throws IOException {
+
+        oss.print(getHtmlBegin());
+
+        Writer sw = new StringWriter();
+        PythonAnalyzerFactory fac = new PythonAnalyzerFactory();
+        FileAnalyzer analyzer = fac.getAnalyzer();
+        analyzer.setScopesEnabled(true);
+        analyzer.setFoldingEnabled(true);
+        WriteXrefArgs wargs = new WriteXrefArgs(
+            new InputStreamReader(iss, "UTF-8"), sw);
+        wargs.setDefs(defs);
+        analyzer.writeXref(wargs);
+        oss.print(sw.toString());
+
+        oss.print(getHtmlEnd());
+    }
+
+    private void copyStream(InputStream iss, OutputStream oss)
+        throws IOException {
+
+        byte buffer[] = new byte[8192];
+        int read;
+        do {
+            read = iss.read(buffer, 0, buffer.length);
+            if (read > 0) {
+                oss.write(buffer, 0, read);
+            }
+        } while (read >= 0);
+    }
+
+    private Definitions getTagsDefinitions() throws IOException {
+        InputStream res = getClass().getClassLoader().getResourceAsStream(
+            "org/opensolaris/opengrok/analysis/python/sampletags");
+        assertNotNull("though sampletags should stream,", res);
+
+        BufferedReader in = new BufferedReader(new InputStreamReader(
+            res, "UTF-8"));
+
+        CtagsReader rdr = new CtagsReader();
+        String line;
+        while ((line = in.readLine()) != null) {
+            rdr.readLine(line);
+        }
+        return rdr.getDefinitions();
+    }
+
+    private static String getHtmlBegin() {
+        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+            "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" +
+            " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" +
+            "<html xmlns=\"http://www.w3.org/1999/xhtml\"" +
+            " xml:lang=\"en\" lang=\"en\"\n" +
+            " class=\"xref\">\n" +
+            "<head>\n" +
+            "<title>sampleFile - OpenGrok cross reference" +
+            " for /sampleFile</title></head><body>\n";
+    }
+
+    private static String getHtmlEnd() {
+        return "</body>\n" +
+            "</html>\n";
+    }
+}
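Assuming the module's standard Surefire setup, the new tests can presumably be run on their own with something like mvn test -Dtest=PythonXrefTest from the opengrok-indexer directory; until the dangling-span behavior is fixed in the lexer, shouldCloseTruncatedStringSpan is the case expected to fail.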
Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
+# MIT License
+#
+# Copyright (c) 2017 OsciiArt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from keras.models import model_from_json
+import numpy as np
+import pandas as pd
+from PIL import Image
+import pickle
+import os
+
+
+# parameters
+model_path = "model/model.json"
+weight_path = "model/weight.hdf5"
+image_path = 'sample images/original images/21 original.png' # put the path of the image that you convert.
+new_width = 0 # adjust the width of the image. the original width is used if new_width = 0.
+input_shape = [64, 64, 1]
+
+
+def add_mergin(img, mergin):
+    if mergin!=0:
+        img_new = np.ones([img.shape[0] + 2 * mergin, img.shape[1] + 2 * mergin], dtype=np.uint8) * 255
+        img_new[mergin:-mergin, mergin:-mergin] = img
+    else:
+        img_new = img
+    return img_new
+
+
+def pickleload(path):
+    with open(path, mode='rb') as f:
+        data = pickle.load(f)
+    return data
+
+
+# load model
+json_string = open(model_path).read()
+model = model_from_json(json_string)
+model.load_weights(weight_path)
+print("model load done")
+
+char_list_path = "data/char_list.csv"
+char_list = pd.read_csv(char_list_path, encoding="cp932")
+print("len(char_list)", len(char_list))
+# print(char_list.head())
+char_list = char_list[char_list['frequency']>=10]
+char_list = char_list['char'].as_matrix()
+
+for k, v in enumerate(char_list):
+    if v==" ":
+        space = k
+        break
+print("class index of 1B space:", space)
+
+
+mergin = (input_shape[0] - 18) // 2
+img = Image.open(image_path)
+orig_width, orig_height = img.size
+if new_width==0: new_width = orig_width
+new_height = int(img.size[1] * new_width / img.size[0])
+img = img.resize((new_width, new_height), Image.LANCZOS)
+img = np.array(img)
+if len(img.shape) == 3:
+    img = img[:, :, 0]
+
+img_new = np.ones([img.shape[0]+2*mergin+18, img.shape[1]+2*mergin+18],
+                  dtype=np.uint8) * 255
+img_new[mergin:mergin+new_height, mergin:mergin+new_width] = img
+img = (img_new.astype(np.float32)) / 255
+
+char_dict_path = "data/char_dict.pkl"
+char_dict = pickleload(char_dict_path)
+
+print("len(char_dict)", len(char_dict))
+
+output_dir = "output/"
+if not os.path.isdir(output_dir):
+    os.makedirs(output_dir)
+
+for slide in range(18):
+    print("converting:", slide)
+    num_line = (img.shape[0] - input_shape[0]) // 18
+    img_width = img.shape[1]
+    new_line = np.ones([1, img_width])
+    img = np.concatenate([new_line, img], axis=0)
+    predicts = []
+    text = []
+    for h in range(num_line):
+        w = 0
+        penalty = 1
+        predict_line = []
+        text_line = ""
+        while w <= img_width - input_shape[1]:
+            input_img = img[h*18:h*18+ input_shape[0], w:w+input_shape[1]]
+            input_img = input_img.reshape([1,input_shape[0], input_shape[1], 1])
+            predict = model.predict(input_img)
+            if penalty: predict[0, space] = 0
+            predict = np.argmax(predict[0])
+            penalty = (predict==space)
+            char = char_list[predict]
+            predict_line.append(char)
+            char_width = char_dict[char].shape[1]
+            w += char_width
+            text_line += char
+        predicts.append(predict_line)
+        text.append(text_line+'\r\n')
+    # print(text)
+
+    img_aa = np.ones_like(img, dtype=np.uint8) * 255
+
+    for h in range(num_line):
+        w = 0
+        for char in predicts[h]:
+            # print("w", w)
+            char_width = char_dict[char].shape[1]
+            char_img = 255 - char_dict[char].astype(np.uint8) * 255
+            img_aa[h*18:h*18+16, w:w+char_width] = char_img
+            w += char_width
+
+    img_aa = Image.fromarray(img_aa)
+    img_aa = img_aa.crop([0,slide,new_width, new_height+slide])
+    save_path = output_dir + os.path.basename(image_path)[:-4] + '_'\
+                + 'w' + str(new_width) \
+                + '_slide' + str(slide) + '.png'
+    img_aa.save(save_path)
+
+    f=open(save_path[:-4] + '.txt', 'w')
+    f.writelines(text)
+    f.close()
