oracle
diff --git a/‎opengrok-indexer/pom.xml
Lines changed: 7 additions & 0 deletions b/‎opengrok-indexer/pom.xml
Lines changed: 7 additions & 0 deletions
diff --git a/‎test/org/opensolaris/opengrok/analysis/python/PythonXrefTest.java
Lines changed: 159 additions & 0 deletions b/‎test/org/opensolaris/opengrok/analysis/python/PythonXrefTest.java
Lines changed: 159 additions & 0 deletions
diff --git a/‎test/org/opensolaris/opengrok/analysis/python/sample.py
Lines changed: 150 additions & 0 deletions b/‎test/org/opensolaris/opengrok/analysis/python/sample.py
Lines changed: 150 additions & 0 deletions
@@ -123,6 +123,13 @@ Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
                     <exclude>*.java</exclude>
                 </excludes>
             </testResource>
+            <!-- Copies everything except *.java from the python analysis test
+                 directory to the matching package path, which is the path
+                 PythonXrefTest uses to load sample.py through the class loader. -->
+            <testResource>
+                <targetPath>org/opensolaris/opengrok/analysis/python/</targetPath>
+                <directory>../test/org/opensolaris/opengrok/analysis/python/</directory>
+                <excludes>
+                    <exclude>*.java</exclude>
+                </excludes>
+            </testResource>
             <testResource>
                 <targetPath>org/opensolaris/opengrok/analysis/ruby/</targetPath>
                 <directory>../test/org/opensolaris/opengrok/analysis/ruby/</directory>
 
@@ -0,0 +1,159 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * See LICENSE.txt included in this distribution for the specific
+ * language governing permissions and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at LICENSE.txt.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Portions Copyright (c) 2017, Chris Fraire <[email protected]>.
+ */
+
+package org.opensolaris.opengrok.analysis.python;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.io.StringWriter;
+import java.io.Writer;
+
+import org.opensolaris.opengrok.analysis.CtagsReader;
+import org.opensolaris.opengrok.analysis.Definitions;
+import org.opensolaris.opengrok.analysis.FileAnalyzer;
+import org.opensolaris.opengrok.analysis.WriteXrefArgs;
+import org.junit.Test;
+import static org.junit.Assert.assertNotNull;
+import static org.opensolaris.opengrok.util.CustomAssertions.assertLinesEqual;
+
+/**
+ * Tests the {@link PythonXref} class.
+ */
+public class PythonXrefTest {
+
+    @Test
+    public void sampleTest() throws IOException {
+        writeAndCompare("org/opensolaris/opengrok/analysis/python/sample.py",
+            "org/opensolaris/opengrok/analysis/python/sample_xref.html",
+            getTagsDefinitions());
+    }
+
+    @Test
+    public void shouldCloseTruncatedStringSpan() throws IOException {
+        writeAndCompare("org/opensolaris/opengrok/analysis/python/truncated.py",
+            "org/opensolaris/opengrok/analysis/python/truncated_xref.html",
+            null);
+    }
+
+    private void writeAndCompare(String sourceResource, String resultResource,
+        Definitions defs)
+        throws IOException {
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        ByteArrayOutputStream baosExp = new ByteArrayOutputStream();
+
+        InputStream res = getClass().getClassLoader().getResourceAsStream(
+            sourceResource);
+        assertNotNull(sourceResource + " should get-as-stream", res);
+        writePythonXref(new PrintStream(baos), res, defs);
+        res.close();
+
+        InputStream exp = getClass().getClassLoader().getResourceAsStream(
+            resultResource);
+        assertNotNull(resultResource + " should get-as-stream", exp);
+        copyStream(exp, baosExp);
+        exp.close();
+        baosExp.close();
+        baos.close();
+
+        String ostr = new String(baos.toByteArray(), "UTF-8");
+        String gotten[] = ostr.split("\n");
+
+        String estr = new String(baosExp.toByteArray(), "UTF-8");
+        String expected[] = estr.split("\n");
+
+        assertLinesEqual("Python xref", expected, gotten);
+    }
+
+    private void writePythonXref(PrintStream oss, InputStream iss,
+        Definitions defs)
+        throws IOException {
+
+        oss.print(getHtmlBegin());
+
+        Writer sw = new StringWriter();
+        PythonAnalyzerFactory fac = new PythonAnalyzerFactory();
+        FileAnalyzer analyzer = fac.getAnalyzer();
+        analyzer.setScopesEnabled(true);
+        analyzer.setFoldingEnabled(true);
+        WriteXrefArgs wargs = new WriteXrefArgs(
+            new InputStreamReader(iss, "UTF-8"), sw);
+        wargs.setDefs(defs);
+        analyzer.writeXref(wargs);
+        oss.print(sw.toString());
+
+        oss.print(getHtmlEnd());
+    }
+
+    private void copyStream(InputStream iss, OutputStream oss)
+        throws IOException {
+
+        byte buffer[] = new byte[8192];
+        int read;
+        do {
+            read = iss.read(buffer, 0, buffer.length);
+            if (read > 0) {
+                oss.write(buffer, 0, read);
+            }
+        } while (read >= 0);
+    }
+
+    private Definitions getTagsDefinitions() throws IOException {
+        InputStream res = getClass().getClassLoader().getResourceAsStream(
+            "org/opensolaris/opengrok/analysis/python/sampletags");
+        assertNotNull("though sampletags should stream,", res);
+
+        BufferedReader in = new BufferedReader(new InputStreamReader(
+            res, "UTF-8"));
+
+        CtagsReader rdr = new CtagsReader();
+        String line;
+        while ((line = in.readLine()) != null) {
+            rdr.readLine(line);
+        }
+        return rdr.getDefinitions();
+    }
+
+    private static String getHtmlBegin() {
+        return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+            "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n" +
+            "    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n" +
+            "<html xmlns=\"http://www.w3.org/1999/xhtml\"" +
+            " xml:lang=\"en\" lang=\"en\"\n" +
+            "      class=\"xref\">\n" +
+            "<head>\n" +
+            "<title>sampleFile - OpenGrok cross reference" +
+            " for /sampleFile</title></head><body>\n";
+    }
+
+    private static String getHtmlEnd() {
+        return "</body>\n" +
+            "</html>\n";
+    }
+}
@@ -0,0 +1,150 @@
+# MIT License
+# 
+# Copyright (c) 2017 OsciiArt
+# 
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+# 
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+# 
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from keras.models import model_from_json
+import numpy as np
+import pandas as pd
+from PIL import Image
+import pickle
+import os
+
+
+# parameters
+model_path = "model/model.json"
+weight_path = "model/weight.hdf5"
+image_path = 'sample images/original images/21 original.png' # put the path of the image that you convert.
+new_width = 0 # adjust the width of the image. the original width is used if new_width = 0.
+input_shape = [64, 64, 1]
+
+
+def add_mergin(img, mergin):
+    if mergin!=0:
+        img_new = np.ones([img.shape[0] + 2 * mergin, img.shape[1] + 2 * mergin], dtype=np.uint8) * 255
+        img_new[mergin:-mergin, mergin:-mergin] = img
+    else:
+        img_new = img
+    return img_new
+
+
+def pickleload(path):
+    with open(path, mode='rb') as f:
+        data = pickle.load(f)
+    return data
+
+
+# load model
+json_string = open(model_path).read()
+model = model_from_json(json_string)
+model.load_weights(weight_path)
+print("model load done")
+
+char_list_path = "data/char_list.csv"
+char_list = pd.read_csv(char_list_path, encoding="cp932")
+print("len(char_list)", len(char_list))
+# print(char_list.head())
+char_list = char_list[char_list['frequency']>=10]
+char_list = char_list['char'].as_matrix()
+
+for k, v in enumerate(char_list):
+    if v==" ":
+        space = k
+        break
+print("class index of 1B space:", space)
+
+
+mergin = (input_shape[0] - 18) // 2
+img = Image.open(image_path)
+orig_width, orig_height = img.size
+if new_width==0: new_width = orig_width
+new_height = int(img.size[1] * new_width / img.size[0])
+img = img.resize((new_width, new_height), Image.LANCZOS)
+img = np.array(img)
+if len(img.shape) == 3:
+    img = img[:, :, 0]
+
+img_new = np.ones([img.shape[0]+2*mergin+18, img.shape[1]+2*mergin+18],
+                  dtype=np.uint8) * 255
+img_new[mergin:mergin+new_height, mergin:mergin+new_width] = img
+img = (img_new.astype(np.float32)) / 255
+
+char_dict_path = "data/char_dict.pkl"
+char_dict = pickleload(char_dict_path)
+
+print("len(char_dict)", len(char_dict))
+
+output_dir = "output/"
+if not os.path.isdir(output_dir):
+    os.makedirs(output_dir)
+
+for slide in range(18):
+    print("converting:", slide)
+    num_line = (img.shape[0] - input_shape[0]) // 18
+    img_width = img.shape[1]
+    new_line = np.ones([1, img_width])
+    img = np.concatenate([new_line, img], axis=0)
+    predicts = []
+    text = []
+    for h in range(num_line):
+        w = 0
+        penalty = 1
+        predict_line = []
+        text_line = ""
+        while w <= img_width - input_shape[1]:
+            input_img = img[h*18:h*18+ input_shape[0], w:w+input_shape[1]]
+            input_img = input_img.reshape([1,input_shape[0], input_shape[1], 1])
+            predict = model.predict(input_img)
+            if penalty: predict[0, space] = 0
+            predict = np.argmax(predict[0])
+            penalty = (predict==space)
+            char = char_list[predict]
+            predict_line.append(char)
+            char_width = char_dict[char].shape[1]
+            w += char_width
+            text_line += char
+        predicts.append(predict_line)
+        text.append(text_line+'\r\n')
+    # print(text)
+
+    img_aa = np.ones_like(img, dtype=np.uint8) * 255
+
+    for h in range(num_line):
+        w = 0
+        for char in predicts[h]:
+            # print("w", w)
+            char_width = char_dict[char].shape[1]
+            char_img = 255 - char_dict[char].astype(np.uint8) * 255
+            img_aa[h*18:h*18+16, w:w+char_width] = char_img
+            w += char_width
+
+    img_aa = Image.fromarray(img_aa)
+    img_aa = img_aa.crop([0,slide,new_width, new_height+slide])
+    save_path = output_dir + os.path.basename(image_path)[:-4] + '_'\
+                + 'w' + str(new_width) \
+                + '_slide' + str(slide) + '.png'
+    img_aa.save(save_path)
+
+    f=open(save_path[:-4] + '.txt', 'w')
+    f.writelines(text)
+    f.close()