44import org .apache .fop .apps .Fop ;
55import org .apache .fop .apps .FopFactory ;
66import org .apache .fop .apps .MimeConstants ;
7+ import org .apache .tika .language .detect .LanguageDetector ;
8+ import org .apache .tika .language .detect .LanguageResult ;
9+ import org .apache .tika .langdetect .optimaize .OptimaizeLangDetector ;
10+
711import org .slf4j .Logger ;
812import org .slf4j .LoggerFactory ;
13+ import org .w3c .dom .DOMException ;
14+ import org .w3c .dom .Document ;
15+
16+ import org .xml .sax .SAXException ;
917
18+ import javax .xml .parsers .DocumentBuilder ;
19+ import javax .xml .parsers .DocumentBuilderFactory ;
20+ import javax .xml .parsers .ParserConfigurationException ;
1021import javax .xml .stream .XMLStreamException ;
1122import javax .xml .transform .Result ;
1223import javax .xml .transform .Source ;
1324import javax .xml .transform .Transformer ;
1425import javax .xml .transform .TransformerFactory ;
1526import javax .xml .transform .sax .SAXResult ;
1627import javax .xml .transform .stream .StreamSource ;
17- import java .io .File ;
28+ import javax .xml .xpath .XPathConstants ;
29+ import javax .xml .xpath .XPathExpressionException ;
30+ import javax .xml .xpath .XPath ;
31+ import javax .xml .xpath .XPathExpression ;
32+ import javax .xml .xpath .XPathFactory ;
33+
34+ import java .io .IOException ;
1835import java .io .InputStream ;
1936import java .io .OutputStream ;
37+ import java .io .ByteArrayOutputStream ;
38+ import java .io .ByteArrayInputStream ;
39+ import java .io .File ;
40+
41+ import java .net .URL ;
2042
2143
2244public class DdiPdfExportUtil {
2345
2446 private static final Logger logger = LoggerFactory .getLogger (DdiPdfExportUtil .class );
47+ public static class TitleAndDescription {
48+ public String Title ;
49+ public String Description ;
50+ public String Language ;
51+ }
2552
/**
 * Not instantiable: this is a utility class that only exposes static
 * members, so the constructor is private.
 */
private DdiPdfExportUtil() {
}
56+
57+ private static String detectLanguage (TitleAndDescription td ) {
58+ String lang = "en" ; //default language
59+ LanguageDetector detector = new OptimaizeLangDetector ().loadModels ();
60+ LanguageResult result1 = detector .detect (td .Title );
61+ String lang1 = result1 .getLanguage ();
62+ if (result1 .isReasonablyCertain ()) {
63+ lang = lang1 ;
64+ } else {
65+ LanguageResult result2 = detector .detect (td .Description );
66+ if (result2 .isReasonablyCertain ()) {
67+ lang = result2 .getLanguage ();
68+ }
69+ }
70+
71+ URL found = DdiPdfExportUtil .class .getResource ("messages_" + lang + ".properties.xml" );
72+
73+ if (found != null ) {
74+ return lang ;
75+ } else {
76+ return null ;
77+ }
78+ }
79+
80+ private static TitleAndDescription getTitleAndDescription (InputStream datafile ) {
81+
82+ TitleAndDescription titleAndDescription = new TitleAndDescription ();
83+ String lang = null ;
84+ try {
85+ DocumentBuilderFactory dbf = DocumentBuilderFactory .newInstance ();
86+ DocumentBuilder builder = dbf .newDocumentBuilder ();
87+ Document doc = builder .parse (datafile );
88+ try {
89+ lang = doc .getDocumentElement ().getAttribute ("xml:lang" );
90+ } catch (DOMException e ) {
91+ lang = null ;
92+ logger .warn ("No language attribute" );
93+ }
94+ if (lang != null && !lang .equals ("" ) ) {
95+ titleAndDescription .Language = lang ;
96+ } else {
97+ XPathFactory xPathfactory = XPathFactory .newInstance ();
98+ XPath xpath = xPathfactory .newXPath ();
99+ try {
100+ XPathExpression expr = xpath .compile ("/codeBook/stdyDscr/citation/titlStmt/titl/text()" );
101+ titleAndDescription .Title = (String ) expr .evaluate (doc , XPathConstants .STRING );
102+ expr = xpath .compile ("/codeBook/stdyDscr/stdyInfo/abstract/text()" );
103+ titleAndDescription .Description = (String ) expr .evaluate (doc , XPathConstants .STRING );
104+ } catch (XPathExpressionException e ) {
105+ logger .error ("Error finding title and description" );
106+ logger .error (e .getMessage ());
107+ }
108+ }
109+
110+ return titleAndDescription ;
111+ } catch (ParserConfigurationException | SAXException | IOException e ) {
112+ logger .warn (e .getMessage ());
113+ return null ;
114+ }
115+
116+ }
29117
30118 public static void datasetPdfDDI (InputStream datafile , OutputStream outputStream ) throws XMLStreamException {
31119 try {
120+ String localeEnvVar = "en" ; //default language
121+ ByteArrayOutputStream baos = new ByteArrayOutputStream ();
122+ datafile .transferTo (baos );
123+
124+ byte [] buffer = baos .toByteArray ();
125+ InputStream clone1 = new ByteArrayInputStream (buffer );
126+ InputStream clone2 = new ByteArrayInputStream (buffer );
127+
128+ TitleAndDescription td = getTitleAndDescription (clone1 );
129+ if (td != null ) {
130+ if (td .Language != null ) {
131+ localeEnvVar = td .Language ;
132+ } else {
133+ String lang = detectLanguage (td );
134+ if (lang != null && !lang .equals ("" )) {
135+ localeEnvVar = lang ;
136+ }
137+ }
138+ }
139+
32140 InputStream styleSheetInput = DdiPdfExportUtil .class .getResourceAsStream ("ddi-to-fo.xsl" );
33141
34142 final FopFactory fopFactory = FopFactory .newInstance (new File ("." ).toURI ());
@@ -41,20 +149,11 @@ public static void datasetPdfDDI(InputStream datafile, OutputStream outputStream
41149 Source mySrc = new StreamSource (styleSheetInput );
42150 factory .setURIResolver (new FileResolver ());
43151 Transformer transformer = factory .newTransformer (mySrc );
44-
45- // Set the value of a <param> in the stylesheet
46- String localeEnvVar = System .getenv ().get ("LANG" );
47- if (localeEnvVar != null ) {
48- if (localeEnvVar .indexOf ('.' ) > 0 ) {
49- localeEnvVar = localeEnvVar .substring (0 , localeEnvVar .indexOf ('.' ));
50- }
51- } else {
52- localeEnvVar = "en" ;
53- }
152+
54153 transformer .setParameter ("language-code" , localeEnvVar );
55154
56155 // Setup input for XSLT transformation
57- Source src = new StreamSource (datafile );
156+ Source src = new StreamSource (clone2 );
58157
59158 // Resulting SAX events (the generated FO) must be piped through to FOP
60159 Result res = new SAXResult (fop .getDefaultHandler ());
0 commit comments