11package fr .insee .genesis .domain .utils ;
22
3+ import fr .insee .genesis .exceptions .GenesisException ;
34import lombok .experimental .UtilityClass ;
45
5- import java .io .FileNotFoundException ;
6- import java .io .FileReader ;
7- import java .io .FileWriter ;
8- import java .io .IOException ;
9- import java .util .ArrayList ;
10- import java .util .List ;
11-
126import javax .xml .XMLConstants ;
137import javax .xml .stream .XMLEventFactory ;
148import javax .xml .stream .XMLEventReader ;
2014import javax .xml .stream .events .StartDocument ;
2115import javax .xml .stream .events .StartElement ;
2216import javax .xml .stream .events .XMLEvent ;
17+ import java .io .FileReader ;
18+ import java .io .FileWriter ;
19+ import java .io .IOException ;
20+ import java .util .ArrayList ;
21+ import java .util .List ;
2322
2423@ UtilityClass
2524public class XMLSplitter {
2625
2726 // We use StAX in this class to deal with memory issues on huge XML files
28- public static void split (String inputfolder , String xmlfile , String outputFolder , String condition , int nbElementsByFile ) throws XMLStreamException , IOException {
27+ public static void split (String inputfolder , String xmlfile , String outputFolder , String condition , int nbElementsByFile ) throws XMLStreamException , IOException , GenesisException {
2928
3029 String xmlResource = inputfolder + xmlfile ;
3130 List <XMLEvent > header = getHeader (xmlResource , condition );
@@ -34,42 +33,45 @@ public static void split(String inputfolder, String xmlfile, String outputFolder
3433 XMLInputFactory xif = XMLInputFactory .newInstance ();
3534 xif .setProperty (XMLConstants .ACCESS_EXTERNAL_DTD , "" );
3635 xif .setProperty (XMLConstants .ACCESS_EXTERNAL_SCHEMA , "" );
37- XMLEventReader xer = xif .createXMLEventReader (new FileReader (xmlResource ));
38- StartElement rootStartElement = xer .nextTag ().asStartElement ();
39- StartDocument startDocument = xef .createStartDocument ();
40- EndDocument endDocument = xef .createEndDocument ();
41-
42- XMLOutputFactory xof = XMLOutputFactory .newFactory ();
43- int fileCount = 1 ;
44- while (xer .hasNext () && !xer .peek ().isEndDocument ()) {
45- XMLEvent xmlEvent = xer .nextEvent ();
46-
47- if (isStartElementWithName (condition , xmlEvent )) {
48- // Create a file for the fragment, the name is derived from the value of the id attribute
49- FileWriter fileWriter = new FileWriter (outputFolder + "split" + fileCount + ".xml" );
50-
51- // A StAX XMLEventWriter will be used to write the XML fragment
52- XMLEventWriter xew = xof .createXMLEventWriter (fileWriter );
53- xew .add (startDocument );
54-
55- // Add the elements which are common to all split files
56- addHeadersToNewFile (header , xew );
57-
58- // Write the XMLEvents that are part of SurveyUnit element
59- xew .add (xmlEvent );
60- xmlEvent = xer .nextEvent ();
61- int nbResponses = 1 ;
62- // We loop until we reach the end tag Survey units indicating the near end of the document
63- iterateOnSurveyUnits (condition , nbElementsByFile , xer , xmlEvent , xew , nbResponses );
64-
65- // Write the file, close everything we opened and update the file's counter
66- xew .add (xef .createEndElement (rootStartElement .getName (), null ));
67- xew .add (endDocument );
68- fileWriter .close ();
69-
70- fileCount ++;
71-
36+ try (FileReader fr = new FileReader (xmlResource )){
37+ XMLEventReader xer = xif .createXMLEventReader (fr );
38+
39+ StartElement rootStartElement = xer .nextTag ().asStartElement ();
40+ StartDocument startDocument = xef .createStartDocument ();
41+ EndDocument endDocument = xef .createEndDocument ();
42+
43+ XMLOutputFactory xof = XMLOutputFactory .newFactory ();
44+ int fileCount = 1 ;
45+ while (xer .hasNext () && !xer .peek ().isEndDocument ()) {
46+ XMLEvent xmlEvent = xer .nextEvent ();
47+
48+ if (isStartElementWithName (condition , xmlEvent )) {
49+ // Create a file for the fragment, the name is derived from the value of the id attribute
50+ FileWriter fileWriter = new FileWriter (outputFolder + "split" + fileCount + ".xml" );
51+
52+ // A StAX XMLEventWriter will be used to write the XML fragment
53+ XMLEventWriter xew = xof .createXMLEventWriter (fileWriter );
54+ xew .add (startDocument );
55+
56+ // Add the elements which are common to all split files
57+ addHeadersToNewFile (header , xew );
58+
59+ // Write the XMLEvents that are part of SurveyUnit element
60+ xew .add (xmlEvent );
61+ xmlEvent = xer .nextEvent ();
62+ int nbResponses = 1 ;
63+ // We loop until we reach the end tag Survey units indicating the near end of the document
64+ iterateOnSurveyUnits (condition , nbElementsByFile , xer , xmlEvent , xew , nbResponses );
65+
66+ // Write the file, close everything we opened and update the file's counter
67+ xew .add (xef .createEndElement (rootStartElement .getName (), null ));
68+ xew .add (endDocument );
69+ fileWriter .close ();
70+
71+ fileCount ++;
72+ }
7273 }
74+ xer .close ();
7375 }
7476 }
7577
@@ -103,33 +105,37 @@ private static boolean isEndElementWithName(XMLEvent xmlEvent, String condition)
103105 return xmlEvent .isEndElement () && xmlEvent .asEndElement ().getName ().getLocalPart ().equals (condition );
104106 }
105107
106- private static List <XMLEvent > getHeader (String xmlResource , String condition ) throws FileNotFoundException , XMLStreamException {
108+ private static List <XMLEvent > getHeader (String xmlResource , String condition ) throws XMLStreamException , GenesisException {
107109 XMLInputFactory xif = XMLInputFactory .newInstance ();
108110 xif .setProperty (XMLConstants .ACCESS_EXTERNAL_DTD , "" );
109111 xif .setProperty (XMLConstants .ACCESS_EXTERNAL_SCHEMA , "" );
110- XMLEventReader xer = xif .createXMLEventReader (new FileReader (xmlResource ));
111-
112- List <XMLEvent > cachedXMLEvents = new ArrayList <>();
113- while (xer .hasNext () && !xer .peek ().isEndDocument ()) {
114- XMLEvent xmlEvent = xer .nextTag ();
115- if (!xmlEvent .isStartElement ()) {
116- break ;
117- }
118- StartElement breakStartElement = xmlEvent .asStartElement ();
119-
120- cachedXMLEvents .add (breakStartElement );
121- xmlEvent = xer .nextEvent ();
122- while (!(xmlEvent .isEndElement () && xmlEvent .asEndElement ().getName ().equals (breakStartElement .getName ()))) {
123- if (isStartElementWithName (condition , xmlEvent )) {
124- xer .close ();
125- return cachedXMLEvents ;
126- }
127- cachedXMLEvents .add (xmlEvent );
128- xmlEvent = xer .nextEvent ();
129- }
130- }
131- xer .close ();
132- return List .of ();
112+ try (FileReader fr = new FileReader (xmlResource )) {
113+ XMLEventReader xer = xif .createXMLEventReader (fr );
114+
115+ List <XMLEvent > cachedXMLEvents = new ArrayList <>();
116+ while (xer .hasNext () && !xer .peek ().isEndDocument ()) {
117+ XMLEvent xmlEvent = xer .nextTag ();
118+ if (!xmlEvent .isStartElement ()) {
119+ break ;
120+ }
121+ StartElement breakStartElement = xmlEvent .asStartElement ();
122+
123+ cachedXMLEvents .add (breakStartElement );
124+ xmlEvent = xer .nextEvent ();
125+ while (!(xmlEvent .isEndElement () && xmlEvent .asEndElement ().getName ().equals (breakStartElement .getName ()))) {
126+ if (isStartElementWithName (condition , xmlEvent )) {
127+ xer .close ();
128+ return cachedXMLEvents ;
129+ }
130+ cachedXMLEvents .add (xmlEvent );
131+ xmlEvent = xer .nextEvent ();
132+ }
133+ }
134+ xer .close ();
135+ } catch (IOException e ) {
136+ throw new GenesisException (500 ,e .getMessage ());
137+ }
138+ return List .of ();
133139 }
134140
135141}
0 commit comments