3838import org .biojava .nbio .structure .ResidueNumber ;
3939import org .biojava .nbio .structure .Structure ;
4040import org .biojava .nbio .structure .StructureIO ;
41+ import org .biojava .nbio .structure .align .util .AtomCache ;
42+ import org .biojava .nbio .structure .chem .ChemCompGroupFactory ;
43+ import org .biojava .nbio .structure .chem .DownloadChemCompProvider ;
4144import org .biojava .nbio .structure .contact .AtomContact ;
4245import org .biojava .nbio .structure .contact .AtomContactSet ;
4346import org .biojava .nbio .structure .contact .Grid ;
4447import org .biojava .nbio .structure .contact .Pair ;
45- import org .biojava .nbio .structure .io .mmcif .SimpleMMcifConsumer ;
46- import org .biojava .nbio .structure .io .mmcif .SimpleMMcifParser ;
47- import org .biojava .nbio .structure .io .mmcif .model .ChemComp ;
48+ import org .biojava .nbio .structure .io .FileParsingParameters ;
49+ import org .biojava .nbio .structure .io .LocalPDBDirectory ;
50+ import org .biojava .nbio .structure .io .StructureFiletype ;
51+ import org .biojava .nbio .structure .io .cif .CifStructureConverter ;
52+ import org .biojava .nbio .structure .chem .ChemComp ;
4853import org .biojava .nbio .structure .io .mmtf .MmtfStructureReader ;
4954import org .biojava .nbio .structure .io .mmtf .MmtfStructureWriter ;
5055import org .biojava .nbio .structure .io .mmtf .MmtfUtils ;
@@ -434,11 +439,7 @@ public static StructureDataInterface convertToStructDataInt(Structure structure)
434439 * @throws IOException
435440 */
436441 private static Structure getStructureFromMmmCifText (byte [] inputText ) throws IOException {
437- SimpleMMcifConsumer simpleMMcifConsumer = new SimpleMMcifConsumer ();
438- SimpleMMcifParser simpleMMcifParser = new SimpleMMcifParser ();
439- simpleMMcifParser .addMMcifConsumer (simpleMMcifConsumer );
440- simpleMMcifParser .parse (new ByteArrayInputStream (inputText ));
441- return simpleMMcifConsumer .getStructure ();
442+ return CifStructureConverter .fromInputStream (new ByteArrayInputStream (inputText ));
442443 }
443444
444445 /**
@@ -477,7 +478,7 @@ public static String getTypeFromChainId(StructureDataInterface structureDataInte
477478 */
478479 public static void writeToFile (List <String > pdbCodeList , String uri , String producer ) {
479480 JavaSparkContext javaSparkContext = SparkUtils .getSparkContext ();
480- MmtfUtils . setUpBioJava ();
481+ setUpBioJava ();
481482 JavaPairRDD <Text , BytesWritable > distData =
482483 javaSparkContext .parallelize (pdbCodeList )
483484 .mapToPair (new PdbIdToMmtf (producer ))
@@ -486,4 +487,54 @@ public static void writeToFile(List<String> pdbCodeList, String uri, String prod
486487 distData .saveAsHadoopFile (uri , Text .class , BytesWritable .class , SequenceFileOutputFormat .class );
487488 javaSparkContext .close ();
488489 }
490+
491+ /**
492+ * Set up the configuration parameters for BioJava.
493+ */
494+ public static AtomCache setUpBioJava () {
495+ // Set up the atom cache etc
496+ AtomCache cache = new AtomCache ();
497+ cache .setFiletype (StructureFiletype .CIF );
498+
499+ // important: we want always to get the mmCIF file from server (sandbox). BioJava default behaviour would read from local cache and we don't want that for updated entries
500+ cache .setFetchBehavior (LocalPDBDirectory .FetchBehavior .FORCE_DOWNLOAD );
501+
502+ FileParsingParameters params = cache .getFileParsingParams ();
503+ params .setCreateAtomBonds (true );
504+ params .setAlignSeqRes (true );
505+ params .setParseBioAssembly (true );
506+ DownloadChemCompProvider cc = new DownloadChemCompProvider ();
507+ ChemCompGroupFactory .setChemCompProvider (cc );
508+ cc .checkDoFirstInstall ();
509+ cache .setFileParsingParams (params );
510+ StructureIO .setAtomCache (cache );
511+ return cache ;
512+ }
513+
514+ /**
515+ * Set up the configuration parameters for BioJava.
516+ * @param ccBaseUrl base URL for chemcomp files (in sandbox layout .../H/HEM/HEM.cif) from which chem comp cif files
517+ * will be read
518+ */
519+ public static AtomCache setUpBioJava (String ccBaseUrl ) {
520+ // Set up the atom cache etc
521+ AtomCache cache = new AtomCache ();
522+ cache .setFiletype (StructureFiletype .CIF );
523+
524+ // important: we want always to get the mmCIF file from server (sandbox). BioJava default behaviour would read from local cache and we don't want that for updated entries
525+ cache .setFetchBehavior (LocalPDBDirectory .FetchBehavior .FORCE_DOWNLOAD );
526+
527+ FileParsingParameters params = cache .getFileParsingParams ();
528+ params .setCreateAtomBonds (true );
529+ params .setAlignSeqRes (true );
530+ params .setParseBioAssembly (true );
531+ DownloadChemCompProvider .serverBaseUrl = ccBaseUrl ;
532+ DownloadChemCompProvider .useDefaultUrlLayout = false ;
533+ DownloadChemCompProvider cc = new DownloadChemCompProvider ();
534+ ChemCompGroupFactory .setChemCompProvider (cc );
535+ cc .checkDoFirstInstall ();
536+ cache .setFileParsingParams (params );
537+ StructureIO .setAtomCache (cache );
538+ return cache ;
539+ }
489540}
0 commit comments