Skip to content

Commit a1a93e5

Browse files
committed
Dependency update
1 parent 154a4a5 commit a1a93e5

File tree

5 files changed

+88
-23
lines changed

5 files changed

+88
-23
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@
33
# Mobile Tools for Java (J2ME)
44
.mtj.tmp/
55

6+
.idea
7+
*.iml
8+
69
# Package Files #
710
*.jar
811
*.war

pom.xml

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -44,8 +44,8 @@
4444
<scala.version>2.11</scala.version>
4545
<slf4j.version>1.7.20</slf4j.version>
4646
<log4j.version>2.13.3</log4j.version>
47-
<biojava.version>5.0.0-alpha6</biojava.version>
48-
<mmtf-spark.version>0.0.6</mmtf-spark.version>
47+
<biojava.version>6.0.0-alpha4</biojava.version>
48+
<mmtf-spark.version>0.0.8</mmtf-spark.version>
4949
<additionalparam>-Xdoclint:none</additionalparam>
5050
</properties>
5151

@@ -90,17 +90,27 @@
9090
<artifactId>argparse4j</artifactId>
9191
<version>0.7.0</version>
9292
</dependency>
93-
<!-- for CSV support -->
94-
<dependency>
95-
<groupId>com.databricks</groupId>
96-
<artifactId>spark-csv_${scala.version}</artifactId>
97-
<version>1.2.0</version>
98-
</dependency>
93+
9994
<dependency>
10095
<groupId>org.biojava</groupId>
10196
<artifactId>biojava-structure</artifactId>
10297
<version>${biojava.version}</version>
10398
</dependency>
99+
<dependency>
100+
<groupId>org.rcsb</groupId>
101+
<artifactId>mmtf-api</artifactId>
102+
<version>1.0.10</version>
103+
</dependency>
104+
<dependency>
105+
<groupId>org.rcsb</groupId>
106+
<artifactId>mmtf-codec</artifactId>
107+
<version>1.0.10</version>
108+
</dependency>
109+
<dependency>
110+
<groupId>org.rcsb</groupId>
111+
<artifactId>mmtf-serialization</artifactId>
112+
<version>1.0.10</version>
113+
</dependency>
104114
<dependency>
105115
<groupId>org.rcsb</groupId>
106116
<artifactId>mmtf-spark</artifactId>

src/main/java/org/biojava/spark/mappers/MapperUtils.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
import org.biojava.nbio.structure.StructureImpl;
1717
import org.biojava.nbio.structure.io.mmtf.MmtfStructureReader;
1818
import org.biojava.nbio.structure.io.mmtf.MmtfStructureWriter;
19-
import org.biojava.nbio.structure.io.mmtf.MmtfUtils;
19+
import org.biojava.spark.utils.BiojavaSparkUtils;
2020
import org.rcsb.mmtf.dataholders.MmtfStructure;
2121
import org.rcsb.mmtf.decoder.GenericDecoder;
2222
import org.rcsb.mmtf.decoder.StructureDataToAdapter;
@@ -66,7 +66,7 @@ public static Structure byteArrToBiojavaStruct(String pdbCodePlus, byte[] inputB
6666
* @return a {@link JavaPairRDD} with key {@link Text} and value {@link BytesWritable}
6767
*/
6868
public static JavaPairRDD<Text, BytesWritable> generateRdd(List<String> inputList, String ccdUrl) {
69-
MmtfUtils.setUpBioJava(ccdUrl);
69+
BiojavaSparkUtils.setUpBioJava(ccdUrl);
7070
return SparkUtils.getSparkContext().parallelize(inputList)
7171
.mapToPair(t -> MapperUtils.getByteArray(t))
7272
.mapToPair(t -> new Tuple2<String,byte[]>(t._1, WriterUtils.gzipCompress(t._2)))
@@ -100,7 +100,7 @@ public static Tuple2<String,byte[]> getByteArray(String pdbId, String producer)
100100
return new Tuple2<String,byte[]>(structure.getPDBCode(), outByteArr);
101101
}
102102

103-
private static byte[] produceByteArray(Structure structure, String mmtfProducer) {
103+
private static byte[] produceByteArray(Structure structure, String mmtfProducer) throws IOException {
104104
MmtfStructure mmtfStructure = encodeStructure(structure);
105105
mmtfStructure.setMmtfProducer(mmtfProducer);
106106
ByteArrayOutputStream bos = new ByteArrayOutputStream();
@@ -114,7 +114,7 @@ private static MmtfStructure encodeStructure(Structure structure) {
114114
MmtfStructure mmtfStructure = new GenericEncoder(inflatorToGet).getMmtfEncodedStructure();
115115
return mmtfStructure;
116116
}
117-
private static Structure getFomByteArray(byte[] inputByteArr) {
117+
private static Structure getFomByteArray(byte[] inputByteArr) throws IOException {
118118
MmtfStructureReader mmtfStructureReader = new MmtfStructureReader();
119119
new StructureDataToAdapter(new GenericDecoder(new MessagePackSerialization().deserialize(new ByteArrayInputStream(inputByteArr))), mmtfStructureReader);
120120
return mmtfStructureReader.getStructure();

src/main/java/org/biojava/spark/utils/BiojavaSparkUtils.java

Lines changed: 60 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -38,13 +38,18 @@
3838
import org.biojava.nbio.structure.ResidueNumber;
3939
import org.biojava.nbio.structure.Structure;
4040
import org.biojava.nbio.structure.StructureIO;
41+
import org.biojava.nbio.structure.align.util.AtomCache;
42+
import org.biojava.nbio.structure.chem.ChemCompGroupFactory;
43+
import org.biojava.nbio.structure.chem.DownloadChemCompProvider;
4144
import org.biojava.nbio.structure.contact.AtomContact;
4245
import org.biojava.nbio.structure.contact.AtomContactSet;
4346
import org.biojava.nbio.structure.contact.Grid;
4447
import org.biojava.nbio.structure.contact.Pair;
45-
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifConsumer;
46-
import org.biojava.nbio.structure.io.mmcif.SimpleMMcifParser;
47-
import org.biojava.nbio.structure.io.mmcif.model.ChemComp;
48+
import org.biojava.nbio.structure.io.FileParsingParameters;
49+
import org.biojava.nbio.structure.io.LocalPDBDirectory;
50+
import org.biojava.nbio.structure.io.StructureFiletype;
51+
import org.biojava.nbio.structure.io.cif.CifStructureConverter;
52+
import org.biojava.nbio.structure.chem.ChemComp;
4853
import org.biojava.nbio.structure.io.mmtf.MmtfStructureReader;
4954
import org.biojava.nbio.structure.io.mmtf.MmtfStructureWriter;
5055
import org.biojava.nbio.structure.io.mmtf.MmtfUtils;
@@ -434,11 +439,7 @@ public static StructureDataInterface convertToStructDataInt(Structure structure)
434439
* @throws IOException
435440
*/
436441
private static Structure getStructureFromMmmCifText(byte[] inputText) throws IOException {
437-
SimpleMMcifConsumer simpleMMcifConsumer = new SimpleMMcifConsumer();
438-
SimpleMMcifParser simpleMMcifParser = new SimpleMMcifParser();
439-
simpleMMcifParser.addMMcifConsumer(simpleMMcifConsumer);
440-
simpleMMcifParser.parse(new ByteArrayInputStream(inputText));
441-
return simpleMMcifConsumer.getStructure();
442+
return CifStructureConverter.fromInputStream(new ByteArrayInputStream(inputText));
442443
}
443444

444445
/**
@@ -477,7 +478,7 @@ public static String getTypeFromChainId(StructureDataInterface structureDataInte
477478
*/
478479
public static void writeToFile(List<String> pdbCodeList, String uri, String producer) {
479480
JavaSparkContext javaSparkContext = SparkUtils.getSparkContext();
480-
MmtfUtils.setUpBioJava();
481+
setUpBioJava();
481482
JavaPairRDD<Text, BytesWritable> distData =
482483
javaSparkContext.parallelize(pdbCodeList)
483484
.mapToPair(new PdbIdToMmtf(producer))
@@ -486,4 +487,54 @@ public static void writeToFile(List<String> pdbCodeList, String uri, String prod
486487
distData.saveAsHadoopFile(uri, Text.class, BytesWritable.class, SequenceFileOutputFormat.class);
487488
javaSparkContext.close();
488489
}
490+
491+
/**
492+
* Set up the configuration parameters for BioJava.
493+
*/
494+
public static AtomCache setUpBioJava() {
495+
// Set up the atom cache etc
496+
AtomCache cache = new AtomCache();
497+
cache.setFiletype(StructureFiletype.CIF);
498+
499+
// important: we want always to get the mmCIF file from server (sandbox). BioJava default behaviour would read from local cache and we don't want that for updated entries
500+
cache.setFetchBehavior(LocalPDBDirectory.FetchBehavior.FORCE_DOWNLOAD);
501+
502+
FileParsingParameters params = cache.getFileParsingParams();
503+
params.setCreateAtomBonds(true);
504+
params.setAlignSeqRes(true);
505+
params.setParseBioAssembly(true);
506+
DownloadChemCompProvider cc = new DownloadChemCompProvider();
507+
ChemCompGroupFactory.setChemCompProvider(cc);
508+
cc.checkDoFirstInstall();
509+
cache.setFileParsingParams(params);
510+
StructureIO.setAtomCache(cache);
511+
return cache;
512+
}
513+
514+
/**
515+
* Set up the configuration parameters for BioJava.
516+
* @param ccBaseUrl base URL for chemcomp files (in sandbox layout .../H/HEM/HEM.cif) from which chem comp cif files
517+
* will be read
518+
*/
519+
public static AtomCache setUpBioJava(String ccBaseUrl) {
520+
// Set up the atom cache etc
521+
AtomCache cache = new AtomCache();
522+
cache.setFiletype(StructureFiletype.CIF);
523+
524+
// important: we want always to get the mmCIF file from server (sandbox). BioJava default behaviour would read from local cache and we don't want that for updated entries
525+
cache.setFetchBehavior(LocalPDBDirectory.FetchBehavior.FORCE_DOWNLOAD);
526+
527+
FileParsingParameters params = cache.getFileParsingParams();
528+
params.setCreateAtomBonds(true);
529+
params.setAlignSeqRes(true);
530+
params.setParseBioAssembly(true);
531+
DownloadChemCompProvider.serverBaseUrl = ccBaseUrl;
532+
DownloadChemCompProvider.useDefaultUrlLayout = false;
533+
DownloadChemCompProvider cc = new DownloadChemCompProvider();
534+
ChemCompGroupFactory.setChemCompProvider(cc);
535+
cc.checkDoFirstInstall();
536+
cache.setFileParsingParams(params);
537+
StructureIO.setAtomCache(cache);
538+
return cache;
539+
}
489540
}

src/test/java/org/biojava/spark/utils/TestEntryPoint.java

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22

33
import org.apache.spark.api.java.JavaSparkContext;
44
import org.junit.Test;
5+
import org.rcsb.mmtf.spark.utils.SparkUtils;
6+
57
/**
68
* A basic test of {@link EntryPoint} class.
79
* @author Anthony Bradley
@@ -14,8 +16,7 @@ public class TestEntryPoint {
1416
*/
1517
@Test
1618
public void testBasic(){
17-
EntryPoint entryPoint = new EntryPoint();
18-
JavaSparkContext sparkCont = entryPoint.getSparkUtils().getSparkContext();
19+
JavaSparkContext sparkCont = SparkUtils.getSparkContext();
1920
System.out.println(sparkCont);
2021
}
2122

0 commit comments

Comments (0)