Skip to content

Commit a5d63a1

Browse files
committed
Added more documentation, some minor IFD tags, removed test for LZW
1 parent b83dd80 commit a5d63a1

File tree

14 files changed

+74
-33
lines changed

14 files changed

+74
-33
lines changed

src/main/java/org/apache/sysds/runtime/io/ReaderCOG.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,9 @@ private MatrixBlock readCOG(BufferedInputStream bis, long estnnz) throws IOExcep
5959
// TODO: Currently only reads the first image which is the full resolution image
6060
// In the future, this could be extended to read the overviews as well
6161
// But keep in mind that we are only returning a single MatrixBlock, so there needs to be some special handling
62+
// TODO: Is the metadata (e.g. the coordinates) necessary in SystemDS? Returning it is currently not possible, as we only return a MatrixBlock.
63+
// However, this could possibly be changed in the future to somehow also store relevant metadata if desired.
64+
// Currently this implementation reads the most important data from the header.
6265
COGProperties cogP = new COGProperties(cogHeader.getIFD());
6366

6467
// number of tiles for Width and Length
@@ -97,8 +100,6 @@ private MatrixBlock readCOG(BufferedInputStream bis, long estnnz) throws IOExcep
97100
byteReader.reset();
98101
}
99102

100-
// TODO: If the tile is compressed, decompress the currentTileData here
101-
102103
if (cogP.getCompression() == 8) {
103104
currentTileData = COGCompressionUtils.decompressDeflate(currentTileData);
104105
}

src/main/java/org/apache/sysds/runtime/io/cog/COGByteReader.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
import java.io.BufferedInputStream;
66
import java.io.IOException;
77

8+
/**
9+
* This class is used by the COGReader to read bytes from a BufferedInputStream.
10+
* It is a wrapper that keeps track of the bytes read and can therefore be used to
11+
* easily go to specific offsets.
12+
*/
813
public class COGByteReader {
914
private long totalBytesRead;
1015
private BufferedInputStream bis;
@@ -45,18 +50,38 @@ public byte[] readBytes(int length) {
4550
return header;
4651
}
4752

53+
/**
54+
* Reads a given number of bytes from the BufferedInputStream.
55+
* Increments the totalBytesRead counter by the number of bytes read.
56+
* @param length Number of bytes to read
57+
* @return The bytes that were read, as a byte array
58+
*/
4859
public byte[] readBytes(long length) {
60+
// TODO: When properly implementing BigTIFF, this limit could become a problem in cases where we cannot simply skip bytes.
61+
// In BigTIFF the offset can be larger than maxInt which isn't a problem for skipping bytes
62+
// but could be a problem when the tiles are not sequential in the file and we need to jump back
63+
// to a previous position (where we can't use skip).
4964
if (length > Integer.MAX_VALUE) {
5065
throw new DMLRuntimeException("Cannot read more than Integer.MAX_VALUE bytes at once");
5166
}
5267
return readBytes((int) length);
5368
}
5469

70+
/**
71+
* Offers the same functionality as BufferedInputStream.mark.
72+
* Allows for returning to a previous point if the readlimit is not exceeded.
73+
* @param readlimit Maximum number of bytes that may be read before the mark becomes invalid
74+
*/
5575
public void mark(long readlimit) {
5676
this.readlimit = readlimit;
5777
bis.mark((int) readlimit + 1);
5878
}
5979

80+
/**
81+
* Offers the same functionality as BufferedInputStream.reset.
82+
* Resets the stream to the last marked position.
83+
* @throws DMLRuntimeException
84+
*/
6085
public void reset() throws DMLRuntimeException {
6186
try {
6287
bis.reset();
@@ -66,6 +91,12 @@ public void reset() throws DMLRuntimeException {
6691
}
6792
}
6893

94+
/**
95+
* Skips a given number of bytes without reading them.
96+
* Useful for jumping to specific offsets.
97+
* @param n Number of bytes to skip
98+
* @throws DMLRuntimeException
99+
*/
69100
public void skipBytes(long n) throws DMLRuntimeException {
70101
try {
71102
long skipped = bis.skip(n);

src/main/java/org/apache/sysds/runtime/io/cog/COGCompressionUtils.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ public class COGCompressionUtils {
1414
* @throws DMLRuntimeException
1515
*/
1616
public static byte[] decompressDeflate(byte[] compressedData) throws DMLRuntimeException {
17+
// Use the native Java implementation of deflate to decompress the data
1718
Inflater inflater = new Inflater();
1819
inflater.setInput(compressedData);
1920

src/main/java/org/apache/sysds/runtime/io/cog/COGHeader.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22

33
import org.apache.sysds.runtime.DMLRuntimeException;
44

5-
import java.io.BufferedInputStream;
6-
import java.io.IOException;
75
import java.util.ArrayList;
86
import java.nio.ByteBuffer;
97
import java.nio.ByteOrder;
108

9+
/**
10+
* Represents a header for a COG file. This includes IFDs, endianness etc.
11+
*/
1112
public class COGHeader {
1213
private boolean isLittleEndian;
1314
private String GDALMetadata;
@@ -137,6 +138,12 @@ public Number parseByteArray(byte[] bytes, int length, int offset, boolean isDec
137138
}
138139
}
139140

141+
/**
142+
* Prepares the COG header by reading the first 4 bytes and determining the byte order.
143+
* Needs to be called before anything else is done with the COG header.
144+
* @param byteReader Byte reader from which the first 4 bytes of the file are read
145+
* @return The prepared COGHeader
146+
*/
140147
private static COGHeader prepareHeader(COGByteReader byteReader) {
141148
// Read first 4 bytes to determine byte order and make sure it is a valid TIFF
142149
byte[] header = byteReader.readBytes(4);
@@ -388,6 +395,8 @@ public static String isCompatible(IFDTag[] IFD) {
388395
break;
389396
case Compression:
390397
// After implementing additional decompression methods, this can be extended
398+
// TODO: LZW would be a great addition as it is widely used
399+
// Furthermore, JPEG support would also be a good addition
391400
// 1: none, 8: deflate
392401
if (tag.getData()[0].intValue() != 1 && tag.getData()[0].intValue() != 8) {
393402
return "Unsupported compression: " + tag.getData()[0];

src/main/java/org/apache/sysds/runtime/io/cog/COGProperties.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import java.util.Arrays;
44

5+
/**
6+
* Properties of a COG file that are useful for reading the file.
7+
*/
58
public class COGProperties {
69

710
private int rows;

src/main/java/org/apache/sysds/runtime/io/cog/IFDTag.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package org.apache.sysds.runtime.io.cog;
22

3+
/**
4+
* Represents a single tag in the IFD of a TIFF file
5+
*/
36
public class IFDTag {
47
private IFDTagDictionary tagId;
58
private short dataType;

src/main/java/org/apache/sysds/runtime/io/cog/IFDTagDictionary.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package org.apache.sysds.runtime.io.cog;
22

3+
/**
4+
* Enum for mapping IFD tag values to their corresponding tag names
5+
*/
36
public enum IFDTagDictionary {
47
Unknown(-1),
58
// Right now we will only support baseline TIFF
@@ -8,7 +11,6 @@ public enum IFDTagDictionary {
811
ImageWidth(256),
912
ImageLength(257),
1013
BitsPerSample(258),
11-
// TODO: LZW Compression
1214
Compression(259),
1315
PhotometricInterpretation(262),
1416
Threshholding(263),
@@ -53,9 +55,13 @@ public enum IFDTagDictionary {
5355
TileLength(323),
5456
TileOffsets(324),
5557
TileByteCounts(325),
56-
// TODO: Support nodata values
57-
GDALNoData(42113);
58-
58+
GDALNoData(42113),
59+
GeoKeyDirectoryTag(34735),
60+
GeoDoubleParamsTag(34736),
61+
GeoAsciiParamsTag(34737),
62+
ModelPixelScaleTag(33550),
63+
ModelTiepointTag(33922),
64+
ModelTransformationTag(34264);
5965

6066

6167
private final int value;

src/main/java/org/apache/sysds/runtime/io/cog/SampleFormatDataTypes.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package org.apache.sysds.runtime.io.cog;
22

3+
/**
4+
* Enum for mapping sample formats of TIFF image data to names
5+
*/
36
public enum SampleFormatDataTypes {
47
UNSIGNED_INTEGER(1),
58
SIGNED_INTEGER(2),

src/main/java/org/apache/sysds/runtime/io/cog/TIFFDataTypes.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
package org.apache.sysds.runtime.io.cog;
22

3+
/**
4+
* Enum for mapping data types of IFD tags in TIFF to readable names
5+
*/
36
public enum TIFFDataTypes {
47
BYTE(1),
58
ASCII(2),

src/test/java/org/apache/sysds/test/functions/io/cog/ReadCOGTestNTilesNBandsUInt16PC1ComLZWRect.java

Lines changed: 0 additions & 24 deletions
This file was deleted.

0 commit comments

Comments
 (0)