Skip to content

Commit c99b22f

Browse files
authored
Merge pull request #289 from ipinfo/large-db-support
Add support for files larger than 2GB
2 parents f77fca0 + ba92425 commit c99b22f

File tree

15 files changed

+1256
-181
lines changed

15 files changed

+1256
-181
lines changed
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package com.maxmind.db;
2+
3+
import java.io.IOException;
4+
import java.nio.channels.FileChannel;
5+
import java.nio.charset.CharacterCodingException;
6+
import java.nio.charset.CharsetDecoder;
7+
8+
/**
9+
* A generic buffer abstraction that supports sequential and random access
10+
* to binary data. Implementations may be backed by a single {@link
11+
* java.nio.ByteBuffer} or multiple buffers for larger capacities.
12+
*
13+
* <p>This interface is designed to provide a long-based API while
14+
* remaining compatible with the limitations of underlying storage.
15+
*/
16+
interface Buffer {
    /**
     * Reports how many bytes this buffer holds in total.
     *
     * @return the capacity, in bytes
     */
    long capacity();

    /**
     * Reports where the next relative read will happen.
     *
     * @return the current position
     */
    long position();

    /**
     * Moves the position used by relative reads.
     *
     * @param newPosition the position to move to
     * @return this buffer
     */
    Buffer position(long newPosition);

    /**
     * Reports the current limit of this buffer.
     *
     * @return the current limit
     */
    long limit();

    /**
     * Changes the limit of this buffer.
     *
     * @param newLimit the limit to apply
     * @return this buffer
     */
    Buffer limit(long newLimit);

    /**
     * Relative read: returns the byte at the current position, then
     * advances the position.
     *
     * @return the byte that was read
     */
    byte get();

    /**
     * Relative bulk read: copies bytes into {@code dst} and advances the
     * position.
     *
     * @param dst the array that receives the bytes
     * @return this buffer
     */
    Buffer get(byte[] dst);

    /**
     * Absolute read: returns the byte stored at {@code index}. The position
     * is left untouched.
     *
     * @param index the absolute index of the byte to read
     * @return the byte stored at that index
     */
    byte get(long index);

    /**
     * Relative read of the next 8 bytes, interpreted as a double. The
     * position is advanced.
     *
     * @return the double that was read
     */
    double getDouble();

    /**
     * Relative read of the next 4 bytes, interpreted as a float. The
     * position is advanced.
     *
     * @return the float that was read
     */
    float getFloat();

    /**
     * Produces a second view of the same content. The returned buffer keeps
     * its own position, limit, and mark values, independent of this one.
     *
     * @return the duplicate view
     */
    Buffer duplicate();

    /**
     * Fills this buffer with data read from {@code channel}, beginning at the
     * current position.
     *
     * @param channel the file channel to read from
     * @return how many bytes were read
     * @throws IOException if an I/O error occurs
     */
    long readFrom(FileChannel channel) throws IOException;

    /**
     * Turns the buffer's content into a string by running it through
     * {@code decoder}.
     *
     * @param decoder the charset decoder to apply
     * @return the resulting string
     * @throws CharacterCodingException if decoding fails
     */
    String decode(CharsetDecoder decoder) throws CharacterCodingException;
}

src/main/java/com/maxmind/db/BufferHolder.java

Lines changed: 55 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,46 @@
11
package com.maxmind.db;
22

33
import com.maxmind.db.Reader.FileMode;
4-
import java.io.ByteArrayOutputStream;
54
import java.io.File;
65
import java.io.IOException;
76
import java.io.InputStream;
87
import java.io.RandomAccessFile;
98
import java.nio.ByteBuffer;
109
import java.nio.channels.FileChannel;
11-
import java.nio.channels.FileChannel.MapMode;
10+
import java.util.ArrayList;
11+
import java.util.List;
1212

1313
final class BufferHolder {
1414
// DO NOT PASS OUTSIDE THIS CLASS. Doing so will remove thread safety.
15-
private final ByteBuffer buffer;
15+
private final Buffer buffer;
1616

1717
BufferHolder(File database, FileMode mode) throws IOException {
    this(database, mode, MultiBuffer.DEFAULT_CHUNK_SIZE);
}

/**
 * Opens {@code database} read-only and either reads it into memory or maps
 * it, depending on {@code mode}.
 *
 * <p>A file of at most {@code chunkSize} bytes is backed by a
 * {@code SingleBuffer}; anything larger is backed by a {@code MultiBuffer}.
 *
 * @param database the file to open
 * @param mode {@code FileMode.MEMORY} reads the file through
 *     {@code readFrom}; every other mode goes through {@code mapFromChannel}
 * @param chunkSize the largest file size, in bytes, still served by a
 *     single buffer
 * @throws IOException if the file cannot be opened or sized, or if in
 *     memory mode the number of bytes read differs from the buffer capacity
 */
BufferHolder(File database, FileMode mode, int chunkSize) throws IOException {
    // NOTE(review): chunkSize only selects the buffer type here; it is not
    // passed to the MultiBuffer constructor or factory -- confirm that is intended.
    try (RandomAccessFile raf = new RandomAccessFile(database, "r");
            FileChannel source = raf.getChannel()) {
        final long length = source.size();
        final boolean needsMultipleChunks = length > chunkSize;

        if (mode != FileMode.MEMORY) {
            if (needsMultipleChunks) {
                this.buffer = MultiBuffer.mapFromChannel(source);
            } else {
                this.buffer = SingleBuffer.mapFromChannel(source);
            }
        } else {
            final Buffer loaded;
            if (needsMultipleChunks) {
                loaded = new MultiBuffer(length);
            } else {
                loaded = new SingleBuffer(length);
            }

            final long bytesRead = loaded.readFrom(source);
            if (bytesRead != loaded.capacity()) {
                throw new IOException("Unable to read "
                        + database.getName()
                        + " into memory. Unexpected end of stream.");
            }
            this.buffer = loaded;
        }
    }
}
@@ -41,23 +53,44 @@ final class BufferHolder {
4153
* @throws NullPointerException if you provide a NULL InputStream
4254
*/
4355
BufferHolder(InputStream stream) throws IOException {
    this(stream, MultiBuffer.DEFAULT_CHUNK_SIZE);
}

/**
 * Reads {@code stream} to its end and keeps the bytes in memory.
 *
 * <p>The data is collected in chunks of {@code chunkSize} bytes. If the
 * whole stream fits in one chunk the bytes are wrapped in a
 * {@code SingleBuffer}; otherwise the chunks are handed to a
 * {@code MultiBuffer}.
 *
 * @param stream the source of the database bytes; read until end of stream
 * @param chunkSize the number of bytes per chunk; must be positive
 * @throws IOException if reading from the stream fails
 * @throws NullPointerException if {@code stream} is null
 * @throws IllegalArgumentException if {@code chunkSize} is not positive
 */
BufferHolder(InputStream stream, int chunkSize) throws IOException {
    if (null == stream) {
        throw new NullPointerException("Unable to use a NULL InputStream");
    }
    if (chunkSize <= 0) {
        // A zero-length read array can never make progress on the stream.
        throw new IllegalArgumentException(
                "chunkSize must be positive but was " + chunkSize);
    }

    List<ByteBuffer> chunks = new ArrayList<>();
    long total = 0;
    byte[] tmp = new byte[chunkSize];
    int read;

    // InputStream.read(byte[]) is allowed to return fewer bytes than asked
    // for well before the end of the stream (sockets, zip and buffered
    // streams do this routinely). readNBytes keeps reading until the array
    // is full or the stream ends, so only the final chunk can be shorter
    // than chunkSize. MultiBuffer is given a single chunkSize, so it
    // presumably relies on that layout -- TODO confirm against MultiBuffer.
    while ((read = stream.readNBytes(tmp, 0, tmp.length)) > 0) {
        ByteBuffer chunk = ByteBuffer.allocate(read);
        chunk.put(tmp, 0, read);
        chunk.flip();
        chunks.add(chunk);
        total += read;
    }

    if (total <= chunkSize) {
        // Everything fits in one chunk: flatten into a single array.
        byte[] data = new byte[(int) total];
        int pos = 0;
        for (ByteBuffer chunk : chunks) {
            System.arraycopy(chunk.array(), 0, data, pos, chunk.capacity());
            pos += chunk.capacity();
        }
        this.buffer = SingleBuffer.wrap(data);
    } else {
        this.buffer = new MultiBuffer(chunks.toArray(new ByteBuffer[0]), chunkSize);
    }
}
5588

5689
/*
57-
* Returns a duplicate of the underlying ByteBuffer. The returned ByteBuffer
90+
* Returns a duplicate of the underlying Buffer. The returned Buffer
5891
* should not be shared between threads.
5992
*/
60-
ByteBuffer get() {
93+
Buffer get() {
6194
// The Java API docs for buffer state:
6295
//
6396
// Buffers are not safe for use by multiple concurrent threads. If a buffer is to be
@@ -70,7 +103,7 @@ ByteBuffer get() {
70103
// * https://github.com/maxmind/MaxMind-DB-Reader-java/issues/65
71104
// * https://github.com/maxmind/MaxMind-DB-Reader-java/pull/69
72105
//
73-
// Given that we are not modifying the original ByteBuffer in any way and all currently
106+
// Given that we are not modifying the original Buffer in any way and all currently
74107
// known and most reasonably imaginable implementations of duplicate() only do read
75108
// operations on the original buffer object, the risk of not synchronizing this call seems
76109
// relatively low and worth taking for the performance benefit when lookups are being done

src/main/java/com/maxmind/db/CacheKey.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@
1010
* @param cls the class of the value
1111
* @param type the type of the value
1212
*/
13-
public record CacheKey<T>(int offset, Class<T> cls, java.lang.reflect.Type type) {
13+
public record CacheKey<T>(long offset, Class<T> cls, java.lang.reflect.Type type) {
1414
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
package com.maxmind.db;
22

3-
record CtrlData(Type type, int ctrlByte, int offset, int size) {
3+
record CtrlData(Type type, int ctrlByte, long offset, int size) {
44
}

src/main/java/com/maxmind/db/Decoder.java

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ class Decoder {
3434

3535
private final CharsetDecoder utfDecoder = UTF_8.newDecoder();
3636

37-
private final ByteBuffer buffer;
37+
private final Buffer buffer;
3838

3939
private final ConcurrentHashMap<Class<?>, CachedConstructor<?>> constructors;
4040

41-
Decoder(NodeCache cache, ByteBuffer buffer, long pointerBase) {
41+
Decoder(NodeCache cache, Buffer buffer, long pointerBase) {
4242
this(
4343
cache,
4444
buffer,
@@ -49,7 +49,7 @@ class Decoder {
4949

5050
Decoder(
5151
NodeCache cache,
52-
ByteBuffer buffer,
52+
Buffer buffer,
5353
long pointerBase,
5454
ConcurrentHashMap<Class<?>, CachedConstructor<?>> constructors
5555
) {
@@ -61,7 +61,7 @@ class Decoder {
6161

6262
private final NodeCache.Loader cacheLoader = this::decode;
6363

64-
<T> T decode(int offset, Class<T> cls) throws IOException {
64+
<T> T decode(long offset, Class<T> cls) throws IOException {
6565
if (offset >= this.buffer.capacity()) {
6666
throw new InvalidDatabaseException(
6767
"The MaxMind DB file's data section contains bad data: "
@@ -73,7 +73,7 @@ <T> T decode(int offset, Class<T> cls) throws IOException {
7373
}
7474

7575
private <T> DecodedValue decode(CacheKey<T> key) throws IOException {
76-
int offset = key.offset();
76+
long offset = key.offset();
7777
if (offset >= this.buffer.capacity()) {
7878
throw new InvalidDatabaseException(
7979
"The MaxMind DB file's data section contains bad data: "
@@ -132,8 +132,8 @@ private <T> DecodedValue decode(Class<T> cls, java.lang.reflect.Type genericType
132132

133133
DecodedValue decodePointer(long pointer, Class<?> cls, java.lang.reflect.Type genericType)
134134
throws IOException {
135-
int targetOffset = (int) pointer;
136-
int position = buffer.position();
135+
long targetOffset = pointer;
136+
long position = buffer.position();
137137

138138
CacheKey<?> key = new CacheKey<>(targetOffset, cls, genericType);
139139
DecodedValue o = cache.get(key, cacheLoader);
@@ -185,10 +185,10 @@ private <T> Object decodeByType(
185185
}
186186
}
187187

188-
private String decodeString(int size) throws CharacterCodingException {
189-
int oldLimit = buffer.limit();
188+
private String decodeString(long size) throws CharacterCodingException {
189+
long oldLimit = buffer.limit();
190190
buffer.limit(buffer.position() + size);
191-
String s = utfDecoder.decode(buffer).toString();
191+
String s = buffer.decode(utfDecoder);
192192
buffer.limit(oldLimit);
193193
return s;
194194
}
@@ -202,9 +202,13 @@ private int decodeInt32(int size) {
202202
}
203203

204204
private long decodeLong(int size) {
205-
long integer = 0;
205+
return Decoder.decodeLong(this.buffer, 0, size);
206+
}
207+
208+
static long decodeLong(Buffer buffer, int base, int size) {
209+
long integer = base;
206210
for (int i = 0; i < size; i++) {
207-
integer = (integer << 8) | (this.buffer.get() & 0xFF);
211+
integer = (integer << 8) | (buffer.get() & 0xFF);
208212
}
209213
return integer;
210214
}
@@ -221,7 +225,7 @@ private int decodeInteger(int base, int size) {
221225
return Decoder.decodeInteger(this.buffer, base, size);
222226
}
223227

224-
static int decodeInteger(ByteBuffer buffer, int base, int size) {
228+
static int decodeInteger(Buffer buffer, int base, int size) {
225229
int integer = base;
226230
for (int i = 0; i < size; i++) {
227231
integer = (integer << 8) | (buffer.get() & 0xFF);
@@ -412,7 +416,7 @@ private <T> Object decodeMapIntoObject(int size, Class<T> cls)
412416

413417
Integer parameterIndex = parameterIndexes.get(key);
414418
if (parameterIndex == null) {
415-
int offset = this.nextValueOffset(this.buffer.position(), 1);
419+
long offset = this.nextValueOffset(this.buffer.position(), 1);
416420
this.buffer.position(offset);
417421
continue;
418422
}
@@ -485,7 +489,7 @@ private static <T> String getParameterName(
485489
+ " is not annotated with MaxMindDbParameter.");
486490
}
487491

488-
private int nextValueOffset(int offset, int numberToSkip)
492+
private long nextValueOffset(long offset, int numberToSkip)
489493
throws InvalidDatabaseException {
490494
if (numberToSkip == 0) {
491495
return offset;
@@ -518,7 +522,7 @@ private int nextValueOffset(int offset, int numberToSkip)
518522
return nextValueOffset(offset, numberToSkip - 1);
519523
}
520524

521-
private CtrlData getCtrlData(int offset)
525+
private CtrlData getCtrlData(long offset)
522526
throws InvalidDatabaseException {
523527
if (offset >= this.buffer.capacity()) {
524528
throw new InvalidDatabaseException(
@@ -566,7 +570,7 @@ private byte[] getByteArray(int length) {
566570
return Decoder.getByteArray(this.buffer, length);
567571
}
568572

569-
private static byte[] getByteArray(ByteBuffer buffer, int length) {
573+
private static byte[] getByteArray(Buffer buffer, int length) {
570574
byte[] bytes = new byte[length];
571575
buffer.get(bytes);
572576
return bytes;

0 commit comments

Comments
 (0)