Skip to content

Commit 4aa4a58

Browse files
authored
added pretty printing for config, indexer, searcher (#39)
* added pretty print for things like config, indexer, etc * Revert "added pretty print for things like config, indexer, etc" This reverts commit 28ca783. * add Pretty printing types * added tests, and fix formatting * test for show_config is added
1 parent 1480f52 commit 4aa4a58

File tree

5 files changed

+67
-2
lines changed

5 files changed

+67
-2
lines changed

src/indexing.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,3 +145,14 @@ function index(indexer::Indexer)
145145
# check if all relevant files are saved
146146
_check_all_files_are_saved(indexer.config.index_path)
147147
end
148+
149+
"""
    Base.show(io::IO, ::MIME"text/plain", indexer::Indexer)

Write a multi-line, human-readable summary of `indexer` to `io`.

The summary lists the number of documents in `indexer.collection`, the
checkpoint and index path taken from `indexer.config`, and the collection
path when the configured collection is a non-empty `String`.
"""
function Base.show(io::IO, ::MIME"text/plain", indexer::Indexer)
    println(io, "ColBERT Indexer:")
    println(io, " collection size: $(length(indexer.collection)) documents")
    println(io, " checkpoint: $(indexer.config.checkpoint)")

    # `config.collection` is either a String or a Vector{String}; only a
    # non-empty String is printed as a path.
    source = indexer.config.collection
    has_path = source isa String && !isempty(source)
    if has_path
        println(io, " collection path: $(source)")
    end

    println(io, " index path: $(indexer.config.index_path)")
end

src/infra/config.jl

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ Base.@kwdef struct ColBERTConfig
6363
query_token::String = "[Q]"
6464
doc_token::String = "[D]"
6565

66-
# resource settings
66+
# resource settings
6767
checkpoint::String = "colbert-ir/colbertv2.0"
6868
collection::Union{String, Vector{String}} = ""
6969

@@ -88,3 +88,31 @@ Base.@kwdef struct ColBERTConfig
8888
nprobe::Int = 2
8989
ncandidates::Int = 8192
9090
end
91+
92+
"""
    Base.show(io::IO, ::MIME"text/plain", config::ColBERTConfig)

Write a multi-line summary of `config` to `io`, grouped under the headings
`model`, `documents`, `queries`, `indexing`, `search` and `hardware`.

When `config.collection` is a `String` it is printed verbatim; otherwise its
length is printed as a document count.
"""
function Base.show(io::IO, ::MIME"text/plain", config::ColBERTConfig)
    println(io, "ColBERTConfig:")

    println(io, " model:")
    println(io, " checkpoint: $(config.checkpoint)")
    println(io, " dim: $(config.dim)")

    println(io, " documents:")
    # A String collection is shown as-is; any other collection is summarised
    # by its element count.
    collection_summary = if config.collection isa String
        config.collection
    else
        "$(length(config.collection)) documents"
    end
    println(io, " collection: $(collection_summary)")
    println(io, " max length: $(config.doc_maxlen)")
    println(io, " mask punctuation: $(config.mask_punctuation)")

    println(io, " queries:")
    println(io, " max length: $(config.query_maxlen)")
    println(io, " attend to mask: $(config.attend_to_mask_tokens)")

    println(io, " indexing:")
    println(io, " path: $(config.index_path)")
    println(io, " batch size: $(config.index_bsize)")
    println(io, " chunk size: $(config.chunksize)")
    println(io, " compression bits: $(config.nbits)")
    println(io, " kmeans iterations: $(config.kmeans_niters)")

    println(io, " search:")
    println(io, " nprobe: $(config.nprobe)")
    println(io, " ncandidates: $(config.ncandidates)")

    println(io, " hardware:")
    println(io, " gpu: $(config.use_gpu)")
    println(io, " rank: $(config.rank)")
    println(io, " nranks: $(config.nranks)")
end

src/searching.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,12 @@ function search(searcher::Searcher, query::String, k::Int)
126126
pids, scores = pids[indices], scores[indices]
127127
pids[1:k], scores[1:k]
128128
end
129+
130+
"""
    Base.show(io::IO, ::MIME"text/plain", searcher::Searcher)

Write a multi-line summary of `searcher` to `io`: the checkpoint and index
path from `searcher.config`, the sum of `searcher.doclens`, and the size of
`searcher.centroids` along its second dimension.
"""
function Base.show(io::IO, ::MIME"text/plain", searcher::Searcher)
    println(io, "ColBERT Searcher:")
    println(io, " checkpoint: $(searcher.config.checkpoint)")
    println(io, " index path: $(searcher.config.index_path)")
    println(io, " embeddings:")

    # Reported as the embedding total: the sum over all per-document lengths.
    embedding_total = sum(searcher.doclens)
    println(io, " total: $(embedding_total)")

    # Reported as the centroid count: the extent of the second dimension.
    centroid_count = size(searcher.centroids, 2)
    println(io, " centroids: $(centroid_count)")
end

test/runtests.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,17 @@ const FLOAT_TYPES = [Float16, Float32, Float64]
2121
include("indexing/codecs/residual.jl")
2222
include("indexing/collection_indexer.jl")
2323

24-
# modelling operations
24+
# modelling operations
2525
include("modelling/tokenization/tokenizer_utils.jl")
2626
include("modelling/embedding_utils.jl")
2727

2828
# search operations
2929
include("searching.jl")
3030
include("search/ranking.jl")
3131

32+
# show operations
33+
include("show_methods.jl")
34+
3235
# utils
3336
include("utils.jl")
3437

test/show_methods.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
@testset "show methods" begin
    # The temporary directory only supplies a concrete value for `index_path`.
    mktempdir() do tmp_index
        cfg = ColBERTConfig(
            checkpoint = "dummy-checkpoint",
            index_path = tmp_index,
            collection = ["doc1", "doc2"]
        )

        rendered = sprint(io -> show(io, MIME("text/plain"), cfg))

        # Each fragment must appear somewhere in the text/plain rendering.
        expected_fragments = (
            " checkpoint: dummy-checkpoint",
            " path: $tmp_index",
            " collection: 2 documents",
        )
        for fragment in expected_fragments
            @test occursin(fragment, rendered)
        end
    end
end

0 commit comments

Comments
 (0)