Skip to content

Commit 0189488

Browse files
committed
Add records of invalid files and duplicates based on hash search
1 parent b446f16 commit 0189488

File tree

7 files changed

+245
-243
lines changed

7 files changed

+245
-243
lines changed

cmd/flibgolite/main.go

Lines changed: 36 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -143,48 +143,41 @@ func reindexStock() {
143143
genresTree := genres.NewGenresTree(cfg.Genres.TREE_FILE)
144144
hashes := hash.InitHashes(db.DB)
145145

146-
databaseQueue := make(chan model.Book, cfg.Database.BOOK_QUEUE_SIZE)
147-
defer close(databaseQueue)
148-
databaseHandler := &database.Handler{
149-
CFG: cfg,
150-
DB: db,
151-
LOG: stockLog,
152-
Queue: databaseQueue,
153-
Hashes: hashes,
154-
}
155-
databaseHandler.StopDB = make(chan struct{})
156-
defer close(databaseHandler.StopDB)
157-
158-
go databaseHandler.AddBooksToIndex()
159-
146+
bookQueue := make(chan model.Book, cfg.Database.BOOK_QUEUE_SIZE)
147+
defer close(bookQueue)
160148
fileQueue := make(chan stock.File, cfg.Database.FILE_QUEUE_SIZE)
161149
defer close(fileQueue)
162150
stockHandler := &stock.Handler{
163-
CFG: cfg,
164-
LOG: stockLog,
165-
DB: db,
166-
GT: genresTree,
167-
Queue: fileQueue,
168-
Hashes: hashes,
151+
CFG: cfg,
152+
LOG: stockLog,
153+
DB: db,
154+
GT: genresTree,
155+
BookQueue: bookQueue,
156+
FileQueue: fileQueue,
157+
Hashes: hashes,
169158
}
170-
stockHandler.InitStockFolders()
159+
stockHandler.StopDB = make(chan struct{})
160+
defer close(stockHandler.StopDB)
171161
stockHandler.StopScan = make(chan struct{})
172162
defer close(stockHandler.StopScan)
163+
164+
stockHandler.InitStockFolders()
165+
go stockHandler.AddBooksToIndex()
173166
for i := 0; i < cfg.Database.MAX_SCAN_THREADS; i++ {
174-
go stockHandler.ParseFB2Queue(databaseQueue)
167+
go stockHandler.ParseFB2Queue()
175168
}
176169

177170
defer func() { stockHandler.StopScan <- struct{}{} }()
178171
dir := cfg.Library.STOCK_DIR
179172
if len(cfg.Library.NEW_DIR) > 0 {
180173
dir = cfg.Library.NEW_DIR
181174
}
182-
stockHandler.ScanDir(dir, databaseQueue)
175+
stockHandler.ScanDir(dir)
183176

184177
stockHandler.StopScan <- struct{}{}
185178

186-
databaseHandler.StopDB <- struct{}{}
187-
<-databaseHandler.StopDB
179+
stockHandler.StopDB <- struct{}{}
180+
<-stockHandler.StopDB
188181

189182
stockLog.S.Println("<<< Book stock reindex finished <<<<<<<<<<<<<<<<<<<<<<<<<<<")
190183
stockLog.S.Println("Time elapsed: ", time.Since(start))
@@ -211,35 +204,28 @@ func run() {
211204
genresTree := genres.NewGenresTree(cfg.Genres.TREE_FILE)
212205
hashes := hash.InitHashes(db.DB)
213206

214-
databaseQueue := make(chan model.Book, cfg.Database.BOOK_QUEUE_SIZE)
215-
defer close(databaseQueue)
216-
databaseHandler := &database.Handler{
217-
CFG: cfg,
218-
DB: db,
219-
LOG: stockLog,
220-
Queue: databaseQueue,
221-
Hashes: hashes,
222-
}
223-
databaseHandler.StopDB = make(chan struct{})
224-
defer close(databaseHandler.StopDB)
225-
226-
go databaseHandler.AddBooksToIndex()
227-
207+
bookQueue := make(chan model.Book, cfg.Database.BOOK_QUEUE_SIZE)
208+
defer close(bookQueue)
228209
fileQueue := make(chan stock.File, cfg.Database.FILE_QUEUE_SIZE)
229210
defer close(fileQueue)
230211
stockHandler := &stock.Handler{
231-
CFG: cfg,
232-
LOG: stockLog,
233-
DB: db,
234-
GT: genresTree,
235-
Queue: fileQueue,
236-
Hashes: hashes,
212+
CFG: cfg,
213+
LOG: stockLog,
214+
DB: db,
215+
GT: genresTree,
216+
BookQueue: bookQueue,
217+
FileQueue: fileQueue,
218+
Hashes: hashes,
237219
}
220+
stockHandler.StopDB = make(chan struct{})
221+
defer close(stockHandler.StopDB)
238222
stockHandler.InitStockFolders()
239223
stockHandler.StopScan = make(chan struct{})
240224
defer close(stockHandler.StopScan)
225+
226+
go stockHandler.AddBooksToIndex()
241227
for i := 0; i < cfg.Database.MAX_SCAN_THREADS; i++ {
242-
go stockHandler.ParseFB2Queue(databaseQueue)
228+
go stockHandler.ParseFB2Queue()
243229
}
244230
go func() {
245231
defer func() { stockHandler.StopScan <- struct{}{} }()
@@ -248,7 +234,7 @@ func run() {
248234
dir = cfg.Library.NEW_DIR
249235
}
250236
for {
251-
stockHandler.ScanDir(dir, databaseQueue)
237+
stockHandler.ScanDir(dir)
252238
time.Sleep(time.Duration(cfg.Database.POLL_DELAY) * time.Second)
253239
select {
254240
case <-stockHandler.StopScan:
@@ -293,9 +279,9 @@ func run() {
293279
stockHandler.LOG.S.Printf("New acquisitions scanning was stoped correctly\n")
294280

295281
// Stop adding new acquisitions to index and wait for completion
296-
databaseHandler.StopDB <- struct{}{}
297-
<-databaseHandler.StopDB
298-
databaseHandler.LOG.S.Printf("New acquisitions adding was stoped correctly\n")
282+
stockHandler.StopDB <- struct{}{}
283+
<-stockHandler.StopDB
284+
stockHandler.LOG.S.Printf("New acquisitions adding was stoped correctly\n")
299285

300286
// Shutdown OPDS server
301287
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)

pkg/config/config.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,12 @@ func LoadConfig(rootDir string) *Config {
9696
c := &Config{
9797
Library: Library{
9898
STOCK_DIR: "books/stock",
99-
TRASH_DIR: "books/trash",
99+
TRASH_DIR: "",
100100
NEW_DIR: "",
101101
},
102102
Database: Database{
103103
DSN: "dbdata/books.db",
104-
POLL_DELAY: 30,
104+
POLL_DELAY: 300,
105105
MAX_SCAN_THREADS: 10,
106106
BOOK_QUEUE_SIZE: 20000,
107107
FILE_QUEUE_SIZE: 20000,
@@ -119,7 +119,7 @@ func LoadConfig(rootDir string) *Config {
119119
OPDS: OPDS{
120120
PORT: 8085,
121121
TITLE: "FLib Go Go Go!!!",
122-
PAGE_SIZE: 30,
122+
PAGE_SIZE: 20,
123123
LATEST_DAYS: 14,
124124
NO_CONVERSION: false,
125125
},

pkg/config/config.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
library:
22
# Book folders
33
STOCK: "books/stock" # Book stock
4-
TRASH: "books/trash" # Error and duplicate files and archives will be moved to this folder
4+
#TRASH: "books/trash" # Error and duplicate files and archives will be moved to this folder
55
#NEW: "books/new" # Uncomment the line to have separate folder for new acquired books
66

77
genres:
@@ -10,7 +10,7 @@ genres:
1010
database:
1111
DSN: "dbdata/books.db"
1212
# Delay in seconds before starting each new acquisitions folder processing
13-
POLL_DELAY: 30
13+
POLL_DELAY: 300
1414
# Maximum parallel new acquisitions processing routines
1515
MAX_SCAN_THREADS: 10
1616
# Book queue size
@@ -20,7 +20,7 @@ database:
2020
# Maximum number of books in one transaction
2121
MAX_BOOKS_IN_TX: 20000
2222
# Level of checking new books for duplicates: N - no check, F - fast check (default) by CRC32, S - slow check by CRC32 or title and plot comparison
23-
#DEDUPLICATE_LEVEL: "N"
23+
DEDUPLICATE_LEVEL: "F"
2424

2525
logs:
2626
# Logs are here
@@ -36,11 +36,11 @@ opds:
3636
# OPDS-server title that is displayed in a book reader
3737
TITLE: "FLib Go Go Go!!!"
3838
# OPDS feeds entries page size
39-
PAGE_SIZE: 30
39+
PAGE_SIZE: 20
4040
# Latest books period in days
4141
LATEST_DAYS: 14
4242
# Do not convert FB2 to EPUB format if set to true, default: false
43-
#NO_CONVERSION: true
43+
NO_CONVERSION: false
4444

4545
locales:
4646
# Locales folder. You can add your own locale file there like en.yml, ru.yml, uk.yml

pkg/database/db.go

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,10 @@ import (
77
"os"
88
"path/filepath"
99
"strings"
10-
"sync"
10+
11+
// "sync"
1112

1213
"github.com/jmoiron/sqlx"
13-
"github.com/vinser/flibgolite/pkg/config"
14-
"github.com/vinser/flibgolite/pkg/hash"
15-
"github.com/vinser/flibgolite/pkg/model"
16-
"github.com/vinser/flibgolite/pkg/rlog"
1714

1815
_ "embed"
1916

@@ -28,18 +25,6 @@ var SQLITE_DB_INIT string
2825
//go:embed sqlite_db_drop.sql
2926
var SQLITE_DB_DROP string
3027

31-
// ==================================
32-
type Handler struct {
33-
CFG *config.Config
34-
Hashes *hash.BookHashes
35-
DB *DB
36-
TX *TX
37-
LOG *rlog.Log
38-
WG *sync.WaitGroup
39-
Queue <-chan model.Book
40-
StopDB chan struct{}
41-
}
42-
4328
type DB struct {
4429
*sqlx.DB
4530
}
@@ -113,7 +98,7 @@ type TX struct {
11398
Stmt map[string]*sqlx.Stmt
11499
}
115100

116-
func (db *DB) txBegin() *TX {
101+
func (db *DB) TxBegin() *TX {
117102
TX := &TX{
118103
Tx: db.DB.MustBegin(),
119104
Stmt: map[string]*sqlx.Stmt{},
@@ -122,7 +107,7 @@ func (db *DB) txBegin() *TX {
122107
return TX
123108
}
124109

125-
func (tx *TX) txEnd() {
110+
func (tx *TX) TxEnd() {
126111
defer func() {
127112
for _, stmt := range tx.Stmt {
128113
stmt.Close()

pkg/database/dbstock.go

Lines changed: 1 addition & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@ package database
22

33
import (
44
"database/sql"
5-
"fmt"
65
"log"
7-
"path/filepath"
8-
"time"
96

107
"github.com/vinser/flibgolite/pkg/hash"
118
"github.com/vinser/flibgolite/pkg/model"
@@ -23,70 +20,8 @@ func (tx *TX) PrepareStatements() {
2320
tx.Stmt["insertIntoSeries"] = tx.mustPrepare(`INSERT INTO series (name) VALUES (?)`)
2421
}
2522

26-
func (h *Handler) AddBooksToIndex() {
27-
defer func() {
28-
h.TX.txEnd()
29-
h.StopDB <- struct{}{}
30-
}()
31-
bookInTX := 0
32-
for {
33-
select {
34-
case book := <-h.Queue:
35-
if bookInTX == 0 {
36-
h.TX = h.DB.txBegin()
37-
}
38-
h.NewBook(&book)
39-
bookInTX++
40-
h.LOG.I.Printf("file %s from %s has been added\n", book.File, book.Archive)
41-
if bookInTX >= h.CFG.Database.MAX_BOOKS_IN_TX {
42-
h.TX.txEnd()
43-
bookInTX = 0
44-
}
45-
case <-time.After(time.Second):
46-
h.LOG.D.Printf("Book queue timeout")
47-
if h.TX != nil {
48-
h.TX.txEnd()
49-
}
50-
bookInTX = 0
51-
case <-h.StopDB:
52-
return
53-
}
54-
}
55-
}
56-
57-
// Files and Archives
58-
func (db *DB) NotInStock(name string) error {
59-
var id int64
60-
switch filepath.Ext(name) {
61-
case ".zip":
62-
q := "SELECT id FROM books WHERE archive=?"
63-
err := db.QueryRow(q, name).Scan(&id)
64-
if err != sql.ErrNoRows {
65-
return fmt.Errorf("archive %s is in stock already and has been skipped", name)
66-
}
67-
case ".epub", ".fb2":
68-
q := "SELECT id FROM books WHERE file=? AND archive=''"
69-
err := db.QueryRow(q, name).Scan(&id)
70-
if err != sql.ErrNoRows {
71-
return fmt.Errorf("file %s is in stock already and has been skipped", name)
72-
}
73-
default:
74-
return fmt.Errorf("file %s has unsupported format and has been skipped", name)
75-
}
76-
return nil
77-
}
78-
7923
// Books
80-
func (h *Handler) NewBook(b *model.Book) {
81-
tx := h.TX
82-
switch h.Hashes.IsUnique(b) {
83-
case hash.DuplicateCRC32:
84-
tx.RecordBookState(b, hash.DuplicateCRC32)
85-
return
86-
case hash.DuplicateTitlePlot:
87-
tx.RecordBookState(b, hash.DuplicateTitlePlot)
88-
return
89-
}
24+
func (tx *TX) NewBook(b *model.Book) {
9025

9126
languageId := tx.NewLanguage(b.Language)
9227
serieId := tx.NewSerie(b.Serie)

0 commit comments

Comments
 (0)