forked from internetarchive/gowarc
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfile.go
More file actions
85 lines (73 loc) · 1.98 KB
/
file.go
File metadata and controls
85 lines (73 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package warc
import (
"fmt"
"os"
"strconv"
"strings"
"sync/atomic"
"time"
)
// generateWARCFilename builds a WARC file name following the layout
// recommended by the WARC specification:
//
//	Prefix-Timestamp-Serial-Crawlhost.warc[.gz|.zst].open
//
// The timestamp is the current UTC time rendered as 14 digits
// (YYYYMMDDhhmmss) immediately followed by 3 digits of milliseconds.
// The serial comes from the shared counter serial, which is atomically
// advanced on every call and wraps back to 1 once it has reached 99999, so
// the value written never exceeds 5 digits. The returned name always carries
// a trailing ".open" suffix.
//
// It panics if the host name cannot be determined or if compression is not
// one of CompressionGzip, CompressionZstd or CompressionNone.
func generateWARCFilename(prefix string, compression compressionType, serial *atomic.Uint64) string {
	now := time.Now().UTC()

	// Render the sub-second part as exactly three digits of milliseconds.
	// Slicing the decimal form of the nanosecond count instead would yield
	// wrong digits whenever that count has fewer than nine digits (leading
	// zeros are dropped) and would panic when it has fewer than three.
	millis := strconv.Itoa(now.Nanosecond() / int(time.Millisecond))
	if pad := 3 - len(millis); pad > 0 {
		millis = strings.Repeat("0", pad) + millis
	}

	// Advance the shared counter with a compare-and-swap loop: if another
	// caller changed it between Load and CompareAndSwap, retry with the
	// fresh value. Wrap to 1 once 99999 has been reached.
	var newSerial uint64
	for {
		oldSerial := serial.Load()
		newSerial = oldSerial + 1
		if oldSerial >= 99999 {
			newSerial = 1
		}
		if serial.CompareAndSwap(oldSerial, newSerial) {
			break
		}
	}

	hostName, err := os.Hostname()
	if err != nil {
		panic(err)
	}

	var fileExt string
	switch compression {
	case CompressionGzip:
		fileExt = ".warc.gz.open"
	case CompressionZstd:
		fileExt = ".warc.zst.open"
	case CompressionNone:
		fileExt = ".warc.open"
	default:
		panic(fmt.Sprintf("invalid compression algorithm: %v", compression))
	}

	var filename strings.Builder
	filename.WriteString(prefix)
	filename.WriteString("-")
	filename.WriteString(now.Format("20060102150405"))
	filename.WriteString(millis)
	filename.WriteString("-")
	filename.WriteString(formatSerial(newSerial, "5"))
	filename.WriteString("-")
	filename.WriteString(hostName)
	filename.WriteString(fileExt)

	return filename.String()
}
// formatSerial renders serial as a decimal number, left-padded with zeros to
// the width given by format.
// For example, serial = 23 with format = "5" produces "00023".
func formatSerial(serial uint64, format string) string {
	// Assemble the fmt verb, e.g. "%05d" for format "5".
	verb := "%0" + format + "d"
	return fmt.Sprintf(verb, serial)
}
// isFileSizeExceeded reports whether the current size of file has reached
// maxSize, where maxSize is expressed in mebibytes (1 MiB = 1024 * 1024
// bytes). It returns true when the size is greater than or equal to maxSize
// and false otherwise.
//
// The comparison is done in floating point, so fractional limits such as 1.5
// are honoured. It panics if the file cannot be stat'ed.
func isFileSizeExceeded(file *os.File, maxSize float64) bool {
	stat, err := file.Stat()
	if err != nil {
		panic(err)
	}

	// Convert to float64 before dividing: integer division would truncate
	// the size to whole MiB and make, for example, a 1.5 MiB file appear
	// smaller than a 1.25 MiB limit.
	const bytesPerMiB = 1024 * 1024
	fileSize := float64(stat.Size()) / bytesPerMiB

	return fileSize >= maxSize
}