
Commit 90c263a

Merge pull request #729 from mjkim610/export-har
Implement incremental writing of HAR file
2 parents c49a346 + 647fcd0 · commit 90c263a

4 files changed: +444, −38 lines
go.mod

Lines changed: 2 additions & 0 deletions
@@ -19,6 +19,7 @@ require (
 	github.com/projectdiscovery/roundrobin v0.0.6
 	github.com/projectdiscovery/tinydns v0.0.77
 	github.com/projectdiscovery/utils v0.4.13
+	github.com/stretchr/testify v1.9.0
 	github.com/things-go/go-socks5 v0.0.5
 	golang.org/x/net v0.33.0
 	gopkg.in/yaml.v3 v3.0.1
@@ -37,6 +38,7 @@ require (
 	github.com/klauspost/pgzip v1.2.6 // indirect
 	github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
 	github.com/mholt/archiver/v3 v3.5.1 // indirect
+	github.com/pmezard/go-difflib v1.0.0 // indirect
 	github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
 	github.com/projectdiscovery/machineid v0.0.0-20240226150047-2e2c51e35983 // indirect
 	github.com/shirou/gopsutil/v3 v3.23.7 // indirect

pkg/logger/har/har.go

Lines changed: 257 additions & 0 deletions
@@ -0,0 +1,257 @@
package har

import (
	"encoding/json"
	"io"
	"net/http"
	"os"
	"sync"
	"time"

	"github.com/projectdiscovery/gologger"
	"github.com/projectdiscovery/martian/v3/har"
)

const FlushInterval = 10 * time.Second

type Writer struct {
	f     *os.File
	mutex *sync.Mutex
}

type Logger struct {
	martianHarLogger *har.Logger
	writer           *Writer
	done             chan struct{}
	wg               sync.WaitGroup
}

func NewLogger(filePath string, flushInterval time.Duration) (*Logger, error) {
	martianHarLogger := har.NewLogger()
	writer, err := newWriter(filePath)
	if err != nil {
		return nil, err
	}

	logger := &Logger{
		martianHarLogger: martianHarLogger,
		writer:           writer,
		done:             make(chan struct{}),
	}

	logger.wg.Add(1)
	go func(logger *Logger) {
		defer logger.wg.Done()
		ticker := time.NewTicker(flushInterval)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				if err := writer.append(martianHarLogger.ExportAndReset()); err != nil {
					gologger.Error().Msgf("Could not write HAR log: %s\n", err)
				}
			case <-logger.done:
				return
			}
		}
	}(logger)

	return logger, nil
}

func newWriter(filePath string) (*Writer, error) {
	file, err := os.Create(filePath)
	if err != nil {
		return nil, err
	}
	return &Writer{f: file, mutex: &sync.Mutex{}}, nil
}

func (l *Logger) ModifyRequest(req *http.Request) error {
	return l.martianHarLogger.ModifyRequest(req)
}

func (l *Logger) ModifyResponse(resp *http.Response) error {
	return l.martianHarLogger.ModifyResponse(resp)
}

func (l *Logger) Flush() error {
	return l.writer.append(l.martianHarLogger.ExportAndReset())
}

func (w *Writer) append(harObj *har.HAR) error {
	if harObj == nil || harObj.Log == nil || len(harObj.Log.Entries) == 0 {
		return nil
	}

	w.mutex.Lock()
	defer w.mutex.Unlock()

	// Check if file is empty (new file)
	fileInfo, err := w.f.Stat()
	if err != nil {
		return err
	}

	if fileInfo.Size() == 0 {
		// Write complete HAR structure for new file
		encoder := json.NewEncoder(w.f)
		encoder.SetIndent("", "  ")
		return encoder.Encode(harObj)
	}

	// For existing file, append entries efficiently
	return w.appendEntries(harObj.Log.Entries)
}

func (w *Writer) appendEntries(entries []*har.Entry) error {
	// Get current file size
	fileInfo, err := w.f.Stat()
	if err != nil {
		return err
	}

	// Read the last few bytes to find where to insert new entries.
	// We need to find the position before the closing "]" of the entries array.
	readSize := int64(200) // Read last 200 bytes, should be enough to find closing brackets
	if fileInfo.Size() < readSize {
		readSize = fileInfo.Size()
	}

	// Seek to position to read from
	seekPos := fileInfo.Size() - readSize
	if _, err := w.f.Seek(seekPos, io.SeekStart); err != nil {
		return err
	}

	// Read the last part of the file
	lastBytes := make([]byte, readSize)
	n, err := w.f.Read(lastBytes)
	if err != nil && err != io.EOF {
		return err
	}
	lastBytes = lastBytes[:n]

	// Find the position of the last "]" before the final "}".
	// This is where we need to insert new entries.
	content := string(lastBytes)
	lastEntryEnd := -1

	// Look for the pattern "]\n}" which indicates end of entries array
	for i := len(content) - 3; i >= 0; i-- {
		if i+2 < len(content) && content[i:i+3] == "]\n}" {
			lastEntryEnd = i
			break
		}
	}

	// If we can't find the pattern, fall back to rewriting the whole file
	if lastEntryEnd == -1 {
		return w.rewriteFile(entries)
	}

	// Calculate the actual position in the file where we need to truncate
	truncatePos := seekPos + int64(lastEntryEnd)

	// Truncate the file at the position before the closing "]\n}"
	if err := w.f.Truncate(truncatePos); err != nil {
		return err
	}

	// Seek to the truncation point
	if _, err := w.f.Seek(truncatePos, io.SeekStart); err != nil {
		return err
	}

	// Write comma and newline if there were existing entries
	if truncatePos > 0 {
		if _, err := w.f.WriteString(",\n"); err != nil {
			return err
		}
	}

	// Write each new entry with proper formatting
	for i, entry := range entries {
		if i > 0 {
			if _, err := w.f.WriteString(",\n"); err != nil {
				return err
			}
		}

		// Marshal the entry with proper indentation
		entryBytes, err := json.MarshalIndent(entry, "    ", "  ")
		if err != nil {
			return err
		}

		// Adjust indentation to match HAR file format
		entryStr := string(entryBytes)
		// Replace the first 4 spaces with 2 spaces to match the entries array indentation
		if len(entryStr) > 4 && entryStr[:4] == "    " {
			entryStr = "  " + entryStr[4:]
		}

		if _, err := w.f.WriteString(entryStr); err != nil {
			return err
		}
	}

	// Write the closing brackets
	if _, err := w.f.WriteString("\n  ]\n}"); err != nil {
		return err
	}

	return nil
}

func (w *Writer) rewriteFile(entries []*har.Entry) error {
	// Fallback method: read existing file and rewrite.
	// This is the old inefficient method, but kept as fallback.
	if _, err := w.f.Seek(0, 0); err != nil {
		return err
	}

	decoder := json.NewDecoder(w.f)
	var existingHar har.HAR
	if err := decoder.Decode(&existingHar); err != nil && err != io.EOF {
		return err
	}

	// Merge entries
	if existingHar.Log != nil {
		existingHar.Log.Entries = append(existingHar.Log.Entries, entries...)
	} else {
		// This shouldn't happen in normal flow, but handle it
		existingHar = har.HAR{
			Log: &har.Log{
				Version: "1.2",
				Creator: &har.Creator{
					Name:    "proxify",
					Version: "1.0",
				},
				Entries: entries,
			},
		}
	}

	// Truncate and rewrite
	if err := w.f.Truncate(0); err != nil {
		return err
	}
	if _, err := w.f.Seek(0, 0); err != nil {
		return err
	}

	encoder := json.NewEncoder(w.f)
	encoder.SetIndent("", "  ")
	return encoder.Encode(existingHar)
}

func (l *Logger) Close() error {
	close(l.done)
	l.wg.Wait()
	if err := l.Flush(); err != nil {
		gologger.Error().Msgf("Could not flush HAR log on close: %s\n", err)
	}
	return l.writer.f.Close()
}
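For reference, a minimal usage sketch of the new logger: create it with a file path and a flush interval, attach it to the request/response path, and close it on shutdown so the final flush runs. The import path, the output file name, and the proxy wiring shown in the comments are assumptions based on the repository layout (pkg/logger/har) and on martian's modifier interfaces, not the exact integration in this PR.

package main

import (
	"log"

	"github.com/projectdiscovery/proxify/pkg/logger/har"
)

func main() {
	// Create the incremental HAR logger; a background goroutine flushes
	// buffered entries to disk every flush interval (FlushInterval = 10s).
	harLogger, err := har.NewLogger("proxify.har", har.FlushInterval)
	if err != nil {
		log.Fatal(err)
	}
	// Close stops the flush goroutine, performs a final flush, and closes
	// the underlying file.
	defer harLogger.Close()

	// ModifyRequest / ModifyResponse implement martian's modifier interfaces,
	// so the logger could be plugged into the proxy chain, e.g.
	// proxy.SetRequestModifier(harLogger) and proxy.SetResponseModifier(harLogger)
	// (illustrative only, not this PR's actual wiring).
}

Flush can also be called directly to force pending entries to disk outside the ticker. The append path avoids re-encoding the whole HAR on every flush: it scans only the last ~200 bytes for the closing "]\n}" of the entries array, truncates there, writes the new entries, restores the closing brackets, and falls back to a full rewrite if the pattern is not found.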
