|  | 
|  | 1 | +/* | 
|  | 2 | +Copyright 2025 The llm-d-inference-sim Authors. | 
|  | 3 | +
 | 
|  | 4 | +Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | +you may not use this file except in compliance with the License. | 
|  | 6 | +You may obtain a copy of the License at | 
|  | 7 | +
 | 
|  | 8 | +    http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | +
 | 
|  | 10 | +Unless required by applicable law or agreed to in writing, software | 
|  | 11 | +distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | +See the License for the specific language governing permissions and | 
|  | 14 | +limitations under the License. | 
|  | 15 | +*/ | 
|  | 16 | + | 
|  | 17 | +package llmdinferencesim | 
|  | 18 | + | 
|  | 19 | +import ( | 
|  | 20 | +	"context" | 
|  | 21 | +	"database/sql" | 
|  | 22 | +	"errors" | 
|  | 23 | +	"fmt" | 
|  | 24 | +	"io" | 
|  | 25 | +	"net/http" | 
|  | 26 | +	"os" | 
|  | 27 | +	"os/signal" | 
|  | 28 | +	"path/filepath" | 
|  | 29 | +	"syscall" | 
|  | 30 | + | 
|  | 31 | +	"github.com/go-logr/logr" | 
|  | 32 | +) | 
|  | 33 | + | 
|  | 34 | +type Dataset struct { | 
|  | 35 | +	db     *sql.DB | 
|  | 36 | +	logger logr.Logger | 
|  | 37 | +} | 
|  | 38 | + | 
|  | 39 | +func (d *Dataset) downloadDataset(url string, savePath string) error { | 
|  | 40 | +	// Set up signal handling for Ctrl+C (SIGINT) | 
|  | 41 | +	ctx, cancel := context.WithCancel(context.Background()) | 
|  | 42 | +	defer cancel() | 
|  | 43 | +	sigs := make(chan os.Signal, 1) | 
|  | 44 | +	signal.Notify(sigs, os.Interrupt, syscall.SIGTERM) | 
|  | 45 | +	defer signal.Stop(sigs) | 
|  | 46 | + | 
|  | 47 | +	// Goroutine to listen for signal | 
|  | 48 | +	go func() { | 
|  | 49 | +		<-sigs | 
|  | 50 | +		d.logger.Info("Interrupt signal received, cancelling download...") | 
|  | 51 | +		cancel() | 
|  | 52 | +	}() | 
|  | 53 | + | 
|  | 54 | +	out, err := os.Create(savePath) | 
|  | 55 | +	if err != nil { | 
|  | 56 | +		return err | 
|  | 57 | +	} | 
|  | 58 | +	defer func() { | 
|  | 59 | +		cerr := out.Close() | 
|  | 60 | +		if cerr != nil { | 
|  | 61 | +			d.logger.Error(cerr, "failed to close file after download") | 
|  | 62 | +		} | 
|  | 63 | +	}() | 
|  | 64 | + | 
|  | 65 | +	resp, err := http.Get(url) | 
|  | 66 | +	if err != nil { | 
|  | 67 | +		return err | 
|  | 68 | +	} | 
|  | 69 | +	defer func() { | 
|  | 70 | +		cerr := resp.Body.Close() | 
|  | 71 | +		if cerr != nil { | 
|  | 72 | +			d.logger.Error(cerr, "failed to close response body after download") | 
|  | 73 | +		} | 
|  | 74 | +	}() | 
|  | 75 | + | 
|  | 76 | +	if resp.StatusCode != http.StatusOK { | 
|  | 77 | +		return fmt.Errorf("bad status: %s", resp.Status) | 
|  | 78 | +	} | 
|  | 79 | + | 
|  | 80 | +	// Progress reader with context | 
|  | 81 | +	pr := &progressReader{ | 
|  | 82 | +		Reader: resp.Body, | 
|  | 83 | +		total:  resp.ContentLength, | 
|  | 84 | +		logger: d.logger, | 
|  | 85 | +		ctx:    ctx, | 
|  | 86 | +	} | 
|  | 87 | + | 
|  | 88 | +	written, err := io.Copy(out, pr) | 
|  | 89 | +	if err != nil { | 
|  | 90 | +		// Remove incomplete file | 
|  | 91 | +		cerr := os.Remove(savePath) | 
|  | 92 | +		if cerr != nil { | 
|  | 93 | +			d.logger.Error(cerr, "failed to remove incomplete file after download") | 
|  | 94 | +		} | 
|  | 95 | +		// If context was cancelled, return a specific error | 
|  | 96 | +		if errors.Is(err, context.Canceled) { | 
|  | 97 | +			return errors.New("download cancelled by user") | 
|  | 98 | +		} | 
|  | 99 | +		return fmt.Errorf("failed to download file: %w", err) | 
|  | 100 | +	} | 
|  | 101 | +	// Check if file size is zero or suspiciously small | 
|  | 102 | +	if written == 0 { | 
|  | 103 | +		cerr := os.Remove(savePath) | 
|  | 104 | +		if cerr != nil { | 
|  | 105 | +			d.logger.Error(cerr, "failed to remove empty file after download") | 
|  | 106 | +		} | 
|  | 107 | +		return errors.New("downloaded file is empty") | 
|  | 108 | +	} | 
|  | 109 | + | 
|  | 110 | +	// Ensure file is fully flushed and closed before returning success | 
|  | 111 | +	if err := out.Sync(); err != nil { | 
|  | 112 | +		cerr := os.Remove(savePath) | 
|  | 113 | +		if cerr != nil { | 
|  | 114 | +			d.logger.Error(cerr, "failed to remove incomplete file after download") | 
|  | 115 | +		} | 
|  | 116 | +		return fmt.Errorf("failed to sync file: %w", err) | 
|  | 117 | +	} | 
|  | 118 | + | 
|  | 119 | +	return nil | 
|  | 120 | +} | 
|  | 121 | + | 
|  | 122 | +// progressReader wraps an io.Reader and logs download progress. | 
|  | 123 | +type progressReader struct { | 
|  | 124 | +	io.Reader | 
|  | 125 | +	total      int64 | 
|  | 126 | +	downloaded int64 | 
|  | 127 | +	lastPct    int | 
|  | 128 | +	logger     logr.Logger | 
|  | 129 | +	ctx        context.Context | 
|  | 130 | +} | 
|  | 131 | + | 
|  | 132 | +func (pr *progressReader) Read(p []byte) (int, error) { | 
|  | 133 | +	select { | 
|  | 134 | +	case <-pr.ctx.Done(): | 
|  | 135 | +		return 0, pr.ctx.Err() | 
|  | 136 | +	default: | 
|  | 137 | +	} | 
|  | 138 | +	n, err := pr.Reader.Read(p) | 
|  | 139 | +	pr.downloaded += int64(n) | 
|  | 140 | +	if pr.total > 0 { | 
|  | 141 | +		pct := int(float64(pr.downloaded) * 100 / float64(pr.total)) | 
|  | 142 | +		if pct != pr.lastPct && pct%10 == 0 { // log every 10% | 
|  | 143 | +			pr.logger.Info(fmt.Sprintf("Download progress: %d%%", pct)) | 
|  | 144 | +			pr.lastPct = pct | 
|  | 145 | +		} | 
|  | 146 | +	} | 
|  | 147 | +	return n, err | 
|  | 148 | +} | 
|  | 149 | +func (d *Dataset) connectToDB(path string) error { | 
|  | 150 | +	// check if file exists | 
|  | 151 | +	_, err := os.Stat(path) | 
|  | 152 | +	if err != nil { | 
|  | 153 | +		return fmt.Errorf("database file does not exist: %w", err) | 
|  | 154 | +	} | 
|  | 155 | +	d.db, err = sql.Open("sqlite3", path) | 
|  | 156 | +	if err != nil { | 
|  | 157 | +		return fmt.Errorf("failed to open database: %w", err) | 
|  | 158 | +	} | 
|  | 159 | +	return nil | 
|  | 160 | +} | 
|  | 161 | + | 
|  | 162 | +func (d *Dataset) Init(path string, url string, savePath string) error { | 
|  | 163 | +	if path != "" { | 
|  | 164 | +		return d.connectToDB(path) | 
|  | 165 | +	} | 
|  | 166 | +	if url != "" { | 
|  | 167 | +		if savePath == "" { | 
|  | 168 | +			savePath = "~/.llmd/dataset.sqlite3" | 
|  | 169 | +		} | 
|  | 170 | + | 
|  | 171 | +		_, err := os.Stat(savePath) | 
|  | 172 | +		if err != nil { | 
|  | 173 | +			// file does not exist, download it | 
|  | 174 | +			folder := filepath.Dir(savePath) | 
|  | 175 | +			err := os.MkdirAll(folder, 0755) | 
|  | 176 | +			if err != nil { | 
|  | 177 | +				return fmt.Errorf("failed to create parent directory: %w", err) | 
|  | 178 | +			} | 
|  | 179 | +			d.logger.Info("Downloading dataset from URL", "url", url, "to", savePath) | 
|  | 180 | +			err = d.downloadDataset(url, savePath) | 
|  | 181 | +			if err != nil { | 
|  | 182 | +				return fmt.Errorf("failed to download dataset: %w", err) | 
|  | 183 | +			} | 
|  | 184 | +		} | 
|  | 185 | +		d.logger.Info("Using dataset from", "path", savePath) | 
|  | 186 | + | 
|  | 187 | +		return d.connectToDB(savePath) | 
|  | 188 | +	} | 
|  | 189 | +	return errors.New("no dataset path or url provided") | 
|  | 190 | +} | 
|  | 191 | + | 
|  | 192 | +func (d *Dataset) Close() error { | 
|  | 193 | +	if d.db != nil { | 
|  | 194 | +		return d.db.Close() | 
|  | 195 | +	} | 
|  | 196 | +	return nil | 
|  | 197 | +} | 
0 commit comments