Skip to content

Commit 5083aae

Browse files
committed
Further optimise Estimator for HDDs by iterating over files in sector order
1 parent ac167c1 commit 5083aae

File tree

2 files changed

+124
-44
lines changed

2 files changed

+124
-44
lines changed

CompactGUI.Core/Estimator.vb

Lines changed: 123 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
Imports System.IO
22
Imports System.IO.Compression
3+
Imports System.Runtime.InteropServices
34

45
Imports K4os.Compression.LZ4
56

67
Imports K4os.Compression.LZ4.Streams
78

9+
Imports Microsoft.Win32.SafeHandles
10+
811

912
'https://unix.stackexchange.com/questions/155901/estimate-compressibility-of-file
1013

@@ -29,6 +32,7 @@ Public Class Estimator
2932
Public CompressionRatio As Single
3033
End Class
3134

35+
Public Shared IsAlternate As Boolean = False
3236

3337
Public Function EstimateCompressability(analysisResult As List(Of AnalysedFileDetails), ishdd As Boolean, Optional MaxParallelism As Integer = 1, Optional clusterSize As Integer = 4096, Optional cancellationToken As Threading.CancellationToken = Nothing) As List(Of (AnalysedFile As AnalysedFileDetails, CompressionRatio As Single))
3438

@@ -38,26 +42,37 @@ Public Class Estimator
3842

3943
Me.IsHDD = ishdd
4044

45+
46+
'Filter the files based on the cluster size and sort them by cluster location if it's an HDD to minimize seek time
47+
Dim filteredList = analysisResult.Where(Function(f) f.UncompressedSize > clusterSize)
48+
49+
Dim sw = Stopwatch.StartNew()
50+
51+
If ishdd Then filteredList = filteredList.OrderBy(Function(f) GetFirstLcn(f.FileName))
52+
Dim finalList = filteredList.ToList
53+
sw.Stop()
54+
Debug.WriteLine($"Filtered and sorted {finalList.Count} files in {sw.ElapsedMilliseconds} ms")
55+
If IsAlternate Then Debug.WriteLine("Alternate mode enabled, using different handle method.")
56+
57+
4158
Dim paraOptions As New ParallelOptions With {.MaxDegreeOfParallelism = MaxParallelism}
4259

43-
Parallel.ForEach(analysisResult, parallelOptions:=paraOptions, Sub(fl)
60+
Parallel.ForEach(finalList, parallelOptions:=paraOptions, Sub(fl)
61+
cancellationToken.ThrowIfCancellationRequested()
4462

45-
If fl.UncompressedSize > clusterSize Then
46-
If cancellationToken <> Nothing AndAlso cancellationToken.IsCancellationRequested Then
47-
Throw New OperationCanceledException(cancellationToken)
48-
End If
49-
Dim estimatedRatio = EstimateCompressabilityLZ4(fl.FileName, fl.UncompressedSize, cancellationToken)
63+
Dim estimatedRatio = EstimateCompressabilityLZ4(fl.FileName, fl.UncompressedSize, cancellationToken)
5064

51-
_filesList.Add((New FileDetails With {.AnalysedFile = fl, .CompressionRatio = estimatedRatio}))
52-
End If
53-
End Sub)
65+
_filesList.Add((New FileDetails With {.AnalysedFile = fl, .CompressionRatio = estimatedRatio}))
66+
End Sub)
5467

5568
Dim retList As New List(Of (AnalysedFile As AnalysedFileDetails, CompressionRatio As Single))
5669

5770
For Each item In _filesList
5871
retList.Add((item.AnalysedFile, item.CompressionRatio))
5972
Next
6073

74+
IsAlternate = Not IsAlternate ' Toggle the alternate mode for next run
75+
6176
Return retList
6277

6378
End Function
@@ -114,9 +129,7 @@ Public Class Estimator
114129
Dim bytesRead As Integer = input.Read(buffer, 0, BlockSize)
115130

116131
While bytesRead > 0
117-
If cancellationToken <> Nothing AndAlso cancellationToken.IsCancellationRequested Then
118-
Throw New OperationCanceledException(cancellationToken)
119-
End If
132+
cancellationToken.ThrowIfCancellationRequested()
120133
compressionStream.Write(buffer, 0, bytesRead)
121134
totalWritten += bytesRead
122135
bytesRead = input.Read(buffer, 0, BlockSize)
@@ -127,9 +140,7 @@ Public Class Estimator
127140
Dim stepSize As Long = BlockSize * (totalBlocks \ samplesNeeded)
128141
Dim buffer(BlockSize - 1) As Byte
129142
For i As Integer = 0 To samplesNeeded - 1
130-
If cancellationToken <> Nothing AndAlso cancellationToken.IsCancellationRequested Then
131-
Throw New OperationCanceledException(cancellationToken)
132-
End If
143+
cancellationToken.ThrowIfCancellationRequested()
133144
input.Position = stepSize * i
134145
Dim bytesRead As Integer = input.Read(buffer, 0, BlockSize)
135146
compressionStream.Write(buffer, 0, bytesRead)
@@ -141,33 +152,6 @@ Public Class Estimator
141152
Return Math.Min(compressed.Length / Math.Max(totalWritten, 1), 1.0)
142153
End Function
143154

144-
'Private Function EstimateCompressabilityHDD(input As FileStream, fileSize As Long, compressionFactory As CompressionStreamFactory, Optional cancellationToken As Threading.CancellationToken = Nothing) As Double
145-
' Dim MiddleChunkSize As Integer = SampleSize * BlockSize ' 10KB
146-
147-
' Dim totalWritten As Long = 0
148-
' Dim compressed = New MemoryStream()
149-
150-
' Using compressionStream As Stream = compressionFactory(compressed)
151-
' ' If file is smaller than 10KB, just use the whole file
152-
' Dim chunkSize As Integer = CInt(Math.Min(MiddleChunkSize, fileSize))
153-
' Dim middleStart As Long = Math.Max(0, (fileSize \ 2) - (chunkSize \ 2))
154-
155-
' Dim buffer(chunkSize - 1) As Byte
156-
' input.Position = middleStart
157-
' Dim bytesRead As Integer = input.Read(buffer, 0, chunkSize)
158-
159-
' If cancellationToken <> Nothing AndAlso cancellationToken.IsCancellationRequested Then
160-
' Throw New OperationCanceledException(cancellationToken)
161-
' End If
162-
163-
' If bytesRead > 0 Then
164-
' compressionStream.Write(buffer, 0, bytesRead)
165-
' totalWritten += bytesRead
166-
' End If
167-
' End Using
168-
169-
' Return Math.Min(compressed.Length / Math.Max(totalWritten, 1), 1.0)
170-
'End Function
171155

172156
Private Function EstimateCompressabilityHDD(input As FileStream, fileSize As Long, compressionFactory As CompressionStreamFactory, Optional cancellationToken As Threading.CancellationToken = Nothing) As Double
173157
Dim NumClusters As Integer = SampleSize ' or any small number you want to sample
@@ -198,6 +182,102 @@ Public Class Estimator
198182
Return Math.Min(compressed.Length / Math.Max(totalWritten, 1), 1.0)
199183
End Function
200184

185+
Public Function GetFirstLcn(filePath As String) As Long
186+
187+
Dim handle As SafeFileHandle = File.OpenHandle(filePath)
188+
189+
If handle.IsInvalid Then Throw New IOException("Failed to open file handle.")
190+
191+
192+
Dim inBuffer As NtfsInterop.STARTING_VCN_INPUT_BUFFER
193+
inBuffer.StartingVcn = 0
194+
Dim inBufferSize = Marshal.SizeOf(inBuffer)
195+
Dim inBufferPtr = Marshal.AllocHGlobal(inBufferSize)
196+
Marshal.StructureToPtr(inBuffer, inBufferPtr, False)
197+
198+
Dim outBufferSize = 4096
199+
Dim outBufferPtr = Marshal.AllocHGlobal(outBufferSize)
200+
Dim bytesReturned As Integer = 0
201+
202+
Try
203+
Dim result = NtfsInterop.DeviceIoControl(
204+
handle,
205+
NtfsInterop.FSCTL_GET_RETRIEVAL_POINTERS,
206+
inBufferPtr,
207+
inBufferSize,
208+
outBufferPtr,
209+
outBufferSize,
210+
bytesReturned,
211+
IntPtr.Zero
212+
)
213+
214+
'Probably errors because the file is empty
215+
If Not result Then Return Long.MaxValue
216+
217+
218+
' Marshal the output buffer to get the first LCN
219+
Dim extentOffset As Integer = Marshal.OffsetOf(Of RETRIEVAL_POINTERS_BUFFER)("Extents").ToInt32()
220+
Dim lcn As Long = Marshal.ReadInt64(outBufferPtr, extentOffset + 8)
221+
Return lcn
222+
223+
Finally
224+
Marshal.FreeHGlobal(inBufferPtr)
225+
Marshal.FreeHGlobal(outBufferPtr)
226+
handle.Close()
227+
End Try
228+
End Function
229+
230+
End Class
231+
232+
Friend Module NtfsInterop
233+
Public Const FSCTL_GET_RETRIEVAL_POINTERS As UInteger = &H90073
234+
Public Const OPEN_EXISTING As Integer = 3
235+
Public Const FILE_FLAG_BACKUP_SEMANTICS As Integer = &H2000000
236+
Public Const FILE_SHARE_READ As Integer = 1
237+
Public Const FILE_SHARE_WRITE As Integer = 2
238+
Public Const GENERIC_READ As Integer = &H80000000
239+
240+
<DllImport("kernel32.dll", SetLastError:=True, CharSet:=CharSet.Unicode)>
241+
Public Function CreateFile(
242+
lpFileName As String,
243+
dwDesiredAccess As Integer,
244+
dwShareMode As Integer,
245+
lpSecurityAttributes As IntPtr,
246+
dwCreationDisposition As Integer,
247+
dwFlagsAndAttributes As Integer,
248+
hTemplateFile As IntPtr
249+
) As SafeFileHandle
250+
End Function
251+
252+
<DllImport("kernel32.dll", SetLastError:=True)>
253+
Public Function DeviceIoControl(
254+
hDevice As SafeFileHandle,
255+
dwIoControlCode As UInteger,
256+
lpInBuffer As IntPtr,
257+
nInBufferSize As Integer,
258+
lpOutBuffer As IntPtr,
259+
nOutBufferSize As Integer,
260+
ByRef lpBytesReturned As Integer,
261+
lpOverlapped As IntPtr
262+
) As Boolean
263+
End Function
201264

202265

203-
End Class
266+
<StructLayout(LayoutKind.Sequential)>
267+
Public Structure STARTING_VCN_INPUT_BUFFER
268+
Public StartingVcn As Long
269+
End Structure
270+
271+
<StructLayout(LayoutKind.Sequential)>
272+
Public Structure RETRIEVAL_POINTERS_BUFFER
273+
Public ExtentCount As Integer
274+
Public StartingVcn As Long
275+
Public Extents As LCN_EXTENT ' This is actually an array, but we only need the first
276+
End Structure
277+
278+
<StructLayout(LayoutKind.Sequential)>
279+
Public Structure LCN_EXTENT
280+
Public NextVcn As Long
281+
Public Lcn As Long
282+
End Structure
283+
End Module

CompactGUI.Core/Uncompactor.vb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ Public Class Uncompactor : Implements ICompressor, IDisposable
5454
Try
5555
Using fs As FileStream = New FileStream(path, FileMode.Open)
5656
Dim hDevice = fs.SafeFileHandle.DangerousGetHandle
57-
Dim res = DeviceIoControl(hDevice, FSCTL_DELETE_EXTERNAL_BACKING, IntPtr.Zero, 0, IntPtr.Zero, 0, IntPtr.Zero, IntPtr.Zero)
57+
Dim res = WOFHelper.DeviceIoControl(hDevice, FSCTL_DELETE_EXTERNAL_BACKING, IntPtr.Zero, 0, IntPtr.Zero, 0, IntPtr.Zero, IntPtr.Zero)
5858
Return res
5959
End Using
6060
Catch ex As Exception

0 commit comments

Comments
 (0)