Skip to content

Commit e9941fd

Browse files
committed
Adds cache invalidation logic
1 parent f7930a9 commit e9941fd

File tree

3 files changed

+225
-0
lines changed

3 files changed

+225
-0
lines changed

lib/msf/core/modules/metadata/store.rb

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
require 'json'
2+
require 'parallel'
3+
require 'zlib'
24

35
#
46
# Handles storage of module metadata on disk. A base metadata file is always included - this was added to ensure a much
@@ -14,6 +16,7 @@ def initialize
1416

1517
BaseMetaDataFile = 'modules_metadata_base.json'
1618
UserMetaDataFile = 'modules_metadata.json'
19+
CacheMetaDataFile = 'cache_metadata_base.json'
1720

1821
#
1922
# Initializes from the user store (under ~/.msf4/store) if it exists; otherwise the base file (under $INSTALL_ROOT/db) is copied and loaded.
@@ -124,4 +127,213 @@ def load_cache_from_file_store
124127
}
125128
end
126129

130+
# Checks whether the files on disk still match the checksum recorded in the cache metadata.
#
# The current overall checksum is computed first (see get_current_checksum). If the db cache
# file does not exist yet it is seeded with that checksum, and the stored checksum is then read
# back (user cache file first, db cache file second) and compared with the current one.
#
# @return [Boolean] True if the current checksum matches the cached one
def self.valid_checksum?
  current_checksum = get_current_checksum

  # Seeding must happen before the cached value is read so that a first run has a baseline.
  ensure_cache_file_exists(current_checksum)

  checksums_match?(current_checksum, get_cached_checksum)
end
144+
145+
# Calculate the current checksum for all module and library files.
#
# Per-file CRC32 values are calculated (reusing entries from the per-file cache where the
# file's mtime and size are unchanged), the per-file cache is persisted when it changed, and an
# overall checksum is generated from the individual file checksums.
#
# @return [String] The current overall checksum
def self.get_current_checksum
  per_file_cache_file = get_per_file_cache_path
  per_file_cache = load_per_file_cache(per_file_cache_file)

  file_crc32s_with_metadata = calculate_file_checksums(collect_files_to_check, per_file_cache)
  updated_cache = file_crc32s_with_metadata.to_h

  # Avoid regenerating and rewriting the whole cache file when no entry was added, removed or changed.
  save_per_file_cache(per_file_cache_file, updated_cache) unless updated_cache == per_file_cache

  calculate_overall_checksum(file_crc32s_with_metadata.map { |_, meta| meta['crc32'] })
end
164+
165+
# Tell whether the freshly calculated checksum equals the one read from the cache.
# @param [String] current_checksum The calculated checksum for the current state
# @param [String, nil] cached_checksum The checksum retrieved from cache
# @return [Boolean] True if checksums match, false otherwise
def self.checksums_match?(current_checksum, cached_checksum)
  return false unless current_checksum == cached_checksum

  true
end
172+
173+
# Calculate the overall checksum from individual file checksums.
#
# The values are joined with a delimiter before hashing. Without one, different lists such as
# [1, 23] and [12, 3] would concatenate to the same string and yield the same overall checksum.
# NOTE: checksums stored by a version that joined without a delimiter will no longer match,
# which results in a one-off cache rebuild.
#
# @param [Array<Integer>] file_crc32s Array of individual file CRC32 values
# @return [String] The hexadecimal representation of the overall CRC32
def self.calculate_overall_checksum(file_crc32s)
  Zlib.crc32(file_crc32s.join(',')).to_s(16)
end
179+
180+
# Gather every regular file below the install root's modules/ and lib/ directories and below
# the user module directory.
# @return [Array<String>] Sorted list of file paths
def self.collect_files_to_check
  install_root = Msf::Config.install_root
  search_roots = [
    File.join(install_root, 'modules'),
    File.join(install_root, 'lib'),
    Msf::Config.user_module_directory
  ]

  # Recursive glob per root; directories are filtered out afterwards
  patterns = search_roots.map { |root| File.join(root, '**', '*') }
  Dir.glob(patterns).select { |path| File.file?(path) }.sort
end
190+
191+
# Produce a CRC32 (plus mtime and size) for every given file, reusing the cached CRC32 for
# files whose mtime and size equal the cached entry.
# @param [Array<String>] files List of file paths to check
# @param [Hash] cache Current cache data, keyed by file path
# @return [Array<Array>] Array of [file_path, metadata] pairs
def self.calculate_file_checksums(files, cache)
  worker_count = Etc.nprocessors * 2

  Parallel.map(files, in_threads: worker_count) do |path|
    stat = File.stat(path)
    mtime = stat.mtime.to_i
    size = stat.size

    known = cache[path]
    unchanged = known && known['mtime'] == mtime && known['size'] == size

    # Only read the file when the cached entry is missing or stale
    checksum = unchanged ? known['crc32'] : File.open(path, 'rb') { |io| Zlib.crc32(io.read) }

    [path, { 'crc32' => checksum, 'mtime' => mtime, 'size' => size }]
  end
end
214+
215+
# Location of the per-file checksum cache inside the config directory's store folder
# @return [String] Path to the per-file cache
def self.get_per_file_cache_path
  store_dir = File.join(Msf::Config.config_directory, 'store')
  File.join(store_dir, 'per_file_metadata_cache.json')
end
220+
221+
# Location of the checksum cache file inside the config directory's store folder
# @return [String] Path to the cache store file
def self.get_store_cache_path
  store_dir = File.join(Msf::Config.config_directory, 'store')
  File.join(store_dir, CacheMetaDataFile)
end
226+
227+
# Location of the checksum cache file inside the install root's db folder
# @return [String] Path to the DB cache file
def self.get_db_cache_path
  db_dir = File.join(Msf::Config.install_root, 'db')
  File.join(db_dir, CacheMetaDataFile)
end
232+
233+
# Load the per-file cache from disk.
#
# The cache is only an optimisation, so any file that cannot be read or does not contain a
# JSON object (missing, truncated, corrupt, wrong top-level type) is treated as an empty cache
# rather than raising; every checksum is then simply recalculated.
#
# @param [String] cache_file Path to the cache file
# @return [Hash] The loaded cache, or an empty hash if no usable cache exists
def self.load_per_file_cache(cache_file)
  return {} unless File.exist?(cache_file)

  parsed = JSON.parse(File.read(cache_file))
  parsed.is_a?(Hash) ? parsed : {}
rescue JSON::ParserError, SystemCallError
  # SystemCallError covers the file disappearing or being unreadable between the check and the read
  {}
end
239+
240+
# Save the updated per-file cache to disk.
#
# The JSON is written to a temporary file in the same directory and then renamed over the
# target, so an interrupted write does not leave a truncated cache file behind.
#
# @param [String] cache_file Path to the cache file
# @param [Hash] updated_cache The cache data to save
# @return [void]
def self.save_per_file_cache(cache_file, updated_cache)
  # Ensure the directory for the cache file exists before writing
  FileUtils.mkdir_p(File.dirname(cache_file))

  temp_file = "#{cache_file}.#{Process.pid}.tmp"
  begin
    File.write(temp_file, JSON.pretty_generate(updated_cache))
    File.rename(temp_file, cache_file)
  ensure
    # No-op after a successful rename; removes the leftover when the write or rename failed
    FileUtils.rm_f(temp_file)
  end
end
250+
251+
# Create or update a cache file with the given checksum.
#
# An existing file keeps its other keys and only has checksum.crc32 replaced. If the existing
# file is not valid JSON, is not a JSON object, or has no "checksum" object, the missing parts
# are recreated instead of raising.
#
# @param [String] file_path Path to the cache file
# @param [String] checksum The checksum to store
# @return [void]
def self.create_or_update_cache_file(file_path, checksum)
  # Ensure directory exists
  FileUtils.mkdir_p(File.dirname(file_path))

  cache_content = {}
  if File.exist?(file_path)
    begin
      parsed = JSON.parse(File.read(file_path))
      cache_content = parsed if parsed.is_a?(Hash)
    rescue JSON::ParserError
      # Unparseable content is discarded; the file is rewritten from scratch below
    end
  end

  checksum_section = cache_content['checksum']
  checksum_section = cache_content['checksum'] = {} unless checksum_section.is_a?(Hash)
  checksum_section['crc32'] = checksum

  File.write(file_path, JSON.pretty_generate(cache_content))
end
274+
275+
# Ensure the db cache file exists, creating it if necessary.
#
# Only the db cache file is created here; the user's cache file
# (~/.msf4/store/cache_metadata_base.json) should only be created when changes are made.
# Creation is best-effort: when the db directory cannot be written to (for example a read-only
# or root-owned installation) the file is simply left absent.
#
# @param [String] current_checksum The current checksum to use if creating a new cache file
# @return [void]
def self.ensure_cache_file_exists(current_checksum)
  cache_db_path = get_db_cache_path
  return if File.exist?(cache_db_path)

  # Ensure directory exists
  FileUtils.mkdir_p(File.dirname(cache_db_path))
  cache_content = { 'checksum' => { 'crc32' => current_checksum } }
  File.write(cache_db_path, JSON.pretty_generate(cache_content))
rescue Errno::EACCES, Errno::EPERM, Errno::EROFS
  # The install tree is not writable for this user; carry on without a db cache file
  nil
end
295+
296+
# Get the cached checksum value without creating any new files.
#
# The user's cache file takes precedence; the db cache file is only consulted when the user's
# file does not exist. A file that exists but cannot be parsed, or does not contain
# checksum.crc32, yields nil.
#
# @return [String, nil] The cached checksum value, or nil if no usable cache exists
def self.get_cached_checksum
  # First existing file wins: user's cache file, then the db cache file
  cache_file = [get_store_cache_path, get_db_cache_path].find { |path| File.exist?(path) }

  # If neither exists, return nil to trigger a cache rebuild
  # This allows the build process to work with neither file present
  return nil unless cache_file

  cache_content = JSON.parse(File.read(cache_file))
  cache_content.is_a?(Hash) ? cache_content.dig('checksum', 'crc32') : nil
rescue JSON::ParserError, TypeError
  # TypeError is raised by dig when "checksum" holds a value that cannot be dug into
  nil
end
318+
319+
# Update the user's cache checksum file with the given crc32 checksum.
#
# When the user's cache file does not exist yet but the db cache file does, the db file is
# copied into the user's store first (creating the store directory if needed) so that its
# other content is carried over; the checksum is then written into the user's file.
#
# @param [String] current_checksum The current checksum to store in the cache
# @return [void]
def self.update_cache_checksum(current_checksum)
  cache_store_path = get_store_cache_path
  cache_db_path = get_db_cache_path

  if !File.exist?(cache_store_path) && File.exist?(cache_db_path)
    # The store directory may not exist yet; cp would fail without it
    FileUtils.mkdir_p(File.dirname(cache_store_path))
    FileUtils.cp(cache_db_path, cache_store_path)
  end

  create_or_update_cache_file(cache_store_path, current_checksum)
end
127339
end

lib/msf/ui/console/driver.rb

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,18 @@ def initialize(prompt = DefaultPrompt, prompt_char = DefaultPromptChar, opts = {
163163
self.framework.init_module_paths(module_paths: opts['ModulePath'], defer_module_loads: opts['DeferModuleLoads'])
164164
end
165165

166+
# If the module cache is invalid, we need to invalidate it and update the cache.
167+
# The rebuild thread is spawned directly below, regardless of 'DeferModuleLoads', so that the modules are reloaded.
168+
unless Msf::Modules::Metadata::Store.valid_checksum?
169+
# Get the current checksum and update the cache with it
170+
current_checksum = Msf::Modules::Metadata::Store.get_current_checksum
171+
Msf::Modules::Metadata::Store.update_cache_checksum(current_checksum)
172+
173+
framework.threads.spawn("ModuleCacheRebuild", true) do
174+
framework.modules.refresh_cache_from_module_files
175+
end
176+
end
177+
166178
unless opts['DeferModuleLoads']
167179
framework.threads.spawn("ModuleCacheRebuild", true) do
168180
framework.modules.refresh_cache_from_module_files

tools/automation/cache/build_new_cache.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/bin/sh -ex
22
bundle install
33
rm -f db/modules_metadata_base.json
4+
rm -f db/cache_metadata_base.json
45
# Set each tracked module file's mtime to the author date of its last commit.
git ls-files modules/ -z | xargs -0 -n1 -P `nproc` -I{} -- git log -1 --format="%ai {}" {} | while read -r udate utime utz ufile ; do
  # Quote the path: read puts the rest of the line (including any spaces) into ufile
  touch -d "$udate $utime" "$ufile"
done

0 commit comments

Comments
 (0)