Skip to content

Commit 592da6b

Browse files
authored
Fixes to in-memory caching and added tests for file caching (#119)
1 parent 7a167a2 commit 592da6b

File tree

13 files changed

+431
-81
lines changed

13 files changed

+431
-81
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ target/
1313
.DS_Store
1414

1515
# Testing directories
16-
.venv/
16+
./.venv/
1717
tmp/
1818
temp/
1919
docs/node_modules/

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/pet-conda/tests/ci_test.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,10 +310,7 @@ fn detect_new_conda_env_created_with_p_flag_with_python() {
310310
use pet_core::{
311311
os_environment::EnvironmentApi, python_environment::PythonEnvironmentKind, Locator,
312312
};
313-
use pet_reporter::{
314-
cache::{self, CacheReporter},
315-
collect,
316-
};
313+
use pet_reporter::{cache::CacheReporter, collect};
317314

318315
setup();
319316
let env_name = "env_with_python3";

crates/pet-homebrew/src/environments.rs

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
use crate::sym_links::get_known_symlinks;
55
use lazy_static::lazy_static;
6+
use log::trace;
67
use pet_core::python_environment::{
78
PythonEnvironment, PythonEnvironmentBuilder, PythonEnvironmentKind,
89
};
@@ -12,19 +13,21 @@ use std::path::{Path, PathBuf};
1213

1314
lazy_static! {
1415
static ref PYTHON_VERSION: Regex =
15-
Regex::new(r"/(\d+\.\d+\.\d+)/").expect("error parsing Version regex for Homebrew");
16+
Regex::new(r"/(\d+\.\d+\.\d+)").expect("error parsing Version regex for Homebrew");
1617
}
1718

1819
pub fn get_python_info(
1920
python_exe_from_bin_dir: &Path,
2021
resolved_exe: &Path,
2122
) -> Option<PythonEnvironment> {
22-
// let user_friendly_exe = python_exe_from_bin_dir;
23-
let python_version = resolved_exe.to_string_lossy().to_string();
24-
let version = match PYTHON_VERSION.captures(&python_version) {
25-
Some(captures) => captures.get(1).map(|version| version.as_str().to_string()),
26-
None => None,
27-
};
23+
let version = get_version(resolved_exe);
24+
25+
trace!(
26+
"Getting homebrew python info for {:?} => {:?} with version {:?}",
27+
python_exe_from_bin_dir,
28+
resolved_exe,
29+
version
30+
);
2831

2932
let mut symlinks = vec![
3033
python_exe_from_bin_dir.to_path_buf(),
@@ -57,6 +60,14 @@ pub fn get_python_info(
5760
Some(env)
5861
}
5962

63+
fn get_version(resolved_exe: &Path) -> Option<String> {
64+
let python_version = resolved_exe.to_string_lossy().to_string();
65+
match PYTHON_VERSION.captures(&python_version) {
66+
Some(captures) => captures.get(1).map(|version| version.as_str().to_string()),
67+
None => None,
68+
}
69+
}
70+
6071
fn get_prefix(_resolved_file: &Path) -> Option<PathBuf> {
6172
// NOTE:
6273
// While testing found that on Mac Intel
@@ -135,3 +146,26 @@ fn get_prefix(_resolved_file: &Path) -> Option<PathBuf> {
135146
// }
136147
None
137148
}
149+
150+
#[cfg(test)]
151+
mod tests {
152+
use super::*;
153+
154+
#[test]
155+
#[cfg(unix)]
156+
fn extract_version() {
157+
assert_eq!(
158+
get_version(&PathBuf::from(
159+
"/home/linuxbrew/.linuxbrew/Cellar/[email protected]/3.12.4/bin/python3"
160+
)),
161+
Some("3.12.4".to_string())
162+
);
163+
164+
assert_eq!(
165+
get_version(&PathBuf::from(
166+
"/home/linuxbrew/.linuxbrew/Cellar/[email protected]/3.11.9_1/bin/python3.11"
167+
)),
168+
Some("3.11.9".to_string())
169+
);
170+
}
171+
}

crates/pet-python-utils/Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,12 @@ serde = { version = "1.0.152", features = ["derive"] }
1515
log = "0.4.21"
1616
serde_json = "1.0.93"
1717
sha2 = "0.10.6"
18+
env_logger = "0.10.2"
19+
20+
[features]
21+
ci = []
22+
ci-jupyter-container = []
23+
ci-homebrew-container = []
24+
ci-poetry-global = []
25+
ci-poetry-project = []
26+
ci-poetry-custom = []

crates/pet-python-utils/src/cache.rs

Lines changed: 55 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,18 @@
22
// Licensed under the MIT License.
33

44
use lazy_static::lazy_static;
5-
use log::trace;
5+
use log::{trace, warn};
66
use std::{
77
collections::{hash_map::Entry, HashMap, HashSet},
8+
io,
89
path::PathBuf,
910
sync::{Arc, Mutex},
1011
time::SystemTime,
1112
};
1213

1314
use crate::{
1415
env::ResolvedPythonEnv,
15-
fs_cache::{get_cache_from_file, store_cache_in_file},
16+
fs_cache::{delete_cache_file, get_cache_from_file, store_cache_in_file},
1617
};
1718

1819
lazy_static! {
@@ -25,6 +26,10 @@ pub trait CacheEntry: Send + Sync {
2526
fn track_symlinks(&self, symlinks: Vec<PathBuf>);
2627
}
2728

29+
pub fn clear_cache() -> io::Result<()> {
30+
CACHE.clear()
31+
}
32+
2833
pub fn create_cache(executable: PathBuf) -> Arc<Mutex<Box<dyn CacheEntry>>> {
2934
CACHE.create_cache(executable)
3035
}
@@ -61,28 +66,39 @@ impl CacheImpl {
6166
/// Once a cache directory has been set, you cannot change it.
6267
/// No point supporting such a scenario.
6368
fn set_cache_directory(&self, cache_dir: PathBuf) {
69+
if let Some(cache_dir) = self.cache_dir.lock().unwrap().clone() {
70+
warn!(
71+
"Cache directory has already been set to {:?}. Cannot change it now.",
72+
cache_dir
73+
);
74+
return;
75+
}
76+
trace!("Setting cache directory to {:?}", cache_dir);
6477
self.cache_dir.lock().unwrap().replace(cache_dir);
6578
}
79+
fn clear(&self) -> io::Result<()> {
80+
trace!("Clearing cache");
81+
self.locks.lock().unwrap().clear();
82+
if let Some(cache_directory) = self.cache_dir.lock().unwrap().clone() {
83+
std::fs::remove_dir_all(cache_directory)
84+
} else {
85+
Ok(())
86+
}
87+
}
6688
fn create_cache(&self, executable: PathBuf) -> LockableCacheEntry {
67-
if let Some(cache_directory) = self.cache_dir.lock().unwrap().as_ref() {
68-
match self.locks.lock().unwrap().entry(executable.clone()) {
69-
Entry::Occupied(lock) => lock.get().clone(),
70-
Entry::Vacant(lock) => {
71-
let cache =
72-
Box::new(CacheEntryImpl::create(cache_directory.clone(), executable))
73-
as Box<(dyn CacheEntry + 'static)>;
74-
lock.insert(Arc::new(Mutex::new(cache))).clone()
75-
}
89+
let cache_directory = self.cache_dir.lock().unwrap().clone();
90+
match self.locks.lock().unwrap().entry(executable.clone()) {
91+
Entry::Occupied(lock) => lock.get().clone(),
92+
Entry::Vacant(lock) => {
93+
let cache = Box::new(CacheEntryImpl::create(cache_directory.clone(), executable))
94+
as Box<(dyn CacheEntry + 'static)>;
95+
lock.insert(Arc::new(Mutex::new(cache))).clone()
7696
}
77-
} else {
78-
Arc::new(Mutex::new(Box::new(CacheEntryImpl::empty(
79-
executable.clone(),
80-
))))
8197
}
8298
}
8399
}
84100

85-
type FilePathWithMTimeCTime = (PathBuf, Option<SystemTime>, Option<SystemTime>);
101+
type FilePathWithMTimeCTime = (PathBuf, SystemTime, SystemTime);
86102

87103
struct CacheEntryImpl {
88104
cache_directory: Option<PathBuf>,
@@ -92,17 +108,9 @@ struct CacheEntryImpl {
92108
symlinks: Arc<Mutex<Vec<FilePathWithMTimeCTime>>>,
93109
}
94110
impl CacheEntryImpl {
95-
pub fn create(cache_directory: PathBuf, executable: PathBuf) -> impl CacheEntry {
111+
pub fn create(cache_directory: Option<PathBuf>, executable: PathBuf) -> impl CacheEntry {
96112
CacheEntryImpl {
97-
cache_directory: Some(cache_directory),
98-
executable,
99-
envoronment: Arc::new(Mutex::new(None)),
100-
symlinks: Arc::new(Mutex::new(Vec::new())),
101-
}
102-
}
103-
pub fn empty(executable: PathBuf) -> impl CacheEntry {
104-
CacheEntryImpl {
105-
cache_directory: None,
113+
cache_directory,
106114
executable,
107115
envoronment: Arc::new(Mutex::new(None)),
108116
symlinks: Arc::new(Mutex::new(Vec::new())),
@@ -112,10 +120,21 @@ impl CacheEntryImpl {
112120
// Check if any of the exes have changed since we last cached this.
113121
for symlink_info in self.symlinks.lock().unwrap().iter() {
114122
if let Ok(metadata) = symlink_info.0.metadata() {
115-
if metadata.modified().ok() != symlink_info.1
116-
|| metadata.created().ok() != symlink_info.2
123+
if metadata.modified().ok() != Some(symlink_info.1)
124+
|| metadata.created().ok() != Some(symlink_info.2)
117125
{
126+
trace!(
127+
"Symlink {:?} has changed since we last cached it. original mtime & ctime {:?}, {:?}, current mtime & ctime {:?}, {:?}",
128+
symlink_info.0,
129+
symlink_info.1,
130+
symlink_info.2,
131+
metadata.modified().ok(),
132+
metadata.created().ok()
133+
);
118134
self.envoronment.lock().unwrap().take();
135+
if let Some(cache_directory) = &self.cache_directory {
136+
delete_cache_file(cache_directory, &self.executable);
137+
}
119138
}
120139
}
121140
}
@@ -149,11 +168,12 @@ impl CacheEntry for CacheEntryImpl {
149168
let mut symlinks = vec![];
150169
for symlink in environment.symlinks.clone().unwrap_or_default().iter() {
151170
if let Ok(metadata) = symlink.metadata() {
152-
symlinks.push((
153-
symlink.clone(),
154-
metadata.modified().ok(),
155-
metadata.created().ok(),
156-
));
171+
// We only care if we have the information
172+
if let (Some(modified), Some(created)) =
173+
(metadata.modified().ok(), metadata.created().ok())
174+
{
175+
symlinks.push((symlink.clone(), modified, created));
176+
}
157177
}
158178
}
159179

@@ -167,8 +187,9 @@ impl CacheEntry for CacheEntryImpl {
167187
.unwrap()
168188
.replace(environment.clone());
169189

190+
trace!("Caching interpreter info for {:?}", self.executable);
191+
170192
if let Some(ref cache_directory) = self.cache_directory {
171-
trace!("Storing cache for {:?}", self.executable);
172193
store_cache_in_file(cache_directory, &self.executable, &environment, symlinks)
173194
}
174195
}

crates/pet-python-utils/src/fs_cache.rs

Lines changed: 49 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (c) Microsoft Corporation.
22
// Licensed under the MIT License.
33

4-
use log::trace;
4+
use log::{error, trace};
55
use pet_fs::path::norm_case;
66
use serde::{Deserialize, Serialize};
77
use sha2::{Digest, Sha256};
@@ -14,7 +14,7 @@ use std::{
1414

1515
use crate::env::ResolvedPythonEnv;
1616

17-
type FilePathWithMTimeCTime = (PathBuf, Option<SystemTime>, Option<SystemTime>);
17+
type FilePathWithMTimeCTime = (PathBuf, SystemTime, SystemTime);
1818

1919
#[derive(Debug, Clone, Serialize, Deserialize)]
2020
#[serde(rename_all = "camelCase")]
@@ -23,8 +23,13 @@ struct CacheEntry {
2323
pub symlinks: Vec<FilePathWithMTimeCTime>,
2424
}
2525

26-
fn generate_cache_file(cache_directory: &Path, executable: &PathBuf) -> PathBuf {
27-
cache_directory.join(format!("{}.1.json", generate_hash(executable)))
26+
pub fn generate_cache_file(cache_directory: &Path, executable: &PathBuf) -> PathBuf {
27+
cache_directory.join(format!("{}.2.json", generate_hash(executable)))
28+
}
29+
30+
pub fn delete_cache_file(cache_directory: &Path, executable: &PathBuf) {
31+
let cache_file = generate_cache_file(cache_directory, executable);
32+
let _ = fs::remove_file(cache_file);
2833
}
2934

3035
pub fn get_cache_from_file(
@@ -35,11 +40,29 @@ pub fn get_cache_from_file(
3540
let file = File::open(cache_file.clone()).ok()?;
3641
let reader = BufReader::new(file);
3742
let cache: CacheEntry = serde_json::from_reader(reader).ok()?;
43+
// Account for conflicts in the cache file
44+
// i.e. the hash generated is same for another file, remember we only take the first 16 chars.
45+
if !cache
46+
.environment
47+
.clone()
48+
.symlinks
49+
.unwrap_or_default()
50+
.contains(executable)
51+
{
52+
trace!(
53+
"Cache file {:?} {:?}, does not match executable {:?} (possible hash collision)",
54+
cache_file,
55+
cache.environment,
56+
executable
57+
);
58+
return None;
59+
}
3860

3961
// Check if any of the exes have changed since we last cached them.
4062
let cache_is_valid = cache.symlinks.iter().all(|symlink| {
4163
if let Ok(metadata) = symlink.0.metadata() {
42-
metadata.modified().ok() == symlink.1 && metadata.created().ok() == symlink.2
64+
metadata.modified().ok() == Some(symlink.1)
65+
&& metadata.created().ok() == Some(symlink.2)
4366
} else {
4467
// File may have been deleted.
4568
false
@@ -62,15 +85,27 @@ pub fn store_cache_in_file(
6285
symlinks_with_times: Vec<FilePathWithMTimeCTime>,
6386
) {
6487
let cache_file = generate_cache_file(cache_directory, executable);
65-
let _ = std::fs::create_dir_all(cache_directory);
66-
67-
let cache = CacheEntry {
68-
environment: environment.clone(),
69-
symlinks: symlinks_with_times,
70-
};
71-
if let Ok(file) = std::fs::File::create(cache_file.clone()) {
72-
trace!("Caching {:?} in {:?}", executable, cache_file);
73-
let _ = serde_json::to_writer_pretty(file, &cache).ok();
88+
match std::fs::create_dir_all(cache_directory) {
89+
Ok(_) => {
90+
let cache = CacheEntry {
91+
environment: environment.clone(),
92+
symlinks: symlinks_with_times,
93+
};
94+
match std::fs::File::create(cache_file.clone()) {
95+
Ok(file) => {
96+
trace!("Caching {:?} in {:?}", executable, cache_file);
97+
match serde_json::to_writer_pretty(file, &cache) {
98+
Ok(_) => (),
99+
Err(err) => error!("Error writing cache file {:?} {:?}", cache_file, err),
100+
}
101+
}
102+
Err(err) => error!("Error creating cache file {:?} {:?}", cache_file, err),
103+
}
104+
}
105+
Err(err) => error!(
106+
"Error creating cache directory {:?} {:?}",
107+
cache_directory, err
108+
),
74109
}
75110
}
76111

crates/pet-python-utils/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
pub mod cache;
55
pub mod env;
66
pub mod executable;
7-
mod fs_cache;
7+
pub mod fs_cache;
88
mod headers;
99
pub mod platform_dirs;
1010
pub mod version;

0 commit comments

Comments
 (0)