Skip to content

Commit e36442c

Browse files
author
Malcolm Greaves
committed
wip oxen df add-image
1 parent edaa18f commit e36442c

File tree

6 files changed

+925
-11
lines changed

6 files changed

+925
-11
lines changed

oxen-rust/src/cli/src/cmd/df.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@ use liboxen::model::LocalRepository;
99
use liboxen::util::fs;
1010

1111
use crate::cmd::RunCmd;
12+
13+
pub mod add_image;
14+
1215
pub const NAME: &str = "df";
1316
pub struct DFCmd;
1417

@@ -20,10 +23,14 @@ impl RunCmd for DFCmd {
2023

2124
fn args(&self) -> Command {
2225
// Set up the CLI args for the command
26+
let add_image_cmd = add_image::AddImageCmd;
2327
Command::new(NAME)
2428
.about("View and transform data frames. Supported types: csv, tsv, ndjson, jsonl, parquet.")
2529
.arg(arg!(<PATH> ... "The DataFrame you want to process. If in the schema subcommand the schema ref."))
2630
.arg_required_else_help(true)
31+
.subcommand_negates_reqs(true)
32+
.args_conflicts_with_subcommands(true)
33+
.subcommand(add_image_cmd.args())
2734
.arg(
2835
Arg::new("write")
2936
.long("write")
@@ -240,6 +247,21 @@ impl RunCmd for DFCmd {
240247
}
241248

242249
async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> {
250+
// Check for subcommands first
251+
if let Some((name, sub_matches)) = args.subcommand() {
252+
match name {
253+
add_image::NAME => {
254+
let cmd = add_image::AddImageCmd;
255+
return cmd.run(sub_matches).await;
256+
}
257+
_ => {
258+
return Err(OxenError::basic_str(format!(
259+
"Unknown df subcommand: {name}"
260+
)));
261+
}
262+
}
263+
}
264+
243265
// Parse Args
244266
let mut opts = DFCmd::parse_df_args(args);
245267
let Some(path) = args.get_one::<String>("PATH") else {
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
use std::path::PathBuf;
2+
3+
use async_trait::async_trait;
4+
use clap::{Arg, Command};
5+
6+
use liboxen::command;
7+
use liboxen::error::OxenError;
8+
use liboxen::model::LocalRepository;
9+
10+
use crate::cmd::RunCmd;
11+
12+
pub const NAME: &str = "add-image";
13+
14+
pub struct AddImageCmd;
15+
16+
#[async_trait]
17+
impl RunCmd for AddImageCmd {
18+
fn name(&self) -> &str {
19+
NAME
20+
}
21+
22+
fn args(&self) -> Command {
23+
Command::new(NAME)
24+
.about("Add image(s) to a data frame, copying external images into the repo and staging all files.")
25+
.arg(
26+
Arg::new("IMAGE_PATH")
27+
.help("Path(s) to image file(s) to add")
28+
.required(true)
29+
.num_args(1..),
30+
)
31+
.arg(
32+
Arg::new("file")
33+
.long("file")
34+
.short('f')
35+
.help("The data frame file to add the image path(s) to")
36+
.required(true),
37+
)
38+
.arg(
39+
Arg::new("dest")
40+
.long("dest")
41+
.help("Destination directory or path in the repo for external images"),
42+
)
43+
.arg(
44+
Arg::new("extension")
45+
.long("extension")
46+
.help("Override the data frame format (e.g. csv, tsv, parquet)"),
47+
)
48+
}
49+
50+
async fn run(&self, args: &clap::ArgMatches) -> Result<(), OxenError> {
51+
let repo = LocalRepository::from_current_dir()?;
52+
53+
let current_dir = std::env::current_dir()
54+
.map_err(|e| OxenError::basic_str(format!("Failed to get current directory: {e}")))?;
55+
56+
// Collect image paths, resolving relative to CWD
57+
let image_paths: Vec<PathBuf> = args
58+
.get_many::<String>("IMAGE_PATH")
59+
.ok_or_else(|| OxenError::basic_str("At least one IMAGE_PATH is required"))?
60+
.map(|p| {
61+
let path = PathBuf::from(p);
62+
if path.is_absolute() {
63+
path
64+
} else {
65+
current_dir.join(path)
66+
}
67+
})
68+
.collect();
69+
70+
// Get df path relative to repo root
71+
let df_arg = args
72+
.get_one::<String>("file")
73+
.ok_or_else(|| OxenError::basic_str("--file is required"))?;
74+
let df_path = PathBuf::from(df_arg);
75+
let df_repo_relative = if df_path.is_absolute() {
76+
liboxen::util::fs::path_relative_to_dir(&df_path, &repo.path)?
77+
} else {
78+
// Resolve relative to CWD, then make repo-relative
79+
let abs_df = current_dir.join(&df_path);
80+
// If the file doesn't exist yet, we can't canonicalize.
81+
// Just compute the relative path.
82+
if abs_df.exists() {
83+
let canonical = abs_df.canonicalize().map_err(|e| {
84+
OxenError::basic_str(format!("Could not canonicalize {abs_df:?}: {e}"))
85+
})?;
86+
let repo_canonical = repo.path.canonicalize().map_err(|e| {
87+
OxenError::basic_str(format!(
88+
"Could not canonicalize repo path {:?}: {e}",
89+
repo.path
90+
))
91+
})?;
92+
liboxen::util::fs::path_relative_to_dir(&canonical, &repo_canonical)?
93+
} else {
94+
let repo_canonical = repo.path.canonicalize().map_err(|e| {
95+
OxenError::basic_str(format!(
96+
"Could not canonicalize repo path {:?}: {e}",
97+
repo.path
98+
))
99+
})?;
100+
let abs_cwd_canonical = current_dir.canonicalize().map_err(|e| {
101+
OxenError::basic_str(format!("Could not canonicalize CWD: {e}"))
102+
})?;
103+
let cwd_relative =
104+
liboxen::util::fs::path_relative_to_dir(&abs_cwd_canonical, &repo_canonical)?;
105+
cwd_relative.join(&df_path)
106+
}
107+
};
108+
109+
let dest = args.get_one::<String>("dest").map(PathBuf::from);
110+
let dest_ref = dest.as_deref();
111+
112+
let extension_override = args.get_one::<String>("extension").map(|s| s.as_str());
113+
114+
let result = command::df::add_images(
115+
&repo,
116+
&df_repo_relative,
117+
&image_paths,
118+
dest_ref,
119+
extension_override,
120+
)
121+
.await?;
122+
123+
println!(
124+
"Added {} image(s) to data frame '{}'",
125+
result.len(),
126+
df_repo_relative.display()
127+
);
128+
129+
Ok(())
130+
}
131+
}

oxen-rust/src/lib/src/command/df.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
//! Interact with DataFrames
44
//!
55
6-
use std::path::Path;
6+
use std::path::{Path, PathBuf};
77

88
use crate::core::df::tabular;
99
use crate::error::OxenError;
@@ -74,6 +74,24 @@ pub async fn add_row(path: &Path, data: &str) -> Result<(), OxenError> {
7474
}
7575
}
7676

77+
/// Add images to a dataframe, copying external images into the repo and staging all files.
78+
pub async fn add_images(
79+
repo: &LocalRepository,
80+
df_path: &Path,
81+
image_paths: &[PathBuf],
82+
dest: Option<&Path>,
83+
extension_override: Option<&str>,
84+
) -> Result<Vec<PathBuf>, OxenError> {
85+
repositories::data_frames::images::add_images_to_df(
86+
repo,
87+
df_path,
88+
image_paths,
89+
dest,
90+
extension_override,
91+
)
92+
.await
93+
}
94+
7795
/// Add a column to a dataframe
7896
pub async fn add_column(path: &Path, data: &str) -> Result<(), OxenError> {
7997
if util::fs::is_tabular(path) {

oxen-rust/src/lib/src/core/df/tabular.rs

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1427,20 +1427,30 @@ pub fn write_df(df: &mut DataFrame, path: impl AsRef<Path>) -> Result<(), OxenEr
14271427
let err = format!("Unknown file type write_df {path:?} {extension:?}");
14281428

14291429
match extension {
1430-
Some(extension) => match extension {
1431-
"ndjson" => write_df_jsonl(df, path),
1432-
"jsonl" => write_df_jsonl(df, path),
1433-
"json" => write_df_json(df, path),
1434-
"tsv" => write_df_csv(df, path, b'\t'),
1435-
"csv" => write_df_csv(df, path, b','),
1436-
"parquet" => write_df_parquet(df, path),
1437-
"arrow" => write_df_arrow(df, path),
1438-
_ => Err(OxenError::basic_str(err)),
1439-
},
1430+
Some(ext) => write_df_with_ext(df, path, ext),
14401431
None => Err(OxenError::basic_str(err)),
14411432
}
14421433
}
14431434

1435+
pub fn write_df_with_ext(
1436+
df: &mut DataFrame,
1437+
path: impl AsRef<Path>,
1438+
ext: &str,
1439+
) -> Result<(), OxenError> {
1440+
let path = path.as_ref();
1441+
match ext {
1442+
"ndjson" | "jsonl" => write_df_jsonl(df, path),
1443+
"json" => write_df_json(df, path),
1444+
"tsv" => write_df_csv(df, path, b'\t'),
1445+
"csv" => write_df_csv(df, path, b','),
1446+
"parquet" => write_df_parquet(df, path),
1447+
"arrow" => write_df_arrow(df, path),
1448+
_ => Err(OxenError::basic_str(format!(
1449+
"Unknown file type write_df_with_ext {path:?} {ext:?}"
1450+
))),
1451+
}
1452+
}
1453+
14441454
pub async fn copy_df(
14451455
input: impl AsRef<Path>,
14461456
output: impl AsRef<Path>,

oxen-rust/src/lib/src/repositories/data_frames.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use crate::opts::DFOpts;
77

88
use std::path::Path;
99

10+
pub mod images;
1011
pub mod schemas;
1112

1213
pub async fn get_slice(

0 commit comments

Comments
 (0)