Skip to content

Commit 6a2d3ee

Browse files
Add working directory and staging area support (#11)
* Add working directory and staging area support with pseudo-refs Implement WORKDIR/WORKTREE and STAGED/INDEX pseudo-refs across the extension, enabling queries against uncommitted changes: - New git_status() table function for querying working tree status - RefKind enum in GitContextManager to distinguish commit/workdir/index refs - git_read() support for reading files from disk (@workdir) or index (@staged) - git_tree() support for listing files from workdir/index with untracked param - git:// filesystem protocol support for read_text/read_csv with pseudo-refs - @shorthand syntax (e.g. git_tree('@workdir')) for convenience Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Address PR review feedback for dirty checks support - Fix staged/unstaged semantics: use flag-based checks instead of pointer presence to avoid marking untracked files as unstaged - Add path traversal protection via SafeWorkdirPath/GetWorkdirRoot using realpath canonicalization - Check git_index_read return values at all 5 callsites - Fix LATERAL git_status_each to return NEED_MORE_INPUT on zero rows - Use GIT_STATUS_OPT_DISABLE_PATHSPEC_MATCH for exact path filtering - Add binary detection for untracked files in git_tree via NUL byte sampling Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Fix code formatting to pass CI format check Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 0bdd2cd commit 6a2d3ee

15 files changed

+1453
-39
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension)
1515
project(${TARGET_NAME})
1616
include_directories(src/include)
1717

18-
set(EXTENSION_SOURCES src/duck_tails_extension.cpp src/git_filesystem.cpp src/git_functions.cpp src/git_log.cpp src/git_path.cpp src/git_utils.cpp src/git_context_manager.cpp src/git_tree.cpp src/git_parents.cpp src/git_branches.cpp src/git_tags.cpp src/git_read.cpp src/git_uri.cpp src/text_diff.cpp src/git_history.cpp)
18+
set(EXTENSION_SOURCES src/duck_tails_extension.cpp src/git_filesystem.cpp src/git_functions.cpp src/git_log.cpp src/git_path.cpp src/git_utils.cpp src/git_context_manager.cpp src/git_tree.cpp src/git_parents.cpp src/git_branches.cpp src/git_tags.cpp src/git_read.cpp src/git_uri.cpp src/text_diff.cpp src/git_history.cpp src/git_status.cpp)
1919

2020
build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
2121
build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES})

src/git_context_manager.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ namespace duckdb {
88
// GitContext Implementation
99
//===--------------------------------------------------------------------===//
1010

11-
GitContextManager::GitContext::GitContext(git_object *obj, const string &rp, const string &fp, const string &ref)
12-
: resolved_object(obj), repo_path(rp), file_path(fp), final_ref(ref) {
11+
GitContextManager::GitContext::GitContext(git_object *obj, const string &rp, const string &fp, const string &ref,
12+
RefKind kind)
13+
: resolved_object(obj), repo_path(rp), file_path(fp), final_ref(ref), ref_kind(kind) {
1314
}
1415

1516
GitContextManager::GitContext::GitContext(GitContext &&other) noexcept
1617
: resolved_object(other.resolved_object), repo_path(std::move(other.repo_path)),
17-
file_path(std::move(other.file_path)), final_ref(std::move(other.final_ref)) {
18+
file_path(std::move(other.file_path)), final_ref(std::move(other.final_ref)), ref_kind(other.ref_kind) {
1819
other.resolved_object = nullptr;
1920
}
2021

@@ -40,6 +41,11 @@ GitContextManager::GitContext GitContextManager::ProcessGitUri(const string &uri
4041
try {
4142
if (StringUtil::StartsWith(uri_or_path, "git://")) {
4243
git_path = GitPath::Parse(uri_or_path);
44+
} else if (StringUtil::StartsWith(uri_or_path, "@")) {
45+
// Shorthand: @WORKDIR, @STAGED, etc. → current dir with pseudo-ref
46+
string ref_part = uri_or_path.substr(1);
47+
string constructed_uri = "git://.@" + ref_part;
48+
git_path = GitPath::Parse(constructed_uri);
4349
} else {
4450
// Filesystem path - construct git URI for uniform processing
4551
string constructed_uri = "git://" + uri_or_path + "@" + fallback_ref;
@@ -49,11 +55,22 @@ GitContextManager::GitContext GitContextManager::ProcessGitUri(const string &uri
4955
throw IOException("GitContextManager: Failed to parse URI '%s': %s", uri_or_path, e.what());
5056
}
5157

52-
// Phase 2: Reference Resolution (opens repo temporarily, validates, then closes)
58+
// Phase 2: Reference Resolution
5359
string final_ref = git_path.revision.empty() ? fallback_ref : git_path.revision;
60+
61+
// Check for pseudo-refs (WORKDIR/WORKTREE, STAGED/INDEX) before git_revparse
62+
string upper_ref = StringUtil::Upper(final_ref);
63+
if (upper_ref == "WORKDIR" || upper_ref == "WORKTREE") {
64+
return GitContext(nullptr, git_path.repository_path, git_path.file_path, final_ref, RefKind::WORKDIR);
65+
}
66+
if (upper_ref == "STAGED" || upper_ref == "INDEX") {
67+
return GitContext(nullptr, git_path.repository_path, git_path.file_path, final_ref, RefKind::INDEX);
68+
}
69+
70+
// Normal commit ref: opens repo temporarily, validates, then closes
5471
git_object *resolved_object = ValidateAndResolveReference(git_path.repository_path, final_ref);
5572

56-
return GitContext(resolved_object, git_path.repository_path, git_path.file_path, final_ref);
73+
return GitContext(resolved_object, git_path.repository_path, git_path.file_path, final_ref, RefKind::COMMIT);
5774
}
5875

5976
git_object *GitContextManager::ValidateAndResolveReference(const string &repo_path, const string &ref) {

src/git_filesystem.cpp

Lines changed: 193 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "git_filesystem.hpp"
2+
#include "git_context_manager.hpp"
3+
#include "git_utils.hpp"
24
#include "duckdb/common/string_util.hpp"
35
#include "duckdb/common/exception.hpp"
46
#include "duckdb/common/local_file_system.hpp"
@@ -216,6 +218,70 @@ bool GitFileSystem::CanHandleFile(const string &fpath) {
216218
return StringUtil::StartsWith(fpath, "git://");
217219
}
218220

221+
// Check if a revision string is a pseudo-ref
222+
static bool IsPseudoRef(const string &revision, RefKind &out_kind) {
223+
string upper = StringUtil::Upper(revision);
224+
if (upper == "WORKDIR" || upper == "WORKTREE") {
225+
out_kind = RefKind::WORKDIR;
226+
return true;
227+
}
228+
if (upper == "STAGED" || upper == "INDEX") {
229+
out_kind = RefKind::INDEX;
230+
return true;
231+
}
232+
return false;
233+
}
234+
235+
// Safe workdir path — delegates to shared SafeWorkdirPath for traversal protection
236+
// For Glob which needs the workdir root separately, use GetWorkdirRoot
237+
238+
// Get blob content from the git index
239+
static string GetIndexBlobContent(const string &repo_path, const string &file_path) {
240+
git_repository *repo = nullptr;
241+
int error = git_repository_open_ext(&repo, repo_path.c_str(), GIT_REPOSITORY_OPEN_NO_SEARCH, nullptr);
242+
if (error != 0) {
243+
throw IOException("Failed to open repository '%s'", repo_path);
244+
}
245+
246+
git_index *index = nullptr;
247+
error = git_repository_index(&index, repo);
248+
if (error != 0) {
249+
git_repository_free(repo);
250+
throw IOException("Failed to get index for repository '%s'", repo_path);
251+
}
252+
253+
error = git_index_read(index, 0);
254+
if (error != 0) {
255+
git_index_free(index);
256+
git_repository_free(repo);
257+
throw IOException("Failed to read index for repository '%s'", repo_path);
258+
}
259+
260+
const git_index_entry *entry = git_index_get_bypath(index, file_path.c_str(), 0);
261+
if (!entry) {
262+
git_index_free(index);
263+
git_repository_free(repo);
264+
throw IOException("File '%s' not found in staging area", file_path);
265+
}
266+
267+
git_blob *blob = nullptr;
268+
error = git_blob_lookup(&blob, repo, &entry->id);
269+
if (error != 0) {
270+
git_index_free(index);
271+
git_repository_free(repo);
272+
throw IOException("Failed to load blob from index for '%s'", file_path);
273+
}
274+
275+
const void *content = git_blob_rawcontent(blob);
276+
git_off_t size = git_blob_rawsize(blob);
277+
string result(static_cast<const char *>(content), size);
278+
279+
git_blob_free(blob);
280+
git_index_free(index);
281+
git_repository_free(repo);
282+
return result;
283+
}
284+
219285
unique_ptr<FileHandle> GitFileSystem::OpenFile(const string &path, FileOpenFlags flags,
220286
optional_ptr<FileOpener> opener) {
221287
if (flags.OpenForWriting()) {
@@ -224,6 +290,30 @@ unique_ptr<FileHandle> GitFileSystem::OpenFile(const string &path, FileOpenFlags
224290

225291
try {
226292
auto git_path = GitPath::Parse(path);
293+
294+
// Check for pseudo-refs
295+
RefKind ref_kind;
296+
if (IsPseudoRef(git_path.revision, ref_kind)) {
297+
if (ref_kind == RefKind::WORKDIR) {
298+
// Delegate to LocalFileSystem
299+
string abs_path = SafeWorkdirPath(git_path.repository_path, git_path.file_path);
300+
LocalFileSystem local_fs;
301+
auto local_handle = local_fs.OpenFile(abs_path, flags, opener);
302+
int64_t file_size = local_fs.GetFileSize(*local_handle);
303+
auto content = make_shared_ptr<string>();
304+
content->resize(static_cast<size_t>(file_size));
305+
if (file_size > 0) {
306+
local_fs.Read(*local_handle, const_cast<char *>(content->data()), file_size);
307+
}
308+
return make_uniq<GitFileHandle>(*this, path, content, flags);
309+
} else {
310+
// INDEX: read from staging area
311+
auto content = GetIndexBlobContent(git_path.repository_path, git_path.file_path);
312+
auto content_ptr = make_shared_ptr<string>(std::move(content));
313+
return make_uniq<GitFileHandle>(*this, path, content_ptr, flags);
314+
}
315+
}
316+
227317
try {
228318
auto repo = OpenRepository(git_path.repository_path);
229319
auto commit_obj = ResolveRevision(repo, git_path.revision);
@@ -253,6 +343,68 @@ vector<OpenFileInfo> GitFileSystem::Glob(const string &pattern, FileOpener *open
253343
try {
254344
auto git_path = GitPath::Parse(pattern);
255345

346+
RefKind ref_kind;
347+
if (IsPseudoRef(git_path.revision, ref_kind)) {
348+
vector<OpenFileInfo> results;
349+
if (ref_kind == RefKind::WORKDIR) {
350+
// Delegate glob to local filesystem within workdir
351+
try {
352+
string workdir_root = GetWorkdirRoot(git_path.repository_path);
353+
string abs_pattern = workdir_root + git_path.file_path;
354+
LocalFileSystem local_fs;
355+
auto local_results = local_fs.Glob(abs_pattern, opener);
356+
// Convert back to git:// URIs
357+
string workdir_prefix = workdir_root;
358+
for (auto &info : local_results) {
359+
string rel_path = info.path;
360+
if (StringUtil::StartsWith(rel_path, workdir_prefix)) {
361+
rel_path = rel_path.substr(workdir_prefix.length());
362+
}
363+
results.emplace_back(
364+
OpenFileInfo {"git://" + git_path.repository_path + "/" + rel_path + "@WORKDIR"});
365+
}
366+
} catch (...) {
367+
// Return empty on error
368+
}
369+
} else {
370+
// INDEX: enumerate index entries matching pattern
371+
try {
372+
git_repository *repo_ptr = nullptr;
373+
int error = git_repository_open_ext(&repo_ptr, git_path.repository_path.c_str(),
374+
GIT_REPOSITORY_OPEN_NO_SEARCH, nullptr);
375+
if (error == 0) {
376+
git_index *index = nullptr;
377+
error = git_repository_index(&index, repo_ptr);
378+
if (error == 0) {
379+
if (git_index_read(index, 0) != 0) {
380+
git_index_free(index);
381+
git_repository_free(repo_ptr);
382+
return results;
383+
}
384+
size_t entry_count = git_index_entrycount(index);
385+
for (size_t i = 0; i < entry_count; i++) {
386+
const git_index_entry *entry = git_index_get_byindex(index, i);
387+
if (entry && entry->path) {
388+
string entry_path(entry->path);
389+
// Simple prefix match for now
390+
if (git_path.file_path.empty() ||
391+
StringUtil::StartsWith(entry_path, git_path.file_path)) {
392+
results.emplace_back(OpenFileInfo {"git://" + git_path.repository_path + "/" +
393+
entry_path + "@STAGED"});
394+
}
395+
}
396+
}
397+
git_index_free(index);
398+
}
399+
git_repository_free(repo_ptr);
400+
}
401+
} catch (...) {
402+
// Return empty on error
403+
}
404+
}
405+
return results;
406+
}
407+
256408
try {
257409
auto repo = OpenRepository(git_path.repository_path);
258410
auto commit_obj = ResolveRevision(repo, git_path.revision);
@@ -275,6 +427,47 @@ bool GitFileSystem::FileExists(const string &filename, optional_ptr<FileOpener>
275427
try {
276428
auto git_path = GitPath::Parse(filename);
277429

430+
RefKind ref_kind;
431+
if (IsPseudoRef(git_path.revision, ref_kind)) {
432+
if (ref_kind == RefKind::WORKDIR) {
433+
try {
434+
string abs_path = SafeWorkdirPath(git_path.repository_path, git_path.file_path);
435+
LocalFileSystem local_fs;
436+
return local_fs.FileExists(abs_path);
437+
} catch (...) {
438+
return false;
439+
}
440+
} else {
441+
// INDEX: check via git_index_get_bypath
442+
try {
443+
git_repository *repo = nullptr;
444+
int error = git_repository_open_ext(&repo, git_path.repository_path.c_str(),
445+
GIT_REPOSITORY_OPEN_NO_SEARCH, nullptr);
446+
if (error != 0) {
447+
return false;
448+
}
449+
git_index *index = nullptr;
450+
error = git_repository_index(&index, repo);
451+
if (error != 0) {
452+
git_repository_free(repo);
453+
return false;
454+
}
455+
if (git_index_read(index, 0) != 0) {
456+
git_index_free(index);
457+
git_repository_free(repo);
458+
return false;
459+
}
460+
const git_index_entry *entry = git_index_get_bypath(index, git_path.file_path.c_str(), 0);
461+
bool exists = (entry != nullptr);
462+
git_index_free(index);
463+
git_repository_free(repo);
464+
return exists;
465+
} catch (...) {
466+
return false;
467+
}
468+
}
469+
}
470+
278471
try {
279472
auto repo = OpenRepository(git_path.repository_path);
280473
auto commit_obj = ResolveRevision(repo, git_path.revision);
@@ -287,8 +480,6 @@ bool GitFileSystem::FileExists(const string &filename, optional_ptr<FileOpener>
287480
return false;
288481
}
289482
} catch (const IOException &e) {
290-
// For repository discovery errors, we still return false for FileExists
291-
// but this maintains consistency with other methods
292483
return false;
293484
}
294485
}

src/git_functions.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ void RegisterGitTreeFunction(ExtensionLoader &loader);
2727
void RegisterGitParentsFunction(ExtensionLoader &loader);
2828
void RegisterGitReadFunction(ExtensionLoader &loader);
2929
void RegisterGitUriFunction(ExtensionLoader &loader);
30+
void RegisterGitStatusFunction(ExtensionLoader &loader);
3031

3132
void RegisterGitFunctions(ExtensionLoader &loader) {
3233
RegisterGitLogFunction(loader);
@@ -36,6 +37,7 @@ void RegisterGitFunctions(ExtensionLoader &loader) {
3637
RegisterGitParentsFunction(loader);
3738
RegisterGitReadFunction(loader);
3839
RegisterGitUriFunction(loader);
40+
RegisterGitStatusFunction(loader);
3941
}
4042

4143
} // namespace duckdb

0 commit comments

Comments
 (0)