object-file: split out functions relating to index subsystem

pks-t · gitster · commit 7498a964f386 · 2025-04-11T07:28:06.000-07:00
Split out functions relating to the index subsystem from "object-file.c"
to help us separate concerns.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff --git a/builtin/difftool.c b/builtin/difftool.c
@@ -22,7 +22,7 @@
 #include "gettext.h"
 #include "hex.h"
 #include "parse-options.h"
-#include "read-cache-ll.h"
+#include "read-cache.h"
 #include "repository.h"
 #include "sparse-index.h"
 #include "strvec.h"
diff --git a/builtin/hash-object.c b/builtin/hash-object.c
@@ -15,6 +15,7 @@
 #include "blob.h"
 #include "quote.h"
 #include "parse-options.h"
+#include "read-cache.h"
 #include "setup.h"
 #include "strbuf.h"
 #include "write-or-die.h"
diff --git a/builtin/replace.c b/builtin/replace.c
@@ -20,6 +20,7 @@
 #include "object-file.h"
 #include "object-name.h"
 #include "object-store-ll.h"
+#include "read-cache.h"
 #include "replace-object.h"
 #include "tag.h"
 #include "wildmatch.h"
diff --git a/bulk-checkin.c b/bulk-checkin.c
@@ -11,6 +11,7 @@
 #include "gettext.h"
 #include "hex.h"
 #include "lockfile.h"
+#include "read-cache.h"
 #include "repository.h"
 #include "csum-file.h"
 #include "pack.h"
diff --git a/diff.c b/diff.c
@@ -42,7 +42,7 @@
 #include "dir.h"
 #include "object-file.h"
 #include "object-name.h"
-#include "read-cache-ll.h"
+#include "read-cache.h"
 #include "setup.h"
 #include "strmap.h"
 #include "ws.h"
diff --git a/notes-merge.c b/notes-merge.c
@@ -10,6 +10,7 @@
 #include "object-name.h"
 #include "object-store-ll.h"
 #include "path.h"
+#include "read-cache.h"
 #include "repository.h"
 #include "diff.h"
 #include "diffcore.h"
diff --git a/object-file.c b/object-file.c
@@ -12,9 +12,7 @@
 
 #include "git-compat-util.h"
 #include "bulk-checkin.h"
-#include "convert.h"
 #include "environment.h"
-#include "fsck.h"
 #include "gettext.h"
 #include "hex.h"
 #include "loose.h"
@@ -25,22 +23,11 @@
 #include "pack.h"
 #include "packfile.h"
 #include "path.h"
-#include "setup.h"
 #include "streaming.h"
 
 /* The maximum size for an object header. */
 #define MAX_HEADER_LEN 32
 
-static int get_conv_flags(unsigned flags)
-{
-	if (flags & INDEX_RENORMALIZE)
-		return CONV_EOL_RENORMALIZE;
-	else if (flags & INDEX_WRITE_OBJECT)
-		return global_conv_flags_eol | CONV_WRITE_OBJECT;
-	else
-		return 0;
-}
-
 static void fill_loose_path(struct strbuf *buf, const struct object_id *oid)
 {
 	int i;
@@ -1225,218 +1212,6 @@ int force_object_loose(const struct object_id *oid, time_t mtime)
 	return ret;
 }
 
-/*
- * We can't use the normal fsck_error_function() for index_mem(),
- * because we don't yet have a valid oid for it to report. Instead,
- * report the minimal fsck error here, and rely on the caller to
- * give more context.
- */
-static int hash_format_check_report(struct fsck_options *opts UNUSED,
-				    void *fsck_report UNUSED,
-				    enum fsck_msg_type msg_type UNUSED,
-				    enum fsck_msg_id msg_id UNUSED,
-				    const char *message)
-{
-	error(_("object fails fsck: %s"), message);
-	return 1;
-}
-
-static int index_mem(struct index_state *istate,
-		     struct object_id *oid,
-		     const void *buf, size_t size,
-		     enum object_type type,
-		     const char *path, unsigned flags)
-{
-	struct strbuf nbuf = STRBUF_INIT;
-	int ret = 0;
-	int write_object = flags & INDEX_WRITE_OBJECT;
-
-	if (!type)
-		type = OBJ_BLOB;
-
-	/*
-	 * Convert blobs to git internal format
-	 */
-	if ((type == OBJ_BLOB) && path) {
-		if (convert_to_git(istate, path, buf, size, &nbuf,
-				   get_conv_flags(flags))) {
-			buf = nbuf.buf;
-			size = nbuf.len;
-		}
-	}
-	if (flags & INDEX_FORMAT_CHECK) {
-		struct fsck_options opts = FSCK_OPTIONS_DEFAULT;
-
-		opts.strict = 1;
-		opts.error_func = hash_format_check_report;
-		if (fsck_buffer(null_oid(the_hash_algo), type, buf, size, &opts))
-			die(_("refusing to create malformed object"));
-		fsck_finish(&opts);
-	}
-
-	if (write_object)
-		ret = write_object_file(buf, size, type, oid);
-	else
-		hash_object_file(the_hash_algo, buf, size, type, oid);
-
-	strbuf_release(&nbuf);
-	return ret;
-}
-
-static int index_stream_convert_blob(struct index_state *istate,
-				     struct object_id *oid,
-				     int fd,
-				     const char *path,
-				     unsigned flags)
-{
-	int ret = 0;
-	const int write_object = flags & INDEX_WRITE_OBJECT;
-	struct strbuf sbuf = STRBUF_INIT;
-
-	assert(path);
-	assert(would_convert_to_git_filter_fd(istate, path));
-
-	convert_to_git_filter_fd(istate, path, fd, &sbuf,
-				 get_conv_flags(flags));
-
-	if (write_object)
-		ret = write_object_file(sbuf.buf, sbuf.len, OBJ_BLOB,
-					oid);
-	else
-		hash_object_file(the_hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB,
-				 oid);
-	strbuf_release(&sbuf);
-	return ret;
-}
-
-static int index_pipe(struct index_state *istate, struct object_id *oid,
-		      int fd, enum object_type type,
-		      const char *path, unsigned flags)
-{
-	struct strbuf sbuf = STRBUF_INIT;
-	int ret;
-
-	if (strbuf_read(&sbuf, fd, 4096) >= 0)
-		ret = index_mem(istate, oid, sbuf.buf, sbuf.len, type, path, flags);
-	else
-		ret = -1;
-	strbuf_release(&sbuf);
-	return ret;
-}
-
-#define SMALL_FILE_SIZE (32*1024)
-
-static int index_core(struct index_state *istate,
-		      struct object_id *oid, int fd, size_t size,
-		      enum object_type type, const char *path,
-		      unsigned flags)
-{
-	int ret;
-
-	if (!size) {
-		ret = index_mem(istate, oid, "", size, type, path, flags);
-	} else if (size <= SMALL_FILE_SIZE) {
-		char *buf = xmalloc(size);
-		ssize_t read_result = read_in_full(fd, buf, size);
-		if (read_result < 0)
-			ret = error_errno(_("read error while indexing %s"),
-					  path ? path : "<unknown>");
-		else if (read_result != size)
-			ret = error(_("short read while indexing %s"),
-				    path ? path : "<unknown>");
-		else
-			ret = index_mem(istate, oid, buf, size, type, path, flags);
-		free(buf);
-	} else {
-		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-		ret = index_mem(istate, oid, buf, size, type, path, flags);
-		munmap(buf, size);
-	}
-	return ret;
-}
-
-/*
- * This creates one packfile per large blob unless bulk-checkin
- * machinery is "plugged".
- *
- * This also bypasses the usual "convert-to-git" dance, and that is on
- * purpose. We could write a streaming version of the converting
- * functions and insert that before feeding the data to fast-import
- * (or equivalent in-core API described above). However, that is
- * somewhat complicated, as we do not know the size of the filter
- * result, which we need to know beforehand when writing a git object.
- * Since the primary motivation for trying to stream from the working
- * tree file and to avoid mmaping it in core is to deal with large
- * binary blobs, they generally do not want to get any conversion, and
- * callers should avoid this code path when filters are requested.
- */
-static int index_blob_stream(struct object_id *oid, int fd, size_t size,
-			     const char *path,
-			     unsigned flags)
-{
-	return index_blob_bulk_checkin(oid, fd, size, path, flags);
-}
-
-int index_fd(struct index_state *istate, struct object_id *oid,
-	     int fd, struct stat *st,
-	     enum object_type type, const char *path, unsigned flags)
-{
-	int ret;
-
-	/*
-	 * Call xsize_t() only when needed to avoid potentially unnecessary
-	 * die() for large files.
-	 */
-	if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
-		ret = index_stream_convert_blob(istate, oid, fd, path, flags);
-	else if (!S_ISREG(st->st_mode))
-		ret = index_pipe(istate, oid, fd, type, path, flags);
-	else if (st->st_size <= repo_settings_get_big_file_threshold(the_repository) ||
-		 type != OBJ_BLOB ||
-		 (path && would_convert_to_git(istate, path)))
-		ret = index_core(istate, oid, fd, xsize_t(st->st_size),
-				 type, path, flags);
-	else
-		ret = index_blob_stream(oid, fd, xsize_t(st->st_size), path,
-					flags);
-	close(fd);
-	return ret;
-}
-
-int index_path(struct index_state *istate, struct object_id *oid,
-	       const char *path, struct stat *st, unsigned flags)
-{
-	int fd;
-	struct strbuf sb = STRBUF_INIT;
-	int rc = 0;
-
-	switch (st->st_mode & S_IFMT) {
-	case S_IFREG:
-		fd = open(path, O_RDONLY);
-		if (fd < 0)
-			return error_errno("open(\"%s\")", path);
-		if (index_fd(istate, oid, fd, st, OBJ_BLOB, path, flags) < 0)
-			return error(_("%s: failed to insert into database"),
-				     path);
-		break;
-	case S_IFLNK:
-		if (strbuf_readlink(&sb, path, st->st_size))
-			return error_errno("readlink(\"%s\")", path);
-		if (!(flags & INDEX_WRITE_OBJECT))
-			hash_object_file(the_hash_algo, sb.buf, sb.len,
-					 OBJ_BLOB, oid);
-		else if (write_object_file(sb.buf, sb.len, OBJ_BLOB, oid))
-			rc = error(_("%s: failed to insert into database"), path);
-		strbuf_release(&sb);
-		break;
-	case S_IFDIR:
-		return repo_resolve_gitlink_ref(the_repository, path, "HEAD", oid);
-	default:
-		return error(_("%s: unsupported file type"), path);
-	}
-	return rc;
-}
-
 int read_pack_header(int fd, struct pack_header *header)
 {
 	if (read_in_full(fd, header, sizeof(*header)) != sizeof(*header))
diff --git a/object-file.h b/object-file.h
@@ -4,8 +4,6 @@
 #include "git-zlib.h"
 #include "object.h"
 
-struct index_state;
-
 /*
  * Set this to 0 to prevent oid_object_info_extended() from fetching missing
  * blobs. This has a difference only if extensions.partialClone is set.
@@ -14,15 +12,6 @@ struct index_state;
  */
 extern int fetch_if_missing;
 
-enum {
-	INDEX_WRITE_OBJECT = (1 << 0),
-	INDEX_FORMAT_CHECK = (1 << 1),
-	INDEX_RENORMALIZE  = (1 << 2),
-};
-
-int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct stat *st, enum object_type type, const char *path, unsigned flags);
-int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags);
-
 struct object_directory;
 
 const char *odb_loose_path(struct object_directory *odb,
diff --git a/read-cache.c b/read-cache.c
diff --git a/read-cache.h b/read-cache.h