Skip to content

Commit 82a3462

Browse files
authored
Merge pull request #26 from edgett/feature/configurable-root-path
feat: configurable root path for read-only environments
2 parents 78cfad1 + 87b071a commit 82a3462

File tree

6 files changed

+96
-6
lines changed

6 files changed

+96
-6
lines changed

AGENTS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ If I tell you to remember something, you do the same, update
4646
- Multi-page tables must emit `<!-- Table spans pages X-Y -->` comments, continuation markers for each affected page, and populate `table.pageStart`, `table.pageEnd`, and `table.pageRange` metadata so downstream systems can align tables with their source pages.
4747
- PDF converters must honour `SegmentOptions.Pdf.TreatPagesAsImages`, rendering each page to PNG, running OCR/vision enrichment, and composing page segments with image placeholders plus recognized text whenever the option is enabled.
4848
- Persist conversion workspaces through `ManagedCode.Storage` by allocating a unique, sanitized folder per document, copying the source file, storing every extracted artifact via `IStorage`, and emitting the final Markdown into the same folder.
49+
- Root path configurability: `MarkItDownPathResolver` must support a configurable root via `MarkItDownOptions.RootPath` (non-DI) or `MarkItDownServiceBuilder.UseRootPath()` (DI); the resolver uses a lock-guarded double-check (not `Lazy<string>`) so `Configure()` and first access are atomic, and conflicting paths throw `InvalidOperationException` instead of being silently ignored.
4950

5051
# Repository Guidelines
5152

Directory.Build.props

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
<PackageLicenseExpression>MIT</PackageLicenseExpression>
2323
<PackageReadmeFile>README.md</PackageReadmeFile>
2424
<Product>Managed Code - MarkItDown</Product>
25-
<Version>10.0.1</Version>
26-
<PackageVersion>10.0.1</PackageVersion>
25+
<Version>10.0.2</Version>
26+
<PackageVersion>10.0.2</PackageVersion>
2727
</PropertyGroup>
2828

2929
<PropertyGroup Condition="'$(GITHUB_ACTIONS)' == 'true'">

src/MarkItDown/Core/MarkItDownClient.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ public MarkItDownClient(ILogger? logger = null, HttpClient? httpClient = null)
6666
public MarkItDownClient(MarkItDownOptions? options, ILogger? logger = null, HttpClient? httpClient = null)
6767
{
6868
_options = options ?? new MarkItDownOptions();
69+
70+
// Configure path resolver before anything materialises the root.
71+
if (!string.IsNullOrWhiteSpace(_options.RootPath))
72+
{
73+
MarkItDownPathResolver.Configure(_options.RootPath);
74+
}
75+
6976
_logger = logger ?? _options.LoggerFactory?.CreateLogger<MarkItDownClient>();
7077
_httpClient = httpClient;
7178
_converters = [];

src/MarkItDown/Core/MarkItDownOptions.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@ namespace MarkItDown;
1616
/// </summary>
1717
public sealed record MarkItDownOptions
1818
{
19+
/// <summary>
20+
/// Optional root directory for all MarkItDown workspaces and buffers.
21+
/// Defaults to <c>.markitdown</c> under <see cref="Environment.CurrentDirectory"/>.
22+
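/// Set to a writable path in read-only environments (e.g. the Azure Functions temp directory). The root is shared process-wide: once a root has been configured or resolved, constructing a client with a different path throws <see cref="InvalidOperationException"/>.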
23+
/// </summary>
24+
public string? RootPath { get; set; }
25+
1926
/// <summary>
2027
/// Gets or sets a value indicating whether built-in converters should be registered. Defaults to <see langword="true"/>.
2128
/// </summary>

src/MarkItDown/DependencyInjection/MarkItDownServiceBuilder.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ public MarkItDownServiceBuilder AddConverter<TConverter>(Func<IServiceProvider,
4747
return this;
4848
}
4949

50+
/// <summary>
/// Registers the root directory that MarkItDown should use for its disk workspaces and buffers.
/// Intended for hosts whose working directory is read-only (for example Azure Functions);
/// call it during application start-up in <c>Program.cs</c>.
/// </summary>
/// <param name="rootPath">Directory to use as the root; must not be null, empty, or whitespace.</param>
/// <returns>The same builder instance, so calls can be chained.</returns>
/// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty, or whitespace.</exception>
public MarkItDownServiceBuilder UseRootPath(string rootPath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);

    // Recorded on the options object; nothing is resolved or created on disk here.
    Services.Configure<MarkItDownOptions>(options =>
    {
        options.RootPath = rootPath;
    });

    return this;
}
60+
5061
/// <summary>
5162
/// Registers a conversion middleware component that will be included in the pipeline.
5263
/// </summary>

src/MarkItDown/Utilities/MarkItDownPathResolver.cs

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,74 @@ namespace MarkItDown;
88
/// </summary>
99
internal static class MarkItDownPathResolver
1010
{
11-
private static readonly Lazy<string> root = new(CreateRootPath, isThreadSafe: true);
11+
private static readonly object _gate = new();
12+
private static string? _configuredRootPath;
13+
private static string? _resolvedRootPath;
1214

1315
/// <summary>
1416
/// Gets the absolute root directory used for MarkItDown workspaces.
17+
/// Thread-safe; the value is resolved on first access and cached.
1518
/// </summary>
16-
public static string RootPath => root.Value;
19+
public static string RootPath
{
    get
    {
        // Fast path: once resolved, the cached value is returned without taking the lock.
        var cached = _resolvedRootPath;
        if (cached is null)
        {
            // Slow path: resolve under the same gate Configure() uses, re-checking the
            // field so that only one caller ever runs CreateRootPath().
            lock (_gate)
            {
                cached = _resolvedRootPath ??= CreateRootPath();
            }
        }

        return cached;
    }
}
35+
36+
/// <summary>
37+
/// Override the default root directory.
38+
/// Must be called before any code accesses <see cref="RootPath"/> (typically
39+
/// by setting <c>MarkItDownOptions.RootPath</c> before constructing a client).
40+
/// Throws if the root has already resolved to a different path.
41+
/// </summary>
42+
internal static void Configure(string rootPath)
43+
{
44+
ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);
45+
46+
var normalized = Path.GetFullPath(rootPath);
47+
48+
lock (_gate)
49+
{
50+
// Already resolved -- only allow if it matches.
51+
if (_resolvedRootPath is not null)
52+
{
53+
if (!PathEquals(_resolvedRootPath, normalized))
54+
{
55+
throw new InvalidOperationException(
56+
$"Root already resolved to '{_resolvedRootPath}'; cannot change to '{normalized}'.");
57+
}
58+
59+
return;
60+
}
61+
62+
// Not yet resolved -- only allow if no prior Configure set a different path.
63+
if (_configuredRootPath is not null && !PathEquals(_configuredRootPath, normalized))
64+
{
65+
throw new InvalidOperationException(
66+
$"Root already configured as '{_configuredRootPath}'; cannot change to '{normalized}'.");
67+
}
68+
69+
_configuredRootPath = normalized;
70+
}
71+
}
1772

1873
/// <summary>
1974
/// Ensure the root directory exists by forcing <see cref="RootPath"/> to resolve; the directory is created the first time the root is resolved.
2075
/// </summary>
2176
public static void EnsureRootExists()
2277
{
23-
_ = root.Value;
78+
_ = RootPath;
2479
}
2580

2681
/// <summary>
@@ -56,8 +111,17 @@ public static string Ensure(params string[] segments)
56111

57112
private static string CreateRootPath()
58113
{
59-
var candidate = Path.Combine(Environment.CurrentDirectory, ".markitdown");
114+
var candidate = _configuredRootPath
115+
?? Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, ".markitdown"));
60116
Directory.CreateDirectory(candidate);
61117
return candidate;
62118
}
119+
120+
/// <summary>
/// Determines whether two paths point at the same location once normalized.
/// </summary>
/// <remarks>
/// Each path is made absolute and stripped of a trailing directory separator, so
/// <c>/tmp/work</c> and <c>/tmp/work/</c> compare equal. The comparison ignores case on
/// Windows and is case-sensitive on every other platform.
/// </remarks>
/// <param name="a">First path to compare.</param>
/// <param name="b">Second path to compare.</param>
/// <returns><see langword="true"/> when both paths normalize to the same string.</returns>
private static bool PathEquals(string a, string b)
{
    var comparison = OperatingSystem.IsWindows()
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    return string.Equals(Normalize(a), Normalize(b), comparison);

    // Path.GetFullPath preserves a trailing separator; trim it so that two spellings of
    // the same directory are not reported as conflicting roots.
    static string Normalize(string path) =>
        Path.TrimEndingDirectorySeparator(Path.GetFullPath(path));
}
63127
}

0 commit comments

Comments
 (0)