diff --git a/AGENTS.md b/AGENTS.md index 14d3775a0..7559c8def 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -46,6 +46,7 @@ If I tell you to remember something, you do the same, update - Multi-page tables must emit `` comments, continuation markers for each affected page, and populate `table.pageStart`, `table.pageEnd`, and `table.pageRange` metadata so downstream systems can align tables with their source pages. - PDF converters must honour `SegmentOptions.Pdf.TreatPagesAsImages`, rendering each page to PNG, running OCR/vision enrichment, and composing page segments with image placeholders plus recognized text whenever the option is enabled. - Persist conversion workspaces through `ManagedCode.Storage` by allocating a unique, sanitized folder per document, copy the source file, store every extracted artifact via `IStorage`, and emit the final Markdown into the same folder. +- Root path configurability: `MarkItDownPathResolver` must support a configurable root via `MarkItDownOptions.RootPath` (non-DI) or `MarkItDownServiceBuilder.UseRootPath()` (DI); the resolver uses a lock-guarded double-check (not `Lazy`) so `Configure()` and first access are atomic, and conflicting paths throw `InvalidOperationException` instead of being silently ignored. # Repository Guidelines diff --git a/src/MarkItDown/Core/MarkItDownClient.cs b/src/MarkItDown/Core/MarkItDownClient.cs index 3076c0e9b..8b0277e08 100644 --- a/src/MarkItDown/Core/MarkItDownClient.cs +++ b/src/MarkItDown/Core/MarkItDownClient.cs @@ -66,6 +66,13 @@ public MarkItDownClient(ILogger? logger = null, HttpClient? httpClient = null) public MarkItDownClient(MarkItDownOptions? options, ILogger? logger = null, HttpClient? httpClient = null) { _options = options ?? new MarkItDownOptions(); + + // Configure path resolver before anything materialises the root. + if (!string.IsNullOrWhiteSpace(_options.RootPath)) + { + MarkItDownPathResolver.Configure(_options.RootPath); + } + _logger = logger ?? 
_options.LoggerFactory?.CreateLogger(); _httpClient = httpClient; _converters = []; diff --git a/src/MarkItDown/Core/MarkItDownOptions.cs b/src/MarkItDown/Core/MarkItDownOptions.cs index bcb54f611..7fcbf2799 100644 --- a/src/MarkItDown/Core/MarkItDownOptions.cs +++ b/src/MarkItDown/Core/MarkItDownOptions.cs @@ -16,6 +16,13 @@ namespace MarkItDown; /// public sealed record MarkItDownOptions { + /// + /// Optional root directory for all MarkItDown workspaces and buffers. + /// Defaults to .markitdown under . + /// Set to a writable path in read-only environments (e.g. Azure Functions temp). + /// + public string? RootPath { get; set; } + /// /// Gets or sets a value indicating whether built-in converters should be registered. Defaults to . /// diff --git a/src/MarkItDown/DependencyInjection/MarkItDownServiceBuilder.cs b/src/MarkItDown/DependencyInjection/MarkItDownServiceBuilder.cs index 397501f96..86d9f9fd8 100644 --- a/src/MarkItDown/DependencyInjection/MarkItDownServiceBuilder.cs +++ b/src/MarkItDown/DependencyInjection/MarkItDownServiceBuilder.cs @@ -47,6 +47,17 @@ public MarkItDownServiceBuilder AddConverter(Func + /// Sets the root directory for MarkItDown disk workspaces and buffers. + /// Call this in Program.cs for read-only environments like Azure Functions. + /// + public MarkItDownServiceBuilder UseRootPath(string rootPath) + { + ArgumentException.ThrowIfNullOrWhiteSpace(rootPath); + Services.Configure(o => o.RootPath = rootPath); + return this; + } + /// /// Registers a conversion middleware component that will be included in the pipeline. 
/// diff --git a/src/MarkItDown/Utilities/MarkItDownPathResolver.cs b/src/MarkItDown/Utilities/MarkItDownPathResolver.cs index 26d8ecd55..20230cc36 100644 --- a/src/MarkItDown/Utilities/MarkItDownPathResolver.cs +++ b/src/MarkItDown/Utilities/MarkItDownPathResolver.cs @@ -8,19 +8,74 @@ namespace MarkItDown; /// internal static class MarkItDownPathResolver { - private static readonly Lazy root = new(CreateRootPath, isThreadSafe: true); + private static readonly object _gate = new(); + private static string? _configuredRootPath; + private static string? _resolvedRootPath; /// /// Gets the absolute root directory used for MarkItDown workspaces. + /// Thread-safe; the value is resolved on first access and cached. /// - public static string RootPath => root.Value; + public static string RootPath + { + get + { + if (_resolvedRootPath is not null) + { + return _resolvedRootPath; + } + + lock (_gate) + { + _resolvedRootPath ??= CreateRootPath(); + return _resolvedRootPath; + } + } + } + + /// + /// Override the default root directory. + /// Must be called before any code accesses (typically + /// by setting MarkItDownOptions.RootPath before constructing a client). + /// Throws if the root has already resolved to a different path. + /// + internal static void Configure(string rootPath) + { + ArgumentException.ThrowIfNullOrWhiteSpace(rootPath); + + var normalized = Path.GetFullPath(rootPath); + + lock (_gate) + { + // Already resolved -- only allow if it matches. + if (_resolvedRootPath is not null) + { + if (!PathEquals(_resolvedRootPath, normalized)) + { + throw new InvalidOperationException( + $"Root already resolved to '{_resolvedRootPath}'; cannot change to '{normalized}'."); + } + + return; + } + + // Not yet resolved -- only allow if no prior Configure set a different path. 
+ if (_configuredRootPath is not null && !PathEquals(_configuredRootPath, normalized)) + { + throw new InvalidOperationException( + $"Root already configured as '{_configuredRootPath}'; cannot change to '{normalized}'."); + } + + _configuredRootPath = normalized; + } + } /// /// Ensure the root directory exists (also invoked by lazy initialization). /// public static void EnsureRootExists() { - _ = root.Value; + _ = RootPath; } /// @@ -56,8 +111,21 @@ public static string Ensure(params string[] segments) private static string CreateRootPath() { - var candidate = Path.Combine(Environment.CurrentDirectory, ".markitdown"); + var candidate = _configuredRootPath + ?? Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, ".markitdown")); Directory.CreateDirectory(candidate); return candidate; } + + // Compares two paths after normalizing both to absolute form. + // Trailing directory separators are trimmed because Path.GetFullPath preserves them, + // so "root" and "root/" would otherwise be reported as conflicting roots. + // Case is ignored only on Windows. + private static bool PathEquals(string a, string b) => + string.Equals( + Path.TrimEndingDirectorySeparator(Path.GetFullPath(a)), + Path.TrimEndingDirectorySeparator(Path.GetFullPath(b)), + OperatingSystem.IsWindows() + ? StringComparison.OrdinalIgnoreCase + : StringComparison.Ordinal); }