Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ If I tell you to remember something, you do the same, update
- Multi-page tables must emit `<!-- Table spans pages X-Y -->` comments, continuation markers for each affected page, and populate `table.pageStart`, `table.pageEnd`, and `table.pageRange` metadata so downstream systems can align tables with their source pages.
- PDF converters must honour `SegmentOptions.Pdf.TreatPagesAsImages`, rendering each page to PNG, running OCR/vision enrichment, and composing page segments with image placeholders plus recognized text whenever the option is enabled.
- Persist conversion workspaces through `ManagedCode.Storage`: allocate a unique, sanitized folder per document, copy the source file, store every extracted artifact via `IStorage`, and emit the final Markdown into the same folder.
- Root path configurability: `MarkItDownPathResolver` must support a configurable root via `MarkItDownOptions.RootPath` (non-DI) or `MarkItDownServiceBuilder.UseRootPath()` (DI); the resolver uses a lock-guarded double-check (not `Lazy<string>`) so `Configure()` and first access are atomic, and conflicting paths throw `InvalidOperationException` instead of being silently ignored.

# Repository Guidelines

Expand Down
7 changes: 7 additions & 0 deletions src/MarkItDown/Core/MarkItDownClient.cs
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ public MarkItDownClient(ILogger? logger = null, HttpClient? httpClient = null)
public MarkItDownClient(MarkItDownOptions? options, ILogger? logger = null, HttpClient? httpClient = null)
{
_options = options ?? new MarkItDownOptions();

// Configure path resolver before anything materialises the root.
if (!string.IsNullOrWhiteSpace(_options.RootPath))
{
MarkItDownPathResolver.Configure(_options.RootPath);
}

_logger = logger ?? _options.LoggerFactory?.CreateLogger<MarkItDownClient>();
_httpClient = httpClient;
_converters = [];
Expand Down
7 changes: 7 additions & 0 deletions src/MarkItDown/Core/MarkItDownOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ namespace MarkItDown;
/// </summary>
public sealed record MarkItDownOptions
{
/// <summary>
/// Optional root directory for all MarkItDown workspaces and buffers.
/// Defaults to <c>.markitdown</c> under <see cref="Environment.CurrentDirectory"/>.
/// Set to a writable path in read-only environments (e.g. Azure Functions temp).
/// </summary>
/// <remarks>
/// When this value is not null or whitespace, the <c>MarkItDownClient</c> constructor forwards it to
/// the static <c>MarkItDownPathResolver.Configure</c>, so the setting applies to the whole process
/// rather than to a single client. <c>Configure</c> throws <see cref="InvalidOperationException"/>
/// when the root has already been resolved or configured to a different path.
/// </remarks>
public string? RootPath { get; set; }

/// <summary>
/// Gets or sets a value indicating whether built-in converters should be registered. Defaults to <see langword="true"/>.
/// </summary>
Expand Down
11 changes: 11 additions & 0 deletions src/MarkItDown/DependencyInjection/MarkItDownServiceBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,17 @@ public MarkItDownServiceBuilder AddConverter<TConverter>(Func<IServiceProvider,
return this;
}

/// <summary>
/// Registers an options callback that assigns <paramref name="rootPath"/> to
/// <c>MarkItDownOptions.RootPath</c>, the root directory for MarkItDown disk workspaces and buffers.
/// Call this in <c>Program.cs</c> for read-only environments like Azure Functions.
/// </summary>
/// <param name="rootPath">Directory to use as the root; must not be null, empty, or whitespace.</param>
/// <returns>This builder, so further calls can be chained.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="rootPath"/> is empty or whitespace (a null value raises the derived
/// <see cref="ArgumentNullException"/>).
/// </exception>
public MarkItDownServiceBuilder UseRootPath(string rootPath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);

    // The callback captures the validated path; it runs when the options are materialised.
    Services.Configure<MarkItDownOptions>(options =>
    {
        options.RootPath = rootPath;
    });

    return this;
}

/// <summary>
/// Registers a conversion middleware component that will be included in the pipeline.
/// </summary>
Expand Down
72 changes: 68 additions & 4 deletions src/MarkItDown/Utilities/MarkItDownPathResolver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,74 @@ namespace MarkItDown;
/// </summary>
internal static class MarkItDownPathResolver
{
private static readonly Lazy<string> root = new(CreateRootPath, isThreadSafe: true);
private static readonly object _gate = new();
private static string? _configuredRootPath;
private static string? _resolvedRootPath;

/// <summary>
/// Gets the absolute root directory used for MarkItDown workspaces.
/// Thread-safe; the value is resolved on first access and cached.
/// </summary>
public static string RootPath => root.Value;
public static string RootPath
{
    get
    {
        // Fast path: the root never reverts to null once assigned, so a single
        // unsynchronised read is enough to serve the cached value.
        var cached = _resolvedRootPath;
        if (cached is null)
        {
            // Slow path: resolve under the gate so a concurrent Configure() cannot
            // interleave with the first materialisation of the root.
            lock (_gate)
            {
                if (_resolvedRootPath is null)
                {
                    _resolvedRootPath = CreateRootPath();
                }

                cached = _resolvedRootPath;
            }
        }

        return cached;
    }
}

/// <summary>
/// Replaces the default root directory with <paramref name="rootPath"/>.
/// Intended to run before anything reads <see cref="RootPath"/> (typically
/// by setting <c>MarkItDownOptions.RootPath</c> before constructing a client).
/// Repeating the call with an equivalent path is accepted.
/// </summary>
/// <param name="rootPath">Directory to use as the root; converted to an absolute path before it is stored.</param>
/// <exception cref="ArgumentException"><paramref name="rootPath"/> is null, empty, or whitespace.</exception>
/// <exception cref="InvalidOperationException">
/// The root was already resolved, or previously configured, to a different path.
/// </exception>
internal static void Configure(string rootPath)
{
    ArgumentException.ThrowIfNullOrWhiteSpace(rootPath);

    var requested = Path.GetFullPath(rootPath);

    lock (_gate)
    {
        // Root already materialised: a matching request is a no-op, anything else is a conflict.
        if (_resolvedRootPath is { } resolved)
        {
            if (PathEquals(resolved, requested))
            {
                return;
            }

            throw new InvalidOperationException(
                $"Root already resolved to '{resolved}'; cannot change to '{requested}'.");
        }

        // Root still pending: reject a request that contradicts an earlier Configure call.
        if (_configuredRootPath is { } configured && !PathEquals(configured, requested))
        {
            throw new InvalidOperationException(
                $"Root already configured as '{configured}'; cannot change to '{requested}'.");
        }

        _configuredRootPath = requested;
    }
}

/// <summary>
/// Forces resolution of the workspace root by reading <see cref="RootPath"/>.
/// The first read of <see cref="RootPath"/> calls <c>CreateRootPath</c>, which creates the
/// directory on disk; once the root is cached, the getter returns the cached value without
/// calling <c>CreateRootPath</c> again.
/// </summary>
public static void EnsureRootExists()
{
_ = root.Value;
_ = RootPath;
}

/// <summary>
Expand Down Expand Up @@ -56,8 +111,17 @@ public static string Ensure(params string[] segments)

/// <summary>
/// Chooses the root directory, creates it on disk, and returns its path.
/// The path stored by <c>Configure</c> wins when one was supplied; otherwise the root is
/// <c>.markitdown</c> under <see cref="Environment.CurrentDirectory"/>, made absolute.
/// </summary>
/// <remarks>
/// The <see cref="RootPath"/> getter calls this while holding <c>_gate</c>, the same lock
/// <c>Configure</c> takes before writing <c>_configuredRootPath</c>.
/// </remarks>
/// <returns>The absolute path of the root directory.</returns>
private static string CreateRootPath()
{
var candidate = Path.Combine(Environment.CurrentDirectory, ".markitdown");
var candidate = _configuredRootPath
?? Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, ".markitdown"));
Directory.CreateDirectory(candidate);
return candidate;
}

/// <summary>
/// Determines whether two paths refer to the same directory location after normalisation.
/// Both paths are made absolute and stripped of any trailing directory separator, then
/// compared case-insensitively on Windows and case-sensitively elsewhere.
/// </summary>
/// <param name="a">First path; may be relative.</param>
/// <param name="b">Second path; may be relative.</param>
/// <returns><see langword="true"/> when the normalised paths are equal.</returns>
private static bool PathEquals(string a, string b)
{
    // GetFullPath keeps a trailing separator, so "/tmp/x/" and "/tmp/x" would otherwise
    // compare unequal and Configure would report a conflict for the same directory.
    // TrimEndingDirectorySeparator leaves a bare root ("/" or "C:\") untouched.
    var left = Path.TrimEndingDirectorySeparator(Path.GetFullPath(a));
    var right = Path.TrimEndingDirectorySeparator(Path.GetFullPath(b));

    var comparison = OperatingSystem.IsWindows()
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    return string.Equals(left, right, comparison);
}
}