diff --git a/.release-please-manifest.json b/.release-please-manifest.json index fea34540..141e7cde 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.0.0" + ".": "0.47.0" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 9f60014e..8e94f914 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ -configured_endpoints: 51 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-68449c5f406b2b3bc2b94ef7f94c47b3724aa0a69e1033bef1a84477f79420f5.yml -openapi_spec_hash: 2b6de88d9a14a977a66f37c066d0b47a -config_hash: fb2cfcdc5ef83ff03407a8b66c26a59b +configured_endpoints: 56 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/mixedbread%2Fmixedbread-83afe8d98b70a903eb9aa13b512b18aa3df9e5ec4c784bded17ac78d0a163c3c.yml +openapi_spec_hash: 90b33b757e12f21c94705b6243054b5f +config_hash: c32ffa6858a02d7f23f6f3dda0b461ed diff --git a/CHANGELOG.md b/CHANGELOG.md index 844a5b1c..61f02e05 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,44 @@ # Changelog +## 0.47.0 (2026-03-05) + +Full Changelog: [v1.0.0...v0.47.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v1.0.0...v0.47.0) + +### Features + +* **api:** add uploads (multipart) subresource to files ([f510cc9](https://github.com/mixedbread-ai/mixedbread-python/commit/f510cc9d7ae96b0874ee2e7ed9b6f4bf37fe8a07)) +* **api:** api update ([e95f5f1](https://github.com/mixedbread-ai/mixedbread-python/commit/e95f5f1ca1ad4215a5d2bc2fa3e5e6838bd17c3c)) +* **api:** api update ([eb27c60](https://github.com/mixedbread-ai/mixedbread-python/commit/eb27c606437eea717438344e343c1adfa5ebff16)) +* **api:** api update ([c4be5ae](https://github.com/mixedbread-ai/mixedbread-python/commit/c4be5ae024212e1fd1feaabdc1d2b07c14228eb7)) +* **api:** api update ([153c4ba](https://github.com/mixedbread-ai/mixedbread-python/commit/153c4ba201048975a882983f882491352a1d7bd3)) +* **api:** api update 
([216baf6](https://github.com/mixedbread-ai/mixedbread-python/commit/216baf6093ec37db5b57d0fb675a7c7212bea750)) +* **api:** api update ([8fef561](https://github.com/mixedbread-ai/mixedbread-python/commit/8fef5613483e10652fd1e35ba3e2097a26a5cfcd)) +* **api:** api update ([10a541d](https://github.com/mixedbread-ai/mixedbread-python/commit/10a541d3fa4015fdb5e2b6720fdf1f97322b5439)) +* **api:** files uploads (multipart) subresource ([93a98e6](https://github.com/mixedbread-ai/mixedbread-python/commit/93a98e6c38cce16f1de10df080e6a81e9be25f93)) +* **api:** manual updates ([e38a505](https://github.com/mixedbread-ai/mixedbread-python/commit/e38a505829b962ad8115859cfb3eff149e3c6cb3)) +* **api:** manual updates ([860868e](https://github.com/mixedbread-ai/mixedbread-python/commit/860868e1b9d9c56570fe05e9525bd17bcf427515)) +* **api:** manual updates ([33abdc9](https://github.com/mixedbread-ai/mixedbread-python/commit/33abdc949c57c09804841b7d53c0fec22a02133a)) +* use multipart uploads API for large files automatically with optional config ([#18](https://github.com/mixedbread-ai/mixedbread-python/issues/18)) ([ff0f83f](https://github.com/mixedbread-ai/mixedbread-python/commit/ff0f83fda625c0c67d478bd4cccd6cd179fea44a)) + + +### Bug Fixes + +* cancel orphaned upload tasks and fix file size calc for io bytes ([#21](https://github.com/mixedbread-ai/mixedbread-python/issues/21)) ([46aea64](https://github.com/mixedbread-ai/mixedbread-python/commit/46aea647e9aa19ee8095dd1bafe8c54d9f1d7bb9)) +* count uploaded bytes correctly even if parts finish out of order ([#20](https://github.com/mixedbread-ai/mixedbread-python/issues/20)) ([eb8af86](https://github.com/mixedbread-ai/mixedbread-python/commit/eb8af86bcc757e50199d5fc963cb203897b704b6)) +* do not block event loop for read_part on async coroutine in multipart upload ([#19](https://github.com/mixedbread-ai/mixedbread-python/issues/19)) 
([537f825](https://github.com/mixedbread-ai/mixedbread-python/commit/537f8254bfa0e11fe3a32c14d5705fdac5b9bc04)) +* do not drop request options on multipart upload path ([#23](https://github.com/mixedbread-ai/mixedbread-python/issues/23)) ([2859304](https://github.com/mixedbread-ai/mixedbread-python/commit/2859304036ccc25bcee28d6f5a1c564c6a70ca9e)) +* explicit multipart upload config in store files APIs ([#22](https://github.com/mixedbread-ai/mixedbread-python/issues/22)) ([0ebb7b0](https://github.com/mixedbread-ai/mixedbread-python/commit/0ebb7b0d4cbdd0fd41842e5ac2caa8a4b0d3d9aa)) + + +### Chores + +* format all `api.md` files ([d167e2d](https://github.com/mixedbread-ai/mixedbread-python/commit/d167e2d222b34334f86313213bb01d7c8a003f18)) +* **internal:** add request options to SSE classes ([f241bf6](https://github.com/mixedbread-ai/mixedbread-python/commit/f241bf6d47c31faf166869ef96219d7c7d4c8e10)) +* **internal:** fix lint error on Python 3.14 ([72f3d05](https://github.com/mixedbread-ai/mixedbread-python/commit/72f3d05d8e2b6bea43db9001270d836f21254a9e)) +* **internal:** make `test_proxy_environment_variables` more resilient ([d0ec2d9](https://github.com/mixedbread-ai/mixedbread-python/commit/d0ec2d97d74bbf31468fd8ef537f8d142dc92fc7)) +* **internal:** make `test_proxy_environment_variables` more resilient to env ([c287c24](https://github.com/mixedbread-ai/mixedbread-python/commit/c287c24551007387ccd78ce9faea8d4bae02719e)) +* update mock server docs ([b3523db](https://github.com/mixedbread-ai/mixedbread-python/commit/b3523db2445a8a389ca9828deea5d991c095bd0f)) + ## 1.0.0 (2026-02-12) Full Changelog: [v0.46.0...v1.0.0](https://github.com/mixedbread-ai/mixedbread-python/compare/v0.46.0...v1.0.0) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0b147f05..ed6fa9a1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -88,8 +88,7 @@ $ pip install ./path-to-wheel-file.whl Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the 
OpenAPI spec to run the tests. ```sh -# you will need npm installed -$ npx prism mock path/to/your/openapi.yml +$ ./scripts/mock ``` ```sh diff --git a/api.md b/api.md index f42e0be4..53e03831 100644 --- a/api.md +++ b/api.md @@ -115,12 +115,35 @@ from mixedbread.types import FileObject, PaginationWithTotal, FileDeleteResponse Methods: -- client.files.create(\*\*params) -> FileObject -- client.files.retrieve(file_id) -> FileObject -- client.files.update(file_id, \*\*params) -> FileObject -- client.files.list(\*\*params) -> SyncCursor[FileObject] -- client.files.delete(file_id) -> FileDeleteResponse -- client.files.content(file_id) -> BinaryAPIResponse +- client.files.create(\*\*params) -> FileObject +- client.files.retrieve(file_id) -> FileObject +- client.files.update(file_id, \*\*params) -> FileObject +- client.files.list(\*\*params) -> SyncCursor[FileObject] +- client.files.delete(file_id) -> FileDeleteResponse +- client.files.content(file_id) -> BinaryAPIResponse + +## Uploads + +Types: + +```python +from mixedbread.types.files import ( + MultipartUploadPart, + MultipartUploadPartURL, + UploadCreateResponse, + UploadRetrieveResponse, + UploadListResponse, + UploadAbortResponse, +) +``` + +Methods: + +- client.files.uploads.create(\*\*params) -> UploadCreateResponse +- client.files.uploads.retrieve(upload_id) -> UploadRetrieveResponse +- client.files.uploads.list() -> UploadListResponse +- client.files.uploads.abort(upload_id) -> UploadAbortResponse +- client.files.uploads.complete(upload_id, \*\*params) -> FileObject # Extractions diff --git a/pyproject.toml b/pyproject.toml index babc8df7..42f91575 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mixedbread" -version = "1.0.0" +version = "0.47.0" description = "The official Python library for the Mixedbread API" dynamic = ["readme"] license = "Apache-2.0" @@ -69,7 +69,7 @@ format = { chain = [ # run formatting again to fix any inconsistencies when imports are stripped 
"format:ruff", ]} -"format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md" +"format:docs" = "bash -c 'python scripts/utils/ruffen-docs.py README.md $(find . -type f -name api.md)'" "format:ruff" = "ruff format" "lint" = { chain = [ diff --git a/src/mixedbread/__init__.py b/src/mixedbread/__init__.py index 82ae8048..165dae05 100644 --- a/src/mixedbread/__init__.py +++ b/src/mixedbread/__init__.py @@ -3,6 +3,7 @@ import typing as _t from . import types +from .lib import PartUploadEvent as PartUploadEvent, MultipartUploadOptions as MultipartUploadOptions from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes, omit, not_given from ._utils import file_from_path from ._client import ( @@ -83,6 +84,8 @@ "DefaultHttpxClient", "DefaultAsyncHttpxClient", "DefaultAioHttpClient", + "MultipartUploadOptions", + "PartUploadEvent", ] if not _t.TYPE_CHECKING: diff --git a/src/mixedbread/_client.py b/src/mixedbread/_client.py index 3982ee4e..7391953a 100644 --- a/src/mixedbread/_client.py +++ b/src/mixedbread/_client.py @@ -55,9 +55,9 @@ if TYPE_CHECKING: from .resources import chat, files, stores, parsing, api_keys, embeddings, extractions, data_sources from .resources.chat import ChatResource, AsyncChatResource - from .resources.files import FilesResource, AsyncFilesResource from .resources.api_keys import APIKeysResource, AsyncAPIKeysResource from .resources.embeddings import EmbeddingsResource, AsyncEmbeddingsResource + from .resources.files.files import FilesResource, AsyncFilesResource from .resources.stores.stores import StoresResource, AsyncStoresResource from .resources.parsing.parsing import ParsingResource, AsyncParsingResource from .resources.extractions.extractions import ExtractionsResource, AsyncExtractionsResource diff --git a/src/mixedbread/_response.py b/src/mixedbread/_response.py index 543946b9..50547a3b 100644 --- a/src/mixedbread/_response.py +++ b/src/mixedbread/_response.py @@ -152,6 +152,7 @@ def _parse(self, *, to: 
type[_T] | None = None) -> R | _T: ), response=self.http_response, client=cast(Any, self._client), + options=self._options, ), ) @@ -162,6 +163,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: cast_to=extract_stream_chunk_type(self._stream_cls), response=self.http_response, client=cast(Any, self._client), + options=self._options, ), ) @@ -175,6 +177,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), + options=self._options, ), ) diff --git a/src/mixedbread/_streaming.py b/src/mixedbread/_streaming.py index e7f6340f..235a94a8 100644 --- a/src/mixedbread/_streaming.py +++ b/src/mixedbread/_streaming.py @@ -4,7 +4,7 @@ import json import inspect from types import TracebackType -from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast +from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, Optional, AsyncIterator, cast from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, runtime_checkable import httpx @@ -13,6 +13,7 @@ if TYPE_CHECKING: from ._client import Mixedbread, AsyncMixedbread + from ._models import FinalRequestOptions _T = TypeVar("_T") @@ -22,7 +23,7 @@ class Stream(Generic[_T]): """Provides the core interface to iterate over a synchronous stream response.""" response: httpx.Response - + _options: Optional[FinalRequestOptions] = None _decoder: SSEBytesDecoder def __init__( @@ -31,10 +32,12 @@ def __init__( cast_to: type[_T], response: httpx.Response, client: Mixedbread, + options: Optional[FinalRequestOptions] = None, ) -> None: self.response = response self._cast_to = cast_to self._client = client + self._options = options self._decoder = client._make_sse_decoder() self._iterator = self.__stream__() @@ -85,7 +88,7 @@ class AsyncStream(Generic[_T]): """Provides the core interface to iterate over an asynchronous stream response.""" response: httpx.Response - + _options: 
Optional[FinalRequestOptions] = None _decoder: SSEDecoder | SSEBytesDecoder def __init__( @@ -94,10 +97,12 @@ def __init__( cast_to: type[_T], response: httpx.Response, client: AsyncMixedbread, + options: Optional[FinalRequestOptions] = None, ) -> None: self.response = response self._cast_to = cast_to self._client = client + self._options = options self._decoder = client._make_sse_decoder() self._iterator = self.__stream__() diff --git a/src/mixedbread/_utils/_compat.py b/src/mixedbread/_utils/_compat.py index dd703233..2c70b299 100644 --- a/src/mixedbread/_utils/_compat.py +++ b/src/mixedbread/_utils/_compat.py @@ -26,7 +26,7 @@ def is_union(tp: Optional[Type[Any]]) -> bool: else: import types - return tp is Union or tp is types.UnionType + return tp is Union or tp is types.UnionType # type: ignore[comparison-overlap] def is_typeddict(tp: Type[Any]) -> bool: diff --git a/src/mixedbread/_version.py b/src/mixedbread/_version.py index a6b0880b..4cf2c650 100644 --- a/src/mixedbread/_version.py +++ b/src/mixedbread/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
__title__ = "mixedbread" -__version__ = "1.0.0" # x-release-please-version +__version__ = "0.47.0" # x-release-please-version diff --git a/src/mixedbread/lib/__init__.py b/src/mixedbread/lib/__init__.py new file mode 100644 index 00000000..99196c57 --- /dev/null +++ b/src/mixedbread/lib/__init__.py @@ -0,0 +1 @@ +from .multipart_upload import PartUploadEvent as PartUploadEvent, MultipartUploadOptions as MultipartUploadOptions diff --git a/src/mixedbread/lib/multipart_upload.py b/src/mixedbread/lib/multipart_upload.py new file mode 100644 index 00000000..6e51a4bf --- /dev/null +++ b/src/mixedbread/lib/multipart_upload.py @@ -0,0 +1,361 @@ +from __future__ import annotations + +import os +import math +import asyncio +import mimetypes +import threading +from typing import TYPE_CHECKING, Any, List, Union, Callable, Optional +from pathlib import Path +from dataclasses import dataclass +from concurrent.futures import ThreadPoolExecutor + +import httpx + +if TYPE_CHECKING: + from .._types import FileTypes, FileContent + from ..resources.files.uploads import UploadsResource, AsyncUploadsResource + +from .._types import Body, Query, Headers, NotGiven, not_given + +from ..types.file_object import FileObject +from ..types.files.multipart_upload_part_param import MultipartUploadPartParam + +DEFAULT_THRESHOLD = 100 * 1024 * 1024 # 100 MB +DEFAULT_PART_SIZE = 100 * 1024 * 1024 # 100 MB +DEFAULT_CONCURRENCY = 5 +UPLOAD_TIMEOUT = 300 # 5 minutes + + +@dataclass +class PartUploadEvent: + """Event emitted after each part is uploaded.""" + + part_number: int + total_parts: int + part_size: int + uploaded_bytes: int + total_bytes: int + + +@dataclass +class MultipartUploadOptions: + """Options for controlling multipart upload behavior.""" + + threshold: int = DEFAULT_THRESHOLD + part_size: int = DEFAULT_PART_SIZE + concurrency: int = DEFAULT_CONCURRENCY + on_part_upload: Optional[Callable[[PartUploadEvent], None]] = None + + +@dataclass +class _ResolvedFile: + """Internal resolved 
file representation.""" + + data: Union[bytes, Path] + file_size: int + filename: str + mime_type: str + + +def _get_file_size(file: FileTypes) -> int: + """Get file size without reading the entire file into memory. + + Raises TypeError if the size cannot be determined. + """ + # Handle tuple forms: (filename, content, ...) + if isinstance(file, tuple): + file_content = file[1] + else: + file_content = file + + if isinstance(file_content, bytes): + return len(file_content) + + if isinstance(file_content, os.PathLike): + return os.stat(file_content).st_size + + # IO[bytes] - measure remaining bytes from current position + if hasattr(file_content, "seek") and hasattr(file_content, "tell"): + current = file_content.tell() + file_content.seek(0, 2) + size = file_content.tell() - current + file_content.seek(current) + return size + + raise TypeError(f"Cannot determine file size for {type(file_content)}") + + +def _resolve_file_input(file: FileTypes) -> _ResolvedFile: + """Resolve a FileTypes input into a normalized representation.""" + filename: Optional[str] = None + mime_type: Optional[str] = None + file_content: FileContent + + if isinstance(file, tuple): + filename = file[0] + file_content = file[1] + if len(file) >= 3: + mime_type = file[2] # type: ignore[misc] + else: + file_content = file + + # Resolve file content to bytes or Path + data: Union[bytes, Path] + if isinstance(file_content, bytes): + data = file_content + file_size = len(file_content) + if filename is None: + filename = "upload" + elif isinstance(file_content, os.PathLike): + path = Path(file_content) + data = path + file_size = os.stat(path).st_size + if filename is None: + filename = path.name + elif hasattr(file_content, "read"): + # IO[bytes] - read into memory + data = file_content.read() + file_size = len(data) + if filename is None: + name = getattr(file_content, "name", None) + if name: + filename = os.path.basename(name) + else: + filename = "upload" + else: + raise TypeError(f"Unsupported 
file type: {type(file_content)}") + + # Resolve mime type + if not mime_type and filename: + guessed, _ = mimetypes.guess_type(filename) + mime_type = guessed or "application/octet-stream" + elif not mime_type: + mime_type = "application/octet-stream" + + return _ResolvedFile( + data=data, + file_size=file_size, + filename=filename or "upload", + mime_type=mime_type, + ) + + +def _read_part(resolved: _ResolvedFile, part_number: int, part_size: int) -> bytes: + """Read a specific part from the resolved file data. + + For bytes data, slices directly. For PathLike, opens its own file handle + (thread-safe for concurrent uploads). + """ + offset = (part_number - 1) * part_size # parts are 1-based + + if isinstance(resolved.data, bytes): + return resolved.data[offset : offset + part_size] + + # PathLike - each caller gets its own file handle + with open(resolved.data, "rb") as f: + f.seek(offset) + return f.read(part_size) + + +def _upload_single_part( + url: str, + data: bytes, + http_client: httpx.Client, +) -> str: + """Upload a single part to its presigned URL. Returns the ETag.""" + response = http_client.put(url, content=data) + response.raise_for_status() + return response.headers.get("etag", "") + + +async def _async_upload_single_part( + url: str, + data: bytes, + http_client: httpx.AsyncClient, +) -> str: + """Upload a single part to its presigned URL asynchronously. 
Returns the ETag.""" + response = await http_client.put(url, content=data) + response.raise_for_status() + return response.headers.get("etag", "") + + +def multipart_create_sync( + uploads: UploadsResource, + file: FileTypes, + options: MultipartUploadOptions, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, +) -> FileObject: + """Perform a multipart upload synchronously.""" + resolved = _resolve_file_input(file) + part_count = max(1, math.ceil(resolved.file_size / options.part_size)) + + # Step 1: Initiate the multipart upload + upload = uploads.create( + filename=resolved.filename, + file_size=resolved.file_size, + mime_type=resolved.mime_type, + part_count=part_count, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + upload_id = upload.id + + try: + # Step 2: Upload parts concurrently + completed_parts: List[MultipartUploadPartParam] = [] + + uploaded_bytes_total = 0 + upload_lock = threading.Lock() + + with httpx.Client(timeout=httpx.Timeout(UPLOAD_TIMEOUT)) as http_client: + + def _do_upload(part_url: Any) -> MultipartUploadPartParam: + nonlocal uploaded_bytes_total + part_data = _read_part(resolved, part_url.part_number, options.part_size) + etag = _upload_single_part(part_url.url, part_data, http_client) + + if options.on_part_upload: + with upload_lock: + uploaded_bytes_total += len(part_data) + uploaded_bytes = uploaded_bytes_total + options.on_part_upload( + PartUploadEvent( + part_number=part_url.part_number, + total_parts=part_count, + part_size=len(part_data), + uploaded_bytes=uploaded_bytes, + total_bytes=resolved.file_size, + ) + ) + + return MultipartUploadPartParam(part_number=part_url.part_number, etag=etag) + + with ThreadPoolExecutor(max_workers=options.concurrency) as executor: + futures = [executor.submit(_do_upload, pu) for pu in upload.part_urls] + for 
future in futures: + completed_parts.append(future.result()) + + # Sort by part number + completed_parts.sort(key=lambda p: p["part_number"]) + + # Step 3: Complete the upload + return uploads.complete( + upload_id=upload_id, + parts=completed_parts, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + except BaseException: + # Abort on any failure (including KeyboardInterrupt, CancelledError) + try: + uploads.abort( + upload_id=upload_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + except Exception: + pass # Best effort abort + raise + + +async def multipart_create_async( + uploads: AsyncUploadsResource, + file: FileTypes, + options: MultipartUploadOptions, + *, + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, +) -> FileObject: + """Perform a multipart upload asynchronously.""" + resolved = await asyncio.to_thread(_resolve_file_input, file) + part_count = max(1, math.ceil(resolved.file_size / options.part_size)) + + # Step 1: Initiate the multipart upload + upload = await uploads.create( + filename=resolved.filename, + file_size=resolved.file_size, + mime_type=resolved.mime_type, + part_count=part_count, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + upload_id = upload.id + + try: + # Step 2: Upload parts concurrently + semaphore = asyncio.Semaphore(options.concurrency) + uploaded_bytes_total = 0 + + async with httpx.AsyncClient(timeout=httpx.Timeout(UPLOAD_TIMEOUT)) as http_client: + + async def _do_upload(part_url: Any) -> MultipartUploadPartParam: + nonlocal uploaded_bytes_total + async with semaphore: + part_data = await asyncio.to_thread(_read_part, resolved, part_url.part_number, options.part_size) + etag = await _async_upload_single_part(part_url.url, 
part_data, http_client) + + if options.on_part_upload: + uploaded_bytes_total += len(part_data) + options.on_part_upload( + PartUploadEvent( + part_number=part_url.part_number, + total_parts=part_count, + part_size=len(part_data), + uploaded_bytes=uploaded_bytes_total, + total_bytes=resolved.file_size, + ) + ) + + return MultipartUploadPartParam(part_number=part_url.part_number, etag=etag) + + tasks = [asyncio.ensure_future(_do_upload(pu)) for pu in upload.part_urls] + try: + completed_parts: List[MultipartUploadPartParam] = list(await asyncio.gather(*tasks)) + except BaseException: + for task in tasks: + task.cancel() + await asyncio.gather(*tasks, return_exceptions=True) + raise + + # Sort by part number + completed_parts.sort(key=lambda p: p["part_number"]) + + # Step 3: Complete the upload + return await uploads.complete( + upload_id=upload_id, + parts=completed_parts, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + + except BaseException: + # Abort on any failure (including KeyboardInterrupt, CancelledError) + try: + await uploads.abort( + upload_id=upload_id, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + except Exception: + pass # Best effort abort + raise diff --git a/src/mixedbread/resources/files/__init__.py b/src/mixedbread/resources/files/__init__.py new file mode 100644 index 00000000..13fe2123 --- /dev/null +++ b/src/mixedbread/resources/files/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .files import ( + FilesResource, + AsyncFilesResource, + FilesResourceWithRawResponse, + AsyncFilesResourceWithRawResponse, + FilesResourceWithStreamingResponse, + AsyncFilesResourceWithStreamingResponse, +) +from .uploads import ( + UploadsResource, + AsyncUploadsResource, + UploadsResourceWithRawResponse, + AsyncUploadsResourceWithRawResponse, + UploadsResourceWithStreamingResponse, + AsyncUploadsResourceWithStreamingResponse, +) + +__all__ = [ + "UploadsResource", + "AsyncUploadsResource", + "UploadsResourceWithRawResponse", + "AsyncUploadsResourceWithRawResponse", + "UploadsResourceWithStreamingResponse", + "AsyncUploadsResourceWithStreamingResponse", + "FilesResource", + "AsyncFilesResource", + "FilesResourceWithRawResponse", + "AsyncFilesResourceWithRawResponse", + "FilesResourceWithStreamingResponse", + "AsyncFilesResourceWithStreamingResponse", +] diff --git a/src/mixedbread/resources/files.py b/src/mixedbread/resources/files/files.py similarity index 85% rename from src/mixedbread/resources/files.py rename to src/mixedbread/resources/files/files.py index d1e74a27..8536ba20 100644 --- a/src/mixedbread/resources/files.py +++ b/src/mixedbread/resources/files/files.py @@ -6,12 +6,20 @@ import httpx -from ..types import file_list_params, file_create_params, file_update_params -from .._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given -from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( +from ...types import file_list_params, file_create_params, file_update_params +from .uploads import ( + UploadsResource, + AsyncUploadsResource, + UploadsResourceWithRawResponse, + AsyncUploadsResourceWithRawResponse, + UploadsResourceWithStreamingResponse, + AsyncUploadsResourceWithStreamingResponse, +) +from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, 
not_given +from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( BinaryAPIResponse, AsyncBinaryAPIResponse, StreamedBinaryAPIResponse, @@ -25,15 +33,25 @@ async_to_custom_raw_response_wrapper, async_to_custom_streamed_response_wrapper, ) -from ..pagination import SyncCursor, AsyncCursor -from .._base_client import AsyncPaginator, make_request_options -from ..types.file_object import FileObject -from ..types.file_delete_response import FileDeleteResponse +from ...pagination import SyncCursor, AsyncCursor +from ..._base_client import AsyncPaginator, make_request_options +from ...types.file_object import FileObject +from ...lib.multipart_upload import ( + MultipartUploadOptions, + _get_file_size, + multipart_create_sync, + multipart_create_async, +) +from ...types.file_delete_response import FileDeleteResponse __all__ = ["FilesResource", "AsyncFilesResource"] class FilesResource(SyncAPIResource): + @cached_property + def uploads(self) -> UploadsResource: + return UploadsResource(self._client) + @cached_property def with_raw_response(self) -> FilesResourceWithRawResponse: """ @@ -57,6 +75,7 @@ def create( self, *, file: FileTypes, + multipart_upload: bool | MultipartUploadOptions | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -67,6 +86,8 @@ def create( """ Upload a new file. + Automatically uses multipart uploads for large files (>100MB by default). + Args: file: The file to upload. Returns: FileResponse: The response containing the details of the uploaded file. 
@@ -74,6 +95,12 @@ def create( Args: file: The file to upload + multipart_upload: Controls multipart upload behavior. + None (default) auto-detects based on file size. + True forces multipart with default options. + False disables multipart. + MultipartUploadOptions for custom settings. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -82,6 +109,29 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ + if multipart_upload is not False: + if isinstance(multipart_upload, MultipartUploadOptions): + _opts = multipart_upload + _use_multipart = True + elif multipart_upload is True: + _opts = MultipartUploadOptions() + _use_multipart = True + else: # None — auto-detect + _opts = MultipartUploadOptions() + try: + _use_multipart = _get_file_size(file) >= _opts.threshold + except (TypeError, OSError): + _use_multipart = False + + if _use_multipart: + return multipart_create_sync( + self.uploads, file, _opts, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + body = deepcopy_minimal({"file": file}) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) # It should be noted that the actual Content-Type header that will be @@ -333,6 +383,10 @@ def content( class AsyncFilesResource(AsyncAPIResource): + @cached_property + def uploads(self) -> AsyncUploadsResource: + return AsyncUploadsResource(self._client) + @cached_property def with_raw_response(self) -> AsyncFilesResourceWithRawResponse: """ @@ -356,6 +410,7 @@ async def create( self, *, file: FileTypes, + multipart_upload: bool | MultipartUploadOptions | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, @@ -366,6 +421,8 @@ async def create( """ Upload a new file. + Automatically uses multipart uploads for large files (>100MB by default). + Args: file: The file to upload. Returns: FileResponse: The response containing the details of the uploaded file. @@ -373,6 +430,12 @@ async def create( Args: file: The file to upload + multipart_upload: Controls multipart upload behavior. + None (default) auto-detects based on file size. + True forces multipart with default options. + False disables multipart. + MultipartUploadOptions for custom settings. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -381,6 +444,29 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ + if multipart_upload is not False: + if isinstance(multipart_upload, MultipartUploadOptions): + _opts = multipart_upload + _use_multipart = True + elif multipart_upload is True: + _opts = MultipartUploadOptions() + _use_multipart = True + else: # None — auto-detect + _opts = MultipartUploadOptions() + try: + _use_multipart = _get_file_size(file) >= _opts.threshold + except (TypeError, OSError): + _use_multipart = False + + if _use_multipart: + return await multipart_create_async( + self.uploads, file, _opts, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + ) + body = deepcopy_minimal({"file": file}) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) # It should be noted that the actual Content-Type header that will be @@ -655,6 +741,10 @@ def __init__(self, files: FilesResource) -> None: BinaryAPIResponse, ) + @cached_property + def uploads(self) -> UploadsResourceWithRawResponse: + return UploadsResourceWithRawResponse(self._files.uploads) + class AsyncFilesResourceWithRawResponse: def __init__(self, files: AsyncFilesResource) -> None: @@ -680,6 +770,10 @@ def __init__(self, files: 
AsyncFilesResource) -> None: AsyncBinaryAPIResponse, ) + @cached_property + def uploads(self) -> AsyncUploadsResourceWithRawResponse: + return AsyncUploadsResourceWithRawResponse(self._files.uploads) + class FilesResourceWithStreamingResponse: def __init__(self, files: FilesResource) -> None: @@ -705,6 +799,10 @@ def __init__(self, files: FilesResource) -> None: StreamedBinaryAPIResponse, ) + @cached_property + def uploads(self) -> UploadsResourceWithStreamingResponse: + return UploadsResourceWithStreamingResponse(self._files.uploads) + class AsyncFilesResourceWithStreamingResponse: def __init__(self, files: AsyncFilesResource) -> None: @@ -729,3 +827,7 @@ def __init__(self, files: AsyncFilesResource) -> None: files.content, AsyncStreamedBinaryAPIResponse, ) + + @cached_property + def uploads(self) -> AsyncUploadsResourceWithStreamingResponse: + return AsyncUploadsResourceWithStreamingResponse(self._files.uploads) diff --git a/src/mixedbread/resources/files/uploads.py b/src/mixedbread/resources/files/uploads.py new file mode 100644 index 00000000..dc410621 --- /dev/null +++ b/src/mixedbread/resources/files/uploads.py @@ -0,0 +1,520 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Iterable + +import httpx + +from ..._types import Body, Omit, Query, Headers, NotGiven, omit, not_given +from ..._utils import maybe_transform, async_maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ...types.files import upload_create_params, upload_complete_params +from ..._base_client import make_request_options +from ...types.file_object import FileObject +from ...types.files.upload_list_response import UploadListResponse +from ...types.files.upload_abort_response import UploadAbortResponse +from ...types.files.upload_create_response import UploadCreateResponse +from ...types.files.upload_retrieve_response import UploadRetrieveResponse +from ...types.files.multipart_upload_part_param import MultipartUploadPartParam + +__all__ = ["UploadsResource", "AsyncUploadsResource"] + + +class UploadsResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> UploadsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers + """ + return UploadsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> UploadsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response + """ + return UploadsResourceWithStreamingResponse(self) + + def create( + self, + *, + filename: str, + file_size: int, + mime_type: str, + part_count: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadCreateResponse: + """ + Initiate a multipart upload and receive presigned URLs for uploading parts + directly to storage. + + Args: + filename: Name of the file including extension + + file_size: Total size of the file in bytes + + mime_type: MIME type of the file + + part_count: Number of parts to split the upload into + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/v1/files/uploads", + body=maybe_transform( + { + "filename": filename, + "file_size": file_size, + "mime_type": mime_type, + "part_count": part_count, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadCreateResponse, + ) + + def retrieve( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadRetrieveResponse: + """ + Get a multipart upload's details with fresh presigned URLs for any parts not yet + uploaded. + + Args: + upload_id: The ID of the multipart upload + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._get( + f"/v1/files/uploads/{upload_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadRetrieveResponse, + ) + + def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadListResponse: + """List all in-progress multipart uploads for the authenticated organization.""" + return self._get( + "/v1/files/uploads", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadListResponse, + ) + + def abort( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadAbortResponse: + """ + Abort a multipart upload and clean up any uploaded parts. + + Args: + upload_id: The ID of the multipart upload to abort + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/v1/files/uploads/{upload_id}/abort", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadAbortResponse, + ) + + def complete( + self, + upload_id: str, + *, + parts: Iterable[MultipartUploadPartParam], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FileObject: + """Complete a multipart upload after all parts have been uploaded. + + Creates the file + object and returns it. 
+ + Args: + upload_id: The ID of the multipart upload + + parts: List of completed parts with their ETags + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/v1/files/uploads/{upload_id}/complete", + body=maybe_transform({"parts": parts}, upload_complete_params.UploadCompleteParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileObject, + ) + + +class AsyncUploadsResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncUploadsResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#accessing-raw-response-data-eg-headers + """ + return AsyncUploadsResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncUploadsResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/mixedbread-ai/mixedbread-python#with_streaming_response + """ + return AsyncUploadsResourceWithStreamingResponse(self) + + async def create( + self, + *, + filename: str, + file_size: int, + mime_type: str, + part_count: int | Omit = omit, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadCreateResponse: + """ + Initiate a multipart upload and receive presigned URLs for uploading parts + directly to storage. + + Args: + filename: Name of the file including extension + + file_size: Total size of the file in bytes + + mime_type: MIME type of the file + + part_count: Number of parts to split the upload into + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/v1/files/uploads", + body=await async_maybe_transform( + { + "filename": filename, + "file_size": file_size, + "mime_type": mime_type, + "part_count": part_count, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadCreateResponse, + ) + + async def retrieve( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadRetrieveResponse: + """ + Get a multipart upload's details with fresh presigned URLs for any parts not yet + uploaded. 
+ + Args: + upload_id: The ID of the multipart upload + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._get( + f"/v1/files/uploads/{upload_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadRetrieveResponse, + ) + + async def list( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadListResponse: + """List all in-progress multipart uploads for the authenticated organization.""" + return await self._get( + "/v1/files/uploads", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadListResponse, + ) + + async def abort( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> UploadAbortResponse: + """ + Abort a multipart upload and clean up any uploaded parts. 
+ + Args: + upload_id: The ID of the multipart upload to abort + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/v1/files/uploads/{upload_id}/abort", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadAbortResponse, + ) + + async def complete( + self, + upload_id: str, + *, + parts: Iterable[MultipartUploadPartParam], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = not_given, + ) -> FileObject: + """Complete a multipart upload after all parts have been uploaded. + + Creates the file + object and returns it. 
+ + Args: + upload_id: The ID of the multipart upload + + parts: List of completed parts with their ETags + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/v1/files/uploads/{upload_id}/complete", + body=await async_maybe_transform({"parts": parts}, upload_complete_params.UploadCompleteParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=FileObject, + ) + + +class UploadsResourceWithRawResponse: + def __init__(self, uploads: UploadsResource) -> None: + self._uploads = uploads + + self.create = to_raw_response_wrapper( + uploads.create, + ) + self.retrieve = to_raw_response_wrapper( + uploads.retrieve, + ) + self.list = to_raw_response_wrapper( + uploads.list, + ) + self.abort = to_raw_response_wrapper( + uploads.abort, + ) + self.complete = to_raw_response_wrapper( + uploads.complete, + ) + + +class AsyncUploadsResourceWithRawResponse: + def __init__(self, uploads: AsyncUploadsResource) -> None: + self._uploads = uploads + + self.create = async_to_raw_response_wrapper( + uploads.create, + ) + self.retrieve = async_to_raw_response_wrapper( + uploads.retrieve, + ) + self.list = async_to_raw_response_wrapper( + uploads.list, + ) + self.abort = async_to_raw_response_wrapper( + uploads.abort, + ) + self.complete = async_to_raw_response_wrapper( + uploads.complete, + ) + + +class UploadsResourceWithStreamingResponse: + def __init__(self, uploads: UploadsResource) -> None: + self._uploads = uploads + + self.create = to_streamed_response_wrapper( + uploads.create, + ) + self.retrieve = to_streamed_response_wrapper( + uploads.retrieve, 
+ ) + self.list = to_streamed_response_wrapper( + uploads.list, + ) + self.abort = to_streamed_response_wrapper( + uploads.abort, + ) + self.complete = to_streamed_response_wrapper( + uploads.complete, + ) + + +class AsyncUploadsResourceWithStreamingResponse: + def __init__(self, uploads: AsyncUploadsResource) -> None: + self._uploads = uploads + + self.create = async_to_streamed_response_wrapper( + uploads.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + uploads.retrieve, + ) + self.list = async_to_streamed_response_wrapper( + uploads.list, + ) + self.abort = async_to_streamed_response_wrapper( + uploads.abort, + ) + self.complete = async_to_streamed_response_wrapper( + uploads.complete, + ) diff --git a/src/mixedbread/resources/parsing/jobs.py b/src/mixedbread/resources/parsing/jobs.py index 01715572..b797da1e 100644 --- a/src/mixedbread/resources/parsing/jobs.py +++ b/src/mixedbread/resources/parsing/jobs.py @@ -10,6 +10,7 @@ from ...lib import polling from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, omit, not_given +from ...lib.multipart_upload import MultipartUploadOptions from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource @@ -403,13 +404,14 @@ def upload( ] | NotGiven = not_given, return_format: Literal["html", "markdown", "plain"] | NotGiven = not_given, + multipart_upload: bool | MultipartUploadOptions | None = None, **kwargs: Any, ) -> ParsingJob: """Upload a file to the `files` API and then create a parsing job for it. Note the job will be asynchronously processed (you can use the alternative polling helper method to wait for processing to complete). 
""" - file_obj = self._client.files.create(file=file, **kwargs) + file_obj = self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return self.create( file_id=file_obj.id, chunking_strategy=chunking_strategy, @@ -442,11 +444,12 @@ def upload_and_poll( ] | NotGiven = not_given, return_format: Literal["html", "markdown", "plain"] | NotGiven = not_given, + multipart_upload: bool | MultipartUploadOptions | None = None, poll_interval_ms: int | NotGiven = not_given, **kwargs: Any, ) -> ParsingJob: """Upload a file and create a parsing job, then poll until processing is complete.""" - file_obj = self._client.files.create(file=file, **kwargs) + file_obj = self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return self.create_and_poll( file_id=file_obj.id, chunking_strategy=chunking_strategy, @@ -827,13 +830,14 @@ async def upload( ] | NotGiven = not_given, return_format: Literal["html", "markdown", "plain"] | NotGiven = not_given, + multipart_upload: bool | MultipartUploadOptions | None = None, **kwargs: Any, ) -> ParsingJob: """Upload a file to the `files` API and then create a parsing job for it. Note the job will be asynchronously processed (you can use the alternative polling helper method to wait for processing to complete). 
""" - file_obj = await self._client.files.create(file=file, **kwargs) + file_obj = await self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return await self.create( file_id=file_obj.id, chunking_strategy=chunking_strategy, @@ -866,11 +870,12 @@ async def upload_and_poll( ] | NotGiven = not_given, return_format: Literal["html", "markdown", "plain"] | NotGiven = not_given, + multipart_upload: bool | MultipartUploadOptions | None = None, poll_interval_ms: int | NotGiven = not_given, **kwargs: Any, ) -> ParsingJob: """Upload a file and create a parsing job, then poll until processing is complete.""" - file_obj = await self._client.files.create(file=file, **kwargs) + file_obj = await self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return await self.create_and_poll( file_id=file_obj.id, chunking_strategy=chunking_strategy, diff --git a/src/mixedbread/resources/stores/files.py b/src/mixedbread/resources/stores/files.py index 6edd79dd..5124082c 100644 --- a/src/mixedbread/resources/stores/files.py +++ b/src/mixedbread/resources/stores/files.py @@ -9,6 +9,7 @@ from ...lib import polling from ..._types import Body, Omit, Query, Headers, NotGiven, FileTypes, SequenceNotStr, omit, not_given +from ...lib.multipart_upload import MultipartUploadOptions from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource @@ -271,7 +272,7 @@ def list( metadata_filter: Metadata filter to apply to the query - q: Search query for fuzzy matching over name and description fields + q: Search query for fuzzy matching over name and external_id fields extra_headers: Send extra headers @@ -495,6 +496,7 @@ def upload( external_id: Optional[str] | Omit = omit, overwrite: bool | Omit = omit, experimental: file_create_params.Experimental | Omit = omit, + multipart_upload: bool | MultipartUploadOptions | None = None, **kwargs: Any, ) -> 
StoreFile: """Upload a file to the `files` API and then attach it to the given store. @@ -509,6 +511,11 @@ def upload( external_id: External identifier for this file in the store overwrite: If true, overwrite an existing file with the same external_id experimental: Configuration for a file. + multipart_upload: Controls multipart upload behavior for the file upload. + None (default) auto-detects based on file size. + True forces multipart with default options. + False disables multipart. + MultipartUploadOptions for custom settings. extra_headers: Send extra headers extra_query: Add additional query parameters to the request extra_body: Add additional JSON properties to the request @@ -516,7 +523,7 @@ def upload( Returns: The file object once it reaches a terminal state """ - file_obj = self._client.files.create(file=file, **kwargs) + file_obj = self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return self.create( store_identifier=store_identifier, file_id=file_obj.id, @@ -538,12 +545,13 @@ def upload_and_poll( external_id: Optional[str] | Omit = omit, overwrite: bool | Omit = omit, experimental: file_create_params.Experimental | Omit = omit, + multipart_upload: bool | MultipartUploadOptions | None = None, poll_interval_ms: int | NotGiven = not_given, poll_timeout_ms: float | NotGiven = not_given, **kwargs: Any, ) -> StoreFile: """Add a file to a store and poll until processing is complete. - + Args: store_identifier: The ID or name of the store file: The file to upload @@ -552,12 +560,17 @@ def upload_and_poll( external_id: External identifier for this file in the store overwrite: If true, overwrite an existing file with the same external_id experimental: Configuration for a file. + multipart_upload: Controls multipart upload behavior for the file upload. + None (default) auto-detects based on file size. + True forces multipart with default options. + False disables multipart. + MultipartUploadOptions for custom settings. 
poll_interval_ms: The interval between polls in milliseconds poll_timeout_ms: The maximum time to poll for in milliseconds Returns: The file object once it reaches a terminal state """ - file_obj = self._client.files.create(file=file, **kwargs) + file_obj = self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return self.create_and_poll( store_identifier=store_identifier, file_id=file_obj.id, @@ -810,7 +823,7 @@ async def list( metadata_filter: Metadata filter to apply to the query - q: Search query for fuzzy matching over name and description fields + q: Search query for fuzzy matching over name and external_id fields extra_headers: Send extra headers @@ -1038,6 +1051,7 @@ async def upload( external_id: Optional[str] | Omit = omit, overwrite: bool | Omit = omit, experimental: file_create_params.Experimental | Omit = omit, + multipart_upload: bool | MultipartUploadOptions | None = None, **kwargs: Any, ) -> StoreFile: """Upload a file to the `files` API and then attach it to the given vector store. @@ -1052,12 +1066,15 @@ async def upload( external_id: External identifier for this file in the store overwrite: If true, overwrite an existing file with the same external_id experimental: Configuration for a file. - poll_interval_ms: The interval between polls in milliseconds - poll_timeout_ms: The maximum time to poll for in milliseconds + multipart_upload: Controls multipart upload behavior for the file upload. + None (default) auto-detects based on file size. + True forces multipart with default options. + False disables multipart. + MultipartUploadOptions for custom settings. 
Returns: The file object once it reaches a terminal state """ - file_obj = await self._client.files.create(file=file, **kwargs) + file_obj = await self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return await self.create( store_identifier=store_identifier, file_id=file_obj.id, @@ -1079,12 +1096,13 @@ async def upload_and_poll( external_id: Optional[str] | Omit = omit, overwrite: bool | Omit = omit, experimental: file_create_params.Experimental | Omit = omit, + multipart_upload: bool | MultipartUploadOptions | None = None, poll_interval_ms: int | NotGiven = not_given, poll_timeout_ms: float | NotGiven = not_given, **kwargs: Any, ) -> StoreFile: """Add a file to a store and poll until processing is complete. - + Args: store_identifier: The ID or name of the store file: The file to upload @@ -1093,12 +1111,17 @@ async def upload_and_poll( external_id: External identifier for this file in the store overwrite: If true, overwrite an existing file with the same external_id experimental: Configuration for a file. + multipart_upload: Controls multipart upload behavior for the file upload. + None (default) auto-detects based on file size. + True forces multipart with default options. + False disables multipart. + MultipartUploadOptions for custom settings. 
poll_interval_ms: The interval between polls in milliseconds poll_timeout_ms: The maximum time to poll for in milliseconds Returns: The file object once it reaches a terminal state """ - file_obj = await self._client.files.create(file=file, **kwargs) + file_obj = await self._client.files.create(file=file, multipart_upload=multipart_upload, **kwargs) return await self.create_and_poll( store_identifier=store_identifier, file_id=file_obj.id, diff --git a/src/mixedbread/types/files/__init__.py b/src/mixedbread/types/files/__init__.py new file mode 100644 index 00000000..5b8dff28 --- /dev/null +++ b/src/mixedbread/types/files/__init__.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .upload_create_params import UploadCreateParams as UploadCreateParams +from .upload_list_response import UploadListResponse as UploadListResponse +from .multipart_upload_part import MultipartUploadPart as MultipartUploadPart +from .upload_abort_response import UploadAbortResponse as UploadAbortResponse +from .upload_complete_params import UploadCompleteParams as UploadCompleteParams +from .upload_create_response import UploadCreateResponse as UploadCreateResponse +from .upload_retrieve_response import UploadRetrieveResponse as UploadRetrieveResponse +from .multipart_upload_part_url import MultipartUploadPartURL as MultipartUploadPartURL +from .multipart_upload_part_param import MultipartUploadPartParam as MultipartUploadPartParam diff --git a/src/mixedbread/types/files/multipart_upload_part.py b/src/mixedbread/types/files/multipart_upload_part.py new file mode 100644 index 00000000..f3f6a226 --- /dev/null +++ b/src/mixedbread/types/files/multipart_upload_part.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from ..._models import BaseModel + +__all__ = ["MultipartUploadPart"] + + +class MultipartUploadPart(BaseModel): + part_number: int + """1-based part number""" + + etag: str + """ETag returned by the storage backend after uploading the part""" diff --git a/src/mixedbread/types/files/multipart_upload_part_param.py b/src/mixedbread/types/files/multipart_upload_part_param.py new file mode 100644 index 00000000..78552c46 --- /dev/null +++ b/src/mixedbread/types/files/multipart_upload_part_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["MultipartUploadPartParam"] + + +class MultipartUploadPartParam(TypedDict, total=False): + part_number: Required[int] + """1-based part number""" + + etag: Required[str] + """ETag returned by the storage backend after uploading the part""" diff --git a/src/mixedbread/types/files/multipart_upload_part_url.py b/src/mixedbread/types/files/multipart_upload_part_url.py new file mode 100644 index 00000000..ed51231e --- /dev/null +++ b/src/mixedbread/types/files/multipart_upload_part_url.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from ..._models import BaseModel + +__all__ = ["MultipartUploadPartURL"] + + +class MultipartUploadPartURL(BaseModel): + part_number: int + """1-based part number""" + + url: str + """Presigned URL for uploading this part""" diff --git a/src/mixedbread/types/files/upload_abort_response.py b/src/mixedbread/types/files/upload_abort_response.py new file mode 100644 index 00000000..aa8f02db --- /dev/null +++ b/src/mixedbread/types/files/upload_abort_response.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["UploadAbortResponse"] + + +class UploadAbortResponse(BaseModel): + id: str + """The ID of the deleted file""" + + deleted: Optional[bool] = None + """Whether the file was deleted""" + + object: Optional[Literal["file"]] = None + """The type of the deleted object""" diff --git a/src/mixedbread/types/files/upload_complete_params.py b/src/mixedbread/types/files/upload_complete_params.py new file mode 100644 index 00000000..6e59c5a4 --- /dev/null +++ b/src/mixedbread/types/files/upload_complete_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Required, TypedDict + +from .multipart_upload_part_param import MultipartUploadPartParam + +__all__ = ["UploadCompleteParams"] + + +class UploadCompleteParams(TypedDict, total=False): + parts: Required[Iterable[MultipartUploadPartParam]] + """List of completed parts with their ETags""" diff --git a/src/mixedbread/types/files/upload_create_params.py b/src/mixedbread/types/files/upload_create_params.py new file mode 100644 index 00000000..8ce2656e --- /dev/null +++ b/src/mixedbread/types/files/upload_create_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["UploadCreateParams"] + + +class UploadCreateParams(TypedDict, total=False): + filename: Required[str] + """Name of the file including extension""" + + file_size: Required[int] + """Total size of the file in bytes""" + + mime_type: Required[str] + """MIME type of the file""" + + part_count: int + """Number of parts to split the upload into""" diff --git a/src/mixedbread/types/files/upload_create_response.py b/src/mixedbread/types/files/upload_create_response.py new file mode 100644 index 00000000..30a6dc0c --- /dev/null +++ b/src/mixedbread/types/files/upload_create_response.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ..._models import BaseModel +from .multipart_upload_part_url import MultipartUploadPartURL + +__all__ = ["UploadCreateResponse"] + + +class UploadCreateResponse(BaseModel): + id: str + """The multipart upload ID (use this to complete or abort)""" + + part_urls: List[MultipartUploadPartURL] + """Presigned URLs for uploading parts""" diff --git a/src/mixedbread/types/files/upload_list_response.py b/src/mixedbread/types/files/upload_list_response.py new file mode 100644 index 00000000..f44a8e14 --- /dev/null +++ b/src/mixedbread/types/files/upload_list_response.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List + +from ..._models import BaseModel + +__all__ = ["UploadListResponse", "Data"] + + +class Data(BaseModel): + id: str + """The multipart upload record ID""" + + filename: str + """Original filename""" + + file_size: int + """Total file size in bytes""" + + mime_type: str + """MIME type of the file""" + + part_count: int + """Number of parts the file was split into""" + + created_at: str + """When the upload was initiated""" + + +class UploadListResponse(BaseModel): + data: List[Data] + """List of in-progress multipart uploads""" diff --git a/src/mixedbread/types/files/upload_retrieve_response.py b/src/mixedbread/types/files/upload_retrieve_response.py new file mode 100644 index 00000000..eda17521 --- /dev/null +++ b/src/mixedbread/types/files/upload_retrieve_response.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ..._models import BaseModel +from .multipart_upload_part import MultipartUploadPart +from .multipart_upload_part_url import MultipartUploadPartURL + +__all__ = ["UploadRetrieveResponse"] + + +class UploadRetrieveResponse(BaseModel): + id: str + """The multipart upload record ID""" + + filename: str + """Original filename""" + + file_size: int + """Total file size in bytes""" + + mime_type: str + """MIME type of the file""" + + part_count: int + """Number of parts the file was split into""" + + created_at: str + """When the upload was initiated""" + + completed_parts: List[MultipartUploadPart] + """Parts that have already been uploaded""" + + part_urls: List[MultipartUploadPartURL] + """Presigned URLs for the parts that still need to be uploaded""" diff --git a/src/mixedbread/types/parsing/parsing_job.py b/src/mixedbread/types/parsing/parsing_job.py index 56380010..2f3d250d 100644 --- a/src/mixedbread/types/parsing/parsing_job.py +++ b/src/mixedbread/types/parsing/parsing_job.py @@ -29,11 +29,14 @@ class ResultChunkElement(BaseModel): 
"""The page number where the element was found""" content: str - """The full content of the extracted element""" + """The extracted text content of the element""" summary: Optional[str] = None """A brief summary of the element's content""" + image: Optional[str] = None + """The base64-encoded image data for figure elements""" + class ResultChunk(BaseModel): """A chunk of text extracted from a document page.""" diff --git a/src/mixedbread/types/scored_audio_url_input_chunk.py b/src/mixedbread/types/scored_audio_url_input_chunk.py index ce6ad67a..95292f9a 100644 --- a/src/mixedbread/types/scored_audio_url_input_chunk.py +++ b/src/mixedbread/types/scored_audio_url_input_chunk.py @@ -19,6 +19,7 @@ "GeneratedMetadataCodeChunkGeneratedMetadata", "GeneratedMetadataAudioChunkGeneratedMetadata", "GeneratedMetadataVideoChunkGeneratedMetadata", + "GeneratedMetadataImageChunkGeneratedMetadata", "AudioURL", ] @@ -54,6 +55,8 @@ class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -84,6 +87,8 @@ class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -106,6 +111,8 @@ class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -134,6 +141,8 @@ class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -162,6 +171,8 @@ class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -180,7 +191,7 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -192,6 +203,36 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -213,6 +254,7 @@ def __getattr__(self, attr: str) -> object: ... GeneratedMetadataCodeChunkGeneratedMetadata, GeneratedMetadataAudioChunkGeneratedMetadata, GeneratedMetadataVideoChunkGeneratedMetadata, + GeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -251,6 +293,9 @@ class ScoredAudioURLInputChunk(BaseModel): store_id: str """store id""" + external_id: Optional[str] = None + """external identifier for this file""" + metadata: Optional[object] = None """file metadata""" diff --git a/src/mixedbread/types/scored_image_url_input_chunk.py b/src/mixedbread/types/scored_image_url_input_chunk.py index 5c41db69..1a8cf548 100644 --- a/src/mixedbread/types/scored_image_url_input_chunk.py +++ b/src/mixedbread/types/scored_image_url_input_chunk.py @@ -19,6 +19,7 @@ "GeneratedMetadataCodeChunkGeneratedMetadata", "GeneratedMetadataAudioChunkGeneratedMetadata", "GeneratedMetadataVideoChunkGeneratedMetadata", + "GeneratedMetadataImageChunkGeneratedMetadata", "ImageURL", ] @@ -54,6 +55,8 @@ class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -84,6 +87,8 @@ class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -106,6 +111,8 @@ class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -134,6 +141,8 @@ class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -162,6 +171,8 @@ class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -180,7 +191,7 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -192,6 +203,36 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -213,6 +254,7 @@ def __getattr__(self, attr: str) -> object: ... GeneratedMetadataCodeChunkGeneratedMetadata, GeneratedMetadataAudioChunkGeneratedMetadata, GeneratedMetadataVideoChunkGeneratedMetadata, + GeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -254,6 +296,9 @@ class ScoredImageURLInputChunk(BaseModel): store_id: str """store id""" + external_id: Optional[str] = None + """external identifier for this file""" + metadata: Optional[object] = None """file metadata""" diff --git a/src/mixedbread/types/scored_text_input_chunk.py b/src/mixedbread/types/scored_text_input_chunk.py index 025eecdc..41b33ea5 100644 --- a/src/mixedbread/types/scored_text_input_chunk.py +++ b/src/mixedbread/types/scored_text_input_chunk.py @@ -19,6 +19,7 @@ "GeneratedMetadataCodeChunkGeneratedMetadata", "GeneratedMetadataAudioChunkGeneratedMetadata", "GeneratedMetadataVideoChunkGeneratedMetadata", + "GeneratedMetadataImageChunkGeneratedMetadata", ] @@ -53,6 +54,8 @@ class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -83,6 +86,8 @@ class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -105,6 +110,8 @@ class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -133,6 +140,8 @@ class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -161,6 +170,8 @@ class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -179,7 +190,7 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -191,6 +202,36 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -212,6 +253,7 @@ def __getattr__(self, attr: str) -> object: ... GeneratedMetadataCodeChunkGeneratedMetadata, GeneratedMetadataAudioChunkGeneratedMetadata, GeneratedMetadataVideoChunkGeneratedMetadata, + GeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -243,6 +285,9 @@ class ScoredTextInputChunk(BaseModel): store_id: str """store id""" + external_id: Optional[str] = None + """external identifier for this file""" + metadata: Optional[object] = None """file metadata""" diff --git a/src/mixedbread/types/scored_video_url_input_chunk.py b/src/mixedbread/types/scored_video_url_input_chunk.py index 878c5bdf..a33d0d3a 100644 --- a/src/mixedbread/types/scored_video_url_input_chunk.py +++ b/src/mixedbread/types/scored_video_url_input_chunk.py @@ -19,6 +19,7 @@ "GeneratedMetadataCodeChunkGeneratedMetadata", "GeneratedMetadataAudioChunkGeneratedMetadata", "GeneratedMetadataVideoChunkGeneratedMetadata", + "GeneratedMetadataImageChunkGeneratedMetadata", "VideoURL", ] @@ -54,6 +55,8 @@ class GeneratedMetadataMarkdownChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -84,6 +87,8 @@ class GeneratedMetadataTextChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -106,6 +111,8 @@ class GeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -134,6 +141,8 @@ class GeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -162,6 +171,8 @@ class GeneratedMetadataAudioChunkGeneratedMetadata(BaseModel): audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -180,7 +191,7 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -192,6 +203,36 @@ class GeneratedMetadataVideoChunkGeneratedMetadata(BaseModel): frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class GeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -213,6 +254,7 @@ def __getattr__(self, attr: str) -> object: ... GeneratedMetadataCodeChunkGeneratedMetadata, GeneratedMetadataAudioChunkGeneratedMetadata, GeneratedMetadataVideoChunkGeneratedMetadata, + GeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -251,6 +293,9 @@ class ScoredVideoURLInputChunk(BaseModel): store_id: str """store id""" + external_id: Optional[str] = None + """external identifier for this file""" + metadata: Optional[object] = None """file metadata""" diff --git a/src/mixedbread/types/store_chunk_search_options_param.py b/src/mixedbread/types/store_chunk_search_options_param.py index 12e5f590..90bb9816 100644 --- a/src/mixedbread/types/store_chunk_search_options_param.py +++ b/src/mixedbread/types/store_chunk_search_options_param.py @@ -38,9 +38,6 @@ class AgenticAgenticSearchConfig(TypedDict, total=False): queries_per_round: int """Maximum queries per round""" - results_per_query: int - """Results to fetch per query""" - Agentic: TypeAlias = Union[bool, AgenticAgenticSearchConfig] diff --git a/src/mixedbread/types/stores/file_list_params.py b/src/mixedbread/types/stores/file_list_params.py index 2782573a..2089f9cf 100644 --- a/src/mixedbread/types/stores/file_list_params.py +++ b/src/mixedbread/types/stores/file_list_params.py @@ -37,7 +37,7 @@ class FileListParams(TypedDict, total=False): """Metadata filter to apply to the query""" q: Optional[str] - """Search query for fuzzy matching over name and description fields""" + """Search query for fuzzy matching over name and external_id fields""" MetadataFilterUnionMember2: TypeAlias = Union["SearchFilter", SearchFilterCondition] diff --git a/src/mixedbread/types/stores/file_search_params.py b/src/mixedbread/types/stores/file_search_params.py index e70bc15b..6f6aa2d1 100644 --- a/src/mixedbread/types/stores/file_search_params.py +++ b/src/mixedbread/types/stores/file_search_params.py @@ -78,9 +78,6 @@ class 
SearchOptionsAgenticAgenticSearchConfig(TypedDict, total=False): queries_per_round: int """Maximum queries per round""" - results_per_query: int - """Results to fetch per query""" - SearchOptionsAgentic: TypeAlias = Union[bool, SearchOptionsAgenticAgenticSearchConfig] diff --git a/src/mixedbread/types/stores/store_file.py b/src/mixedbread/types/stores/store_file.py index 7c3c0e2e..0fe097ba 100644 --- a/src/mixedbread/types/stores/store_file.py +++ b/src/mixedbread/types/stores/store_file.py @@ -24,6 +24,7 @@ "ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", "ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", "ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", + "ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata", "ChunkImageURLInputChunk", "ChunkImageURLInputChunkGeneratedMetadata", "ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata", @@ -34,6 +35,7 @@ "ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", "ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", "ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", + "ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata", "ChunkImageURLInputChunkImageURL", "ChunkAudioURLInputChunk", "ChunkAudioURLInputChunkGeneratedMetadata", @@ -45,6 +47,7 @@ "ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", "ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", "ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", + "ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata", "ChunkAudioURLInputChunkAudioURL", "ChunkVideoURLInputChunk", "ChunkVideoURLInputChunkGeneratedMetadata", @@ -56,6 +59,7 @@ "ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata", "ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata", "ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata", + 
"ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata", "ChunkVideoURLInputChunkVideoURL", ] @@ -102,6 +106,8 @@ class ChunkTextInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -132,6 +138,8 @@ class ChunkTextInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -154,6 +162,8 @@ class ChunkTextInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseModel): total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -182,6 +192,8 @@ class ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseModel): num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -210,6 +222,8 @@ class ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseModel) audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -228,7 +242,7 @@ class ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel) file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -240,6 +254,36 @@ class ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseModel) frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -261,6 +305,7 @@ def __getattr__(self, attr: str) -> object: ... 
ChunkTextInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, ChunkTextInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, ChunkTextInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, + ChunkTextInputChunkGeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -325,6 +370,8 @@ class ChunkImageURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(Bas num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -355,6 +402,8 @@ class ChunkImageURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -377,6 +426,8 @@ class ChunkImageURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseMode total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -405,6 +456,8 @@ class ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -433,6 +486,8 @@ class ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseMo audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -451,7 +506,7 @@ class ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseMo file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -463,6 +518,36 @@ class ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseMo frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -484,6 +569,7 @@ def __getattr__(self, attr: str) -> object: ... 
ChunkImageURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, ChunkImageURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, ChunkImageURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, + ChunkImageURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -561,6 +647,8 @@ class ChunkAudioURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(Bas num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -591,6 +679,8 @@ class ChunkAudioURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -613,6 +703,8 @@ class ChunkAudioURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseMode total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -641,6 +733,8 @@ class ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -669,6 +763,8 @@ class ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseMo audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -687,7 +783,7 @@ class ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseMo file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -699,6 +795,36 @@ class ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseMo frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -720,6 +846,7 @@ def __getattr__(self, attr: str) -> object: ... 
ChunkAudioURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, ChunkAudioURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, ChunkAudioURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, + ChunkAudioURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), @@ -797,6 +924,8 @@ class ChunkVideoURLInputChunkGeneratedMetadataMarkdownChunkGeneratedMetadata(Bas num_lines: Optional[int] = None + file_extension: Optional[str] = None + frontmatter: Optional[Dict[str, object]] = None if TYPE_CHECKING: @@ -827,6 +956,8 @@ class ChunkVideoURLInputChunkGeneratedMetadataTextChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -849,6 +980,8 @@ class ChunkVideoURLInputChunkGeneratedMetadataPdfChunkGeneratedMetadata(BaseMode total_size: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -877,6 +1010,8 @@ class ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata(BaseMod num_lines: Optional[int] = None + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -905,6 +1040,8 @@ class ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata(BaseMo audio_format: int + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. 
@@ -923,7 +1060,7 @@ class ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseMo file_type: str - file_size: int + file_size: Optional[int] = None total_duration_seconds: float @@ -935,6 +1072,36 @@ class ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata(BaseMo frame_count: int + has_audio_stream: Optional[bool] = None + + file_extension: Optional[str] = None + + if TYPE_CHECKING: + # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a + # value to this field, so for compatibility we avoid doing it at runtime. + __pydantic_extra__: Dict[str, object] = FieldInfo(init=False) # pyright: ignore[reportIncompatibleVariableOverride] + + # Stub to indicate that arbitrary properties are accepted. + # To access properties that are not valid identifiers you can use `getattr`, e.g. + # `getattr(obj, '$type')` + def __getattr__(self, attr: str) -> object: ... + else: + __pydantic_extra__: Dict[str, object] + + +class ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata(BaseModel): + type: Optional[Literal["image"]] = None + + file_type: str + + file_size: int + + width: int + + height: int + + file_extension: Optional[str] = None + if TYPE_CHECKING: # Some versions of Pydantic <2.8.0 have a bug and don’t allow assigning a # value to this field, so for compatibility we avoid doing it at runtime. @@ -956,6 +1123,7 @@ def __getattr__(self, attr: str) -> object: ... 
ChunkVideoURLInputChunkGeneratedMetadataCodeChunkGeneratedMetadata, ChunkVideoURLInputChunkGeneratedMetadataAudioChunkGeneratedMetadata, ChunkVideoURLInputChunkGeneratedMetadataVideoChunkGeneratedMetadata, + ChunkVideoURLInputChunkGeneratedMetadataImageChunkGeneratedMetadata, None, ], PropertyInfo(discriminator="type"), diff --git a/tests/api_resources/files/__init__.py b/tests/api_resources/files/__init__.py new file mode 100644 index 00000000..fd8019a9 --- /dev/null +++ b/tests/api_resources/files/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/files/test_uploads.py b/tests/api_resources/files/test_uploads.py new file mode 100644 index 00000000..2dd222a5 --- /dev/null +++ b/tests/api_resources/files/test_uploads.py @@ -0,0 +1,450 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from mixedbread import Mixedbread, AsyncMixedbread +from tests.utils import assert_matches_type +from mixedbread.types import FileObject +from mixedbread.types.files import ( + UploadListResponse, + UploadAbortResponse, + UploadCreateResponse, + UploadRetrieveResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestUploads: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: Mixedbread) -> None: + upload = client.files.uploads.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + ) + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: Mixedbread) -> None: + upload = client.files.uploads.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + 
part_count=3, + ) + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: Mixedbread) -> None: + response = client.files.uploads.with_raw_response.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: Mixedbread) -> None: + with client.files.uploads.with_streaming_response.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: Mixedbread) -> None: + upload = client.files.uploads.retrieve( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(UploadRetrieveResponse, upload, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: Mixedbread) -> None: + response = client.files.uploads.with_raw_response.retrieve( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(UploadRetrieveResponse, upload, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: Mixedbread) -> None: + with client.files.uploads.with_streaming_response.retrieve( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + 
+ upload = response.parse() + assert_matches_type(UploadRetrieveResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: Mixedbread) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.files.uploads.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_list(self, client: Mixedbread) -> None: + upload = client.files.uploads.list() + assert_matches_type(UploadListResponse, upload, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: Mixedbread) -> None: + response = client.files.uploads.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(UploadListResponse, upload, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: Mixedbread) -> None: + with client.files.uploads.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(UploadListResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_abort(self, client: Mixedbread) -> None: + upload = client.files.uploads.abort( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(UploadAbortResponse, upload, path=["response"]) + + @parametrize + def test_raw_response_abort(self, client: Mixedbread) -> None: + response = client.files.uploads.with_raw_response.abort( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(UploadAbortResponse, upload, path=["response"]) + + @parametrize + def 
test_streaming_response_abort(self, client: Mixedbread) -> None: + with client.files.uploads.with_streaming_response.abort( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(UploadAbortResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_abort(self, client: Mixedbread) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.files.uploads.with_raw_response.abort( + "", + ) + + @parametrize + def test_method_complete(self, client: Mixedbread) -> None: + upload = client.files.uploads.complete( + upload_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) + assert_matches_type(FileObject, upload, path=["response"]) + + @parametrize + def test_raw_response_complete(self, client: Mixedbread) -> None: + response = client.files.uploads.with_raw_response.complete( + upload_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(FileObject, upload, path=["response"]) + + @parametrize + def test_streaming_response_complete(self, client: Mixedbread) -> None: + with client.files.uploads.with_streaming_response.complete( + upload_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(FileObject, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def 
test_path_params_complete(self, client: Mixedbread) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.files.uploads.with_raw_response.complete( + upload_id="", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) + + +class TestAsyncUploads: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @parametrize + async def test_method_create(self, async_client: AsyncMixedbread) -> None: + upload = await async_client.files.uploads.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + ) + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncMixedbread) -> None: + upload = await async_client.files.uploads.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + part_count=3, + ) + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncMixedbread) -> None: + response = await async_client.files.uploads.with_raw_response.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = await response.parse() + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncMixedbread) -> None: + async with async_client.files.uploads.with_streaming_response.create( + filename="document.pdf", + file_size=10485760, + mime_type="application/pdf", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await 
response.parse() + assert_matches_type(UploadCreateResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncMixedbread) -> None: + upload = await async_client.files.uploads.retrieve( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(UploadRetrieveResponse, upload, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncMixedbread) -> None: + response = await async_client.files.uploads.with_raw_response.retrieve( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = await response.parse() + assert_matches_type(UploadRetrieveResponse, upload, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncMixedbread) -> None: + async with async_client.files.uploads.with_streaming_response.retrieve( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(UploadRetrieveResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncMixedbread) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.files.uploads.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_list(self, async_client: AsyncMixedbread) -> None: + upload = await async_client.files.uploads.list() + assert_matches_type(UploadListResponse, upload, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncMixedbread) -> None: + response = await 
async_client.files.uploads.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = await response.parse() + assert_matches_type(UploadListResponse, upload, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncMixedbread) -> None: + async with async_client.files.uploads.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(UploadListResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_abort(self, async_client: AsyncMixedbread) -> None: + upload = await async_client.files.uploads.abort( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + assert_matches_type(UploadAbortResponse, upload, path=["response"]) + + @parametrize + async def test_raw_response_abort(self, async_client: AsyncMixedbread) -> None: + response = await async_client.files.uploads.with_raw_response.abort( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = await response.parse() + assert_matches_type(UploadAbortResponse, upload, path=["response"]) + + @parametrize + async def test_streaming_response_abort(self, async_client: AsyncMixedbread) -> None: + async with async_client.files.uploads.with_streaming_response.abort( + "182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(UploadAbortResponse, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_abort(self, async_client: AsyncMixedbread) -> 
None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.files.uploads.with_raw_response.abort( + "", + ) + + @parametrize + async def test_method_complete(self, async_client: AsyncMixedbread) -> None: + upload = await async_client.files.uploads.complete( + upload_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) + assert_matches_type(FileObject, upload, path=["response"]) + + @parametrize + async def test_raw_response_complete(self, async_client: AsyncMixedbread) -> None: + response = await async_client.files.uploads.with_raw_response.complete( + upload_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = await response.parse() + assert_matches_type(FileObject, upload, path=["response"]) + + @parametrize + async def test_streaming_response_complete(self, async_client: AsyncMixedbread) -> None: + async with async_client.files.uploads.with_streaming_response.complete( + upload_id="182bd5e5-6e1a-4fe4-a799-aa6d9a6ab26e", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(FileObject, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_complete(self, async_client: AsyncMixedbread) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.files.uploads.with_raw_response.complete( + upload_id="", + parts=[ + { + "part_number": 1, + "etag": "etag", + } + ], + ) diff --git a/tests/test_client.py b/tests/test_client.py index 
6696b539..8cc00170 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -967,6 +967,14 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None: # Test that the proxy environment variables are set correctly monkeypatch.setenv("HTTPS_PROXY", "https://example.org") + # Delete in case our environment has any proxy env vars set + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.delenv("ALL_PROXY", raising=False) + monkeypatch.delenv("NO_PROXY", raising=False) + monkeypatch.delenv("http_proxy", raising=False) + monkeypatch.delenv("https_proxy", raising=False) + monkeypatch.delenv("all_proxy", raising=False) + monkeypatch.delenv("no_proxy", raising=False) client = DefaultHttpxClient() @@ -1887,6 +1895,14 @@ async def test_get_platform(self) -> None: async def test_proxy_environment_variables(self, monkeypatch: pytest.MonkeyPatch) -> None: # Test that the proxy environment variables are set correctly monkeypatch.setenv("HTTPS_PROXY", "https://example.org") + # Delete in case our environment has any proxy env vars set + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.delenv("ALL_PROXY", raising=False) + monkeypatch.delenv("NO_PROXY", raising=False) + monkeypatch.delenv("http_proxy", raising=False) + monkeypatch.delenv("https_proxy", raising=False) + monkeypatch.delenv("all_proxy", raising=False) + monkeypatch.delenv("no_proxy", raising=False) client = DefaultAsyncHttpxClient()