Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion .ci/scripts/export_model_artifact.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Arguments:
- nvidia/diar_streaming_sortformer_4spk-v2
- nvidia/parakeet-tdt
- facebook/dinov2-small-imagenet1k-1-layer
- facebook/dinov3-vits16-pretrain-lvd1689m

quant_name Quantization type (optional, default: non-quantized)
Options:
Expand Down Expand Up @@ -176,6 +177,14 @@ case "$HF_MODEL" in
PREPROCESSOR_FEATURE_SIZE=""
PREPROCESSOR_OUTPUT=""
;;
facebook/dinov3-vits16-pretrain-lvd1689m)
MODEL_NAME="dinov3"
TASK=""
MAX_SEQ_LEN=""
EXTRA_PIP=""
PREPROCESSOR_FEATURE_SIZE=""
PREPROCESSOR_OUTPUT=""
;;
mistralai/Voxtral-Mini-4B-Realtime-2602)
MODEL_NAME="voxtral_realtime"
TASK=""
Expand All @@ -186,7 +195,7 @@ case "$HF_MODEL" in
;;
*)
echo "Error: Unsupported model '$HF_MODEL'"
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, facebook/dinov3-vits16-pretrain-lvd1689m"
exit 1
;;
esac
Expand Down Expand Up @@ -319,6 +328,24 @@ if [ "$MODEL_NAME" = "dinov2" ]; then
exit 0
fi

# DINOv3 uses a custom export script (random weights since classifier head is untrained)
if [ "$MODEL_NAME" = "dinov3" ]; then
  # Model-specific Python deps for the export step.
  pip install -r examples/models/dinov3/install_requirements.txt

  python -m executorch.examples.models.dinov3.export_dinov3 \
    --backend "$DEVICE" \
    --output-dir "${OUTPUT_DIR}" \
    --random-weights

  # Sanity-check the export produced the program file. NOTE(review): these
  # bare `test -f` commands only fail the job if the script runs under
  # `set -e` (set outside this view) — confirm.
  test -f "${OUTPUT_DIR}/model.pte"
  # CUDA exports (Linux and Windows) additionally emit an AOTI data blob.
  if [ "$DEVICE" = "cuda" ] || [ "$DEVICE" = "cuda-windows" ]; then
    test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
  fi
  ls -al "${OUTPUT_DIR}"
  echo "::endgroup::"
  exit 0
fi

# Voxtral Realtime uses a custom export script
if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
pip install safetensors huggingface_hub
Expand Down
24 changes: 22 additions & 2 deletions .ci/scripts/test_model_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Arguments:
- Qwen/Qwen3-0.6B
- nvidia/parakeet-tdt
- facebook/dinov2-small-imagenet1k-1-layer
- facebook/dinov3-vits16-pretrain-lvd1689m
- mistralai/Voxtral-Mini-4B-Realtime-2602

quant_name Quantization type (required)
Expand Down Expand Up @@ -204,6 +205,19 @@ case "$HF_MODEL" in
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
IMAGE_PATH=""
;;
facebook/dinov3-vits16-pretrain-lvd1689m)
MODEL_NAME="dinov3"
RUNNER_TARGET="dinov3_runner"
RUNNER_PATH="dinov3"
EXPECTED_OUTPUT="predictions"
PREPROCESSOR=""
TOKENIZER_URL=""
TOKENIZER_FILE=""
AUDIO_URL=""
AUDIO_FILE=""
IMAGE_URL="https://github.com/pytorch/hub/raw/master/images/dog.jpg"
IMAGE_PATH=""
;;
mistralai/Voxtral-Mini-4B-Realtime-2602)
MODEL_NAME="voxtral_realtime"
RUNNER_TARGET="voxtral_realtime_runner"
Expand All @@ -218,7 +232,7 @@ case "$HF_MODEL" in
;;
*)
echo "Error: Unsupported model '$HF_MODEL'"
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, facebook/dinov3-vits16-pretrain-lvd1689m"
exit 1
;;
esac
Expand All @@ -232,7 +246,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"


# Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ] && [ "$MODEL_NAME" != "dinov3" ]; then
if [ "$TOKENIZER_FILE" != "" ]; then
curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
else
Expand Down Expand Up @@ -341,6 +355,12 @@ EOF
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
fi
;;
dinov3)
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --image_path ${MODEL_DIR}/test_image.jpg"
if [ "$DEVICE" = "cuda" ]; then
RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
fi
;;
voxtral_realtime)
RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
# Add CUDA data path if present
Expand Down
22 changes: 21 additions & 1 deletion .ci/scripts/test_model_e2e_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,21 @@ switch ($HfModel) {
$imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
$imageFile = "test_image.jpg"
}
"facebook/dinov3-vits16-pretrain-lvd1689m" {
$runnerTarget = "dinov3_runner"
$runnerPath = "dinov3"
$runnerPreset = "dinov3-cuda"
$expectedOutput = "predictions"
$preprocessor = ""
$tokenizerUrl = ""
$tokenizerFile = ""
$audioUrl = ""
$audioFile = ""
$imageUrl = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
$imageFile = "test_image.jpg"
}
default {
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
throw "Unsupported model '$HfModel'. Supported: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, facebook/dinov3-vits16-pretrain-lvd1689m"
}
}

Expand Down Expand Up @@ -244,6 +257,13 @@ try {
"--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
)
}
"facebook/dinov3-vits16-pretrain-lvd1689m" {
$runnerArgs = @(
"--model_path", $modelPte,
"--data_path", $cudaBlob,
"--image_path", (Join-Path -Path $resolvedModelDir -ChildPath $imageFile)
)
}
}

$stdoutFile = Join-Path -Path $env:TEMP -ChildPath ("et_runner_stdout_{0}.log" -f ([Guid]::NewGuid().ToString("N")))
Expand Down
10 changes: 8 additions & 2 deletions .github/workflows/cuda-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ jobs:
- model_repo: "facebook"
model_name: "dinov2-small-imagenet1k-1-layer"
quant: "non-quantized"
- model_repo: "facebook"
model_name: "dinov3-vits16-pretrain-lvd1689m"
quant: "non-quantized"
with:
timeout: 90
secrets-env: EXECUTORCH_HF_TOKEN
Expand Down Expand Up @@ -86,8 +89,8 @@ jobs:
PYTHON_EXECUTABLE=python ./install_executorch.sh
echo "::endgroup::"

# Setup Huggingface only for models that need it (not dinov2)
if [ "${{ matrix.model_name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
# Setup Huggingface only for models that need it (not dinov2 or dinov3)
if [ "${{ matrix.model_name }}" != "dinov2-small-imagenet1k-1-layer" ] && [ "${{ matrix.model_name }}" != "dinov3-vits16-pretrain-lvd1689m" ]; then
echo "::group::Setup Huggingface"
pip install -U "huggingface_hub[cli]<1.0" accelerate
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
Expand Down Expand Up @@ -131,6 +134,9 @@ jobs:
- model_repo: "facebook"
model_name: "dinov2-small-imagenet1k-1-layer"
quant: "non-quantized"
- model_repo: "facebook"
model_name: "dinov3-vits16-pretrain-lvd1689m"
quant: "non-quantized"
with:
timeout: 240
runner: windows.g5.4xlarge.nvidia.gpu
Expand Down
26 changes: 24 additions & 2 deletions .github/workflows/cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,8 @@ jobs:
name: "parakeet-tdt"
- repo: "facebook"
name: "dinov2-small-imagenet1k-1-layer"
- repo: "facebook"
name: "dinov3-vits16-pretrain-lvd1689m"
quant:
- "non-quantized"
- "quantized-int4-tile-packed"
Expand Down Expand Up @@ -190,6 +192,15 @@ jobs:
repo: "facebook"
name: "dinov2-small-imagenet1k-1-layer"
quant: "quantized-int4-weight-only"
# DINOv3 currently supports only non-quantized export
- model:
repo: "facebook"
name: "dinov3-vits16-pretrain-lvd1689m"
quant: "quantized-int4-tile-packed"
- model:
repo: "facebook"
name: "dinov3-vits16-pretrain-lvd1689m"
quant: "quantized-int4-weight-only"
with:
timeout: 90
secrets-env: EXECUTORCH_HF_TOKEN
Expand All @@ -209,8 +220,8 @@ jobs:
./install_executorch.sh
echo "::endgroup::"

# Setup Huggingface only for models that need it (not parakeet or dinov2)
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ] && [ "${{ matrix.model.name }}" != "dinov2-small-imagenet1k-1-layer" ]; then
# Setup Huggingface only for models that need it (not parakeet, dinov2, or dinov3)
if [ "${{ matrix.model.name }}" != "parakeet-tdt" ] && [ "${{ matrix.model.name }}" != "dinov2-small-imagenet1k-1-layer" ] && [ "${{ matrix.model.name }}" != "dinov3-vits16-pretrain-lvd1689m" ]; then
echo "::group::Setup Huggingface"
pip install -U "huggingface_hub[cli]<1.0" accelerate
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
Expand Down Expand Up @@ -248,6 +259,8 @@ jobs:
name: "parakeet-tdt"
- repo: "facebook"
name: "dinov2-small-imagenet1k-1-layer"
- repo: "facebook"
name: "dinov3-vits16-pretrain-lvd1689m"
quant:
- "non-quantized"
- "quantized-int4-tile-packed"
Expand Down Expand Up @@ -285,6 +298,15 @@ jobs:
repo: "facebook"
name: "dinov2-small-imagenet1k-1-layer"
quant: "quantized-int4-weight-only"
# DINOv3 currently supports only non-quantized export
- model:
repo: "facebook"
name: "dinov3-vits16-pretrain-lvd1689m"
quant: "quantized-int4-tile-packed"
- model:
repo: "facebook"
name: "dinov3-vits16-pretrain-lvd1689m"
quant: "quantized-int4-weight-only"
with:
timeout: 90
runner: linux.g5.4xlarge.nvidia.gpu
Expand Down
22 changes: 21 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@
#
# ==============================================================================

.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal dinov2-cuda dinov2-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help
.PHONY: voxtral-cuda voxtral-cpu voxtral-metal voxtral_realtime-cuda voxtral_realtime-cpu voxtral_realtime-metal whisper-cuda whisper-cuda-debug whisper-cpu whisper-metal parakeet-cuda parakeet-cuda-debug parakeet-cpu parakeet-metal dinov2-cuda dinov2-cuda-debug dinov3-cuda dinov3-cuda-debug sortformer-cuda sortformer-cpu silero-vad-cpu llama-cuda llama-cuda-debug llama-cpu llava-cpu gemma3-cuda gemma3-cpu clean help

help:
@echo "This Makefile adds targets to build runners for various models on various backends. Run using \`make <target>\`. Available targets:"
Expand All @@ -111,6 +111,8 @@ help:
@echo " parakeet-metal - Build Parakeet runner with Metal backend (macOS only)"
@echo " dinov2-cuda - Build DINOv2 runner with CUDA backend"
@echo " dinov2-cuda-debug - Build DINOv2 runner with CUDA backend (debug mode)"
@echo " dinov3-cuda - Build DINOv3 runner with CUDA backend"
@echo " dinov3-cuda-debug - Build DINOv3 runner with CUDA backend (debug mode)"
@echo " sortformer-cuda - Build Sortformer runner with CUDA backend"
@echo " sortformer-cpu - Build Sortformer runner with CPU backend"
@echo " silero-vad-cpu - Build Silero VAD runner with CPU backend"
Expand Down Expand Up @@ -239,6 +241,24 @@ dinov2-cuda-debug:
@echo "✓ Build complete!"
@echo " Binary: cmake-out/examples/models/dinov2/dinov2_runner"

# Release build of the DINOv3 runner: first builds/installs ExecuTorch with the
# CUDA preset, then builds the example runner via its own CMake workflow preset.
dinov3-cuda:
	@echo "==> Building and installing ExecuTorch with CUDA..."
	cmake --workflow --preset llm-release-cuda
	@echo "==> Building DINOv3 runner with CUDA..."
	cd examples/models/dinov3 && cmake --workflow --preset dinov3-cuda
	@echo ""
	@echo "✓ Build complete!"
	@echo " Binary: cmake-out/examples/models/dinov3/dinov3_runner"

# Debug build of the DINOv3 runner: same two-phase build as dinov3-cuda but
# using the debug CMake workflow presets.
dinov3-cuda-debug:
	@echo "==> Building and installing ExecuTorch with CUDA (debug mode)..."
	cmake --workflow --preset llm-debug-cuda
	@echo "==> Building DINOv3 runner with CUDA (debug mode)..."
	cd examples/models/dinov3 && cmake --workflow --preset dinov3-cuda-debug
	@echo ""
	@echo "✓ Build complete!"
	@echo " Binary: cmake-out/examples/models/dinov3/dinov3_runner"

sortformer-cuda:
@echo "==> Building and installing ExecuTorch with CUDA..."
cmake --workflow --preset llm-release-cuda
Expand Down
88 changes: 88 additions & 0 deletions examples/models/dinov3/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Builds the DINOv3 example runner (dinov3_runner): a standalone executable
# (main.cpp) that links the ExecuTorch runtime plus the CUDA (AOTI) backend.

cmake_minimum_required(VERSION 3.24)
project(dinov3_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Repository root: three levels up from examples/models/dinov3.
set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..)

include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)

# Let files say "include <executorch/path/to/header.h>"
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# Need this for gflags (command-line flag parsing); point find_package at the
# gflags config produced in the main ExecuTorch build tree.
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

# Find executorch libraries
list(APPEND CMAKE_FIND_ROOT_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../..)
find_package(executorch CONFIG REQUIRED FIND_ROOT_PATH_BOTH)
get_target_property(_executorch_imported executorch IMPORTED)
if(NOT _executorch_imported)
  # Only for an in-tree (non-imported) executorch target, apply the helper from
  # Utils.cmake — presumably whole-archive-style link options so registrations
  # are not dropped by the linker; TODO confirm against Utils.cmake.
  executorch_target_link_options_shared_lib(executorch)
endif()

set(link_libraries executorch gflags)

# Common ops (optional: only linked when the CPU ops library was built).
if(TARGET optimized_native_cpu_ops_lib)
  list(APPEND link_libraries optimized_native_cpu_ops_lib cpublas eigen_blas)
  get_target_property(_is_imported optimized_native_cpu_ops_lib IMPORTED)
  if(NOT _is_imported)
    executorch_target_link_options_shared_lib(optimized_native_cpu_ops_lib)
  endif()
endif()

# Add the required ExecuTorch extensions
list(APPEND link_libraries extension_module extension_data_loader
  extension_tensor extension_flat_tensor
)

# stb_image: lightweight library to load and resize images
include(FetchContent)
FetchContent_Declare(
  stb
  GIT_REPOSITORY https://github.com/nothings/stb.git
  GIT_TAG f0569113c93ad095470c54bf34a17b36646bbbb5
)
FetchContent_MakeAvailable(stb)
list(APPEND _common_include_directories ${stb_SOURCE_DIR}
  ${stb_SOURCE_DIR}/deprecated
)

# Link CUDA backend (hard requirement — this runner has no CPU-only build path).
find_package(CUDAToolkit REQUIRED)
list(APPEND link_libraries aoti_cuda_backend)
if(NOT MSVC)
  executorch_target_link_options_shared_lib(aoti_cuda_backend)
endif()

add_executable(dinov3_runner main.cpp)
# In non-debug builds, garbage-collect unused sections and strip symbols;
# "LINKER:-s" is skipped on MSVC, whose linker does not accept that flag.
if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
  target_link_options_gc_sections(dinov3_runner)
  if(NOT MSVC)
    target_link_options(dinov3_runner PRIVATE "LINKER:-s")
  endif()
endif()

target_include_directories(dinov3_runner PUBLIC ${_common_include_directories})
target_link_libraries(dinov3_runner PUBLIC ${link_libraries})
# NOTE(review): _common_compile_options is not set anywhere in this file —
# presumably populated by the Utils.cmake include above; verify.
target_compile_options(dinov3_runner PUBLIC ${_common_compile_options})

# On Windows, copy required DLLs to the executable directory
if(MSVC)
  add_custom_command(
    TARGET dinov3_runner
    POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:aoti_cuda_shims>
      $<TARGET_FILE_DIR:dinov3_runner>
    COMMENT "Copying aoti_cuda_shims.dll to dinov3_runner directory"
  )
endif()
Loading
Loading