Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 111 additions & 3 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7492,6 +7492,64 @@ paths:
schema:
description: Operation ID
type: string
# Path item: submit a "save training checkpoint" operation for one training session.
# NOTE(review): indentation is not visible in this view, so it cannot be confirmed
# here whether `parameters` below belongs to the `post` operation or to the path item.
/rl/training-sessions/{session_id}/operations/training-checkpoint:
post:
summary: Save training checkpoint
description: Submits an operation that will asynchronously save the full training state (adapter + optimizer + step).
operationId: createTrainingCheckpoint
tags: [RL]
# No requestBody is declared in this block; the only declared input is the path parameter.
responses:
# Success returns the operation record (RL.TrainingCheckpointOperation), not the checkpoint itself.
"200":
description: Save training checkpoint operation details
content:
application/json:
schema:
$ref: '#/components/schemas/RL.TrainingCheckpointOperation'
# Responses with any status code not listed above use the shared ErrorData schema.
default:
description: An unexpected error response.
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorData'
parameters:
# Required path segment naming the training session the checkpoint is saved for.
- name: session_id
in: path
required: true
schema:
description: Training session ID
type: string
# Path item: read back a previously submitted "save training checkpoint" operation.
# NOTE(review): indentation is not visible in this view, so it cannot be confirmed
# here whether `parameters` below belongs to the `get` operation or to the path item.
/rl/training-sessions/{session_id}/operations/training-checkpoint/{operation_id}:
get:
summary: Get save training checkpoint operation
description: Retrieves the current status and result of a save training checkpoint operation.
operationId: getTrainingCheckpointOperation
tags: [RL]
responses:
# Success returns the same RL.TrainingCheckpointOperation schema that the POST endpoint returns.
"200":
description: Save training checkpoint operation details
content:
application/json:
schema:
$ref: '#/components/schemas/RL.TrainingCheckpointOperation'
# Responses with any status code not listed above use the shared ErrorData schema.
default:
description: An unexpected error response.
content:
application/json:
schema:
$ref: '#/components/schemas/ErrorData'
parameters:
# Both path template variables are declared as required string parameters.
- name: session_id
in: path
required: true
schema:
description: Training session ID
type: string
- name: operation_id
in: path
required: true
schema:
description: Operation ID
type: string
/rl/checkpoints/{id}/download:
get:
summary: Download checkpoint
Expand Down Expand Up @@ -7976,10 +8034,10 @@ components:
description: Base model to use for the training session
type: string
example: meta-llama/Meta-Llama-3-8B-Instruct
checkpoint_id:
description: Checkpoint ID to use for the training session
resume_from_checkpoint_id:
description: Checkpoint ID to resume from
type: string
example: checkpoint-123
example: 123e4567-e89b-12d3-a456-426614174000
lora_config:
$ref: '#/components/schemas/RL.LoraConfig'
RL.TrainingSessionStatus:
Expand Down Expand Up @@ -8015,6 +8073,16 @@ components:
type: object
$ref: '#/components/schemas/RL.InferenceCheckpoint'
description: List of saved inference checkpoints for this session
training_checkpoints:
type: array
items:
type: object
$ref: '#/components/schemas/RL.TrainingCheckpoint'
description: List of saved training checkpoints for this session
resume_from_checkpoint_id:
type: string
example: 123e4567-e89b-12d3-a456-426614174000
description: Checkpoint ID this session was resumed from
step:
description: Current training step
type: string
Expand Down Expand Up @@ -8151,6 +8219,46 @@ components:
format: date-time
example: "2026-01-02T00:00:00Z"
description: Timestamp when the model was registered
# Schema for one saved training checkpoint: its ID, the training step at save
# time, and its creation timestamp.
RL.TrainingCheckpoint:
  type: object
  description: Saved training checkpoint
  properties:
    id:
      type: string
      example: 123e4567-e89b-12d3-a456-426614174000
      description: Unique identifier for the checkpoint
    step:
      # Declared as a string with `format: uint64`, so the example must be a
      # string too. An unquoted 42 is parsed by YAML as an integer and does
      # not match `type: string`.
      # NOTE(review): presumably string-encoded so 64-bit values survive JSON
      # number precision limits - confirm against the API implementation.
      type: string
      format: uint64
      example: "42"
      description: Training step at time of save
    created_at:
      type: string
      format: date-time
      # Quoted so YAML keeps the timestamp as a string instead of a date type.
      example: "2026-01-02T00:00:00Z"
      description: Timestamp when the checkpoint was created
# Result payload referenced as `output` by RL.TrainingCheckpointOperation;
# it carries only the ID of the saved checkpoint.
RL.TrainingCheckpointResult:
  type: object
  properties:
    checkpoint_id:
      description: ID of the saved training checkpoint (use for resume via Start)
      type: string
      # Quoted defensively: the value starts with digits followed by `e`.
      example: "550e8400-e29b-41d4-a716-446655440000"
# Record describing an asynchronous save-training-checkpoint operation:
# its ID, its status, and references to the result and error schemas.
RL.TrainingCheckpointOperation:
  type: object
  properties:
    id:
      description: Operation ID
      type: string
      # Quoted defensively: the value starts with digits followed by `e`.
      example: "550e8400-e29b-41d4-a716-446655440000"
    status:
      # NOTE(review): under OpenAPI 3.0.x, keywords placed beside `$ref` are
      # ignored by spec-compliant tooling, so `description` and `example`
      # here may not be rendered. Confirm the spec version before relying
      # on them (3.1 permits siblings).
      $ref: '#/components/schemas/RL.TrainingOperationStatus'
      description: Operation status
      example: TRAINING_OPERATION_STATUS_PENDING
    # NOTE(review): presumably `output` is set on success and `error` on
    # failure - confirm against the API implementation.
    output:
      $ref: '#/components/schemas/RL.TrainingCheckpointResult'
    error:
      $ref: '#/components/schemas/RL.TrainingOperationError'
RL.CheckpointVariant:
type: string
enum:
Expand Down
Loading