From 27c2461e3cd5b761a244e1a8feeef98c973d5ba7 Mon Sep 17 00:00:00 2001 From: thepetk Date: Sun, 2 Nov 2025 19:05:11 +0000 Subject: [PATCH 1/5] Use regex for Optional, Annotated, FieldInfo and class cleanup Signed-off-by: thepetk --- scripts/provider_codegen.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index de79b4d179..3013d54e3b 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -5,6 +5,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import re import subprocess import sys from pathlib import Path @@ -80,12 +81,15 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: continue field_type = str(field.annotation) if field.annotation else "Any" - # this string replace is ridiculous - field_type = field_type.replace("typing.", "").replace("Optional[", "").replace("]", "") - field_type = field_type.replace("Annotated[", "").replace("FieldInfo(", "").replace(")", "") + field_type = field_type.replace("typing.", "") field_type = field_type.replace("llama_stack.apis.inference.inference.", "") field_type = field_type.replace("llama_stack.providers.", "") + field_type = re.sub(r"Optional\[([^\]]+)\]", r"\1 | None", field_type) + field_type = re.sub(r"Annotated\[([^,]+),.*?\]", r"\1", field_type) + field_type = re.sub(r"FieldInfo\([^)]*\)", "", field_type) + field_type = re.sub(r"<class '([^']+)'>", r"\1", field_type) + default_value = field.default if field.default_factory is not None: try: From ab7a8210635ab6454b7e36cb9bfb96a96111556e Mon Sep 17 00:00:00 2001 From: thepetk Date: Sun, 2 Nov 2025 19:09:57 +0000 Subject: [PATCH 2/5] Support multi-line YAML formatting in index docs Signed-off-by: thepetk --- scripts/provider_codegen.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index 
3013d54e3b..e0cb35bcd9 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -348,8 +348,16 @@ def generate_index_docs(api_name: str, api_docstring: str | None, provider_entri # Add YAML frontmatter for index md_lines.append("---") if api_docstring: - clean_desc = api_docstring.strip().replace('"', '\\"') - md_lines.append(f'description: "{clean_desc}"') + # Handle multi-line descriptions in YAML + if "\n" in api_docstring.strip(): + md_lines.append("description: |") + for line in api_docstring.strip().split("\n"): + # Avoid trailing whitespace by only adding spaces to non-empty lines + md_lines.append(f" {line}" if line.strip() else "") + else: + # For single line descriptions, format properly for YAML + clean_desc = api_docstring.strip().replace('"', '\\"') + md_lines.append(f'description: "{clean_desc}"') md_lines.append(f"sidebar_label: {sidebar_label}") md_lines.append(f"title: {api_name.title()}") md_lines.append("---") From b5b63776e92dba188ca80b4f55187ab5bbf965fe Mon Sep 17 00:00:00 2001 From: thepetk Date: Sun, 2 Nov 2025 19:10:51 +0000 Subject: [PATCH 3/5] Generate updated docs --- docs/docs/providers/agents/index.mdx | 7 ++-- .../agents/inline_meta-reference.mdx | 2 +- docs/docs/providers/batches/index.mdx | 27 ++++++++------- .../providers/batches/inline_reference.mdx | 6 ++-- .../providers/datasetio/inline_localfs.mdx | 2 +- .../datasetio/remote_huggingface.mdx | 2 +- .../providers/datasetio/remote_nvidia.mdx | 2 +- docs/docs/providers/eval/index.mdx | 7 ++-- .../providers/eval/inline_meta-reference.mdx | 2 +- docs/docs/providers/eval/remote_nvidia.mdx | 2 +- docs/docs/providers/files/index.mdx | 7 ++-- docs/docs/providers/files/inline_localfs.mdx | 6 ++-- docs/docs/providers/files/remote_openai.mdx | 4 +-- docs/docs/providers/files/remote_s3.mdx | 8 ++--- docs/docs/providers/inference/index.mdx | 23 +++++++------ .../inference/inline_meta-reference.mdx | 8 ++--- .../providers/inference/remote_anthropic.mdx | 4 +-- 
.../docs/providers/inference/remote_azure.mdx | 6 ++-- .../providers/inference/remote_bedrock.mdx | 6 ++-- .../providers/inference/remote_cerebras.mdx | 6 ++-- .../providers/inference/remote_databricks.mdx | 4 +-- .../providers/inference/remote_fireworks.mdx | 6 ++-- .../providers/inference/remote_gemini.mdx | 4 +-- docs/docs/providers/inference/remote_groq.mdx | 6 ++-- .../inference/remote_hf_endpoint.mdx | 2 +- .../inference/remote_hf_serverless.mdx | 2 +- .../inference/remote_llama-openai-compat.mdx | 6 ++-- .../providers/inference/remote_nvidia.mdx | 12 +++---- .../providers/inference/remote_ollama.mdx | 6 ++-- .../providers/inference/remote_openai.mdx | 6 ++-- .../inference/remote_passthrough.mdx | 6 ++-- .../providers/inference/remote_runpod.mdx | 4 +-- .../providers/inference/remote_sambanova.mdx | 6 ++-- docs/docs/providers/inference/remote_tgi.mdx | 6 ++-- .../providers/inference/remote_together.mdx | 6 ++-- .../providers/inference/remote_vertexai.mdx | 8 ++--- docs/docs/providers/inference/remote_vllm.mdx | 6 ++-- .../providers/inference/remote_watsonx.mdx | 8 ++--- .../post_training/inline_huggingface-gpu.mdx | 34 +++++++++---------- .../post_training/inline_torchtune-cpu.mdx | 2 +- .../post_training/inline_torchtune-gpu.mdx | 2 +- .../providers/post_training/remote_nvidia.mdx | 6 ++-- docs/docs/providers/safety/index.mdx | 7 ++-- .../providers/safety/inline_llama-guard.mdx | 2 +- .../providers/safety/inline_prompt-guard.mdx | 2 +- docs/docs/providers/safety/remote_bedrock.mdx | 6 ++-- docs/docs/providers/safety/remote_nvidia.mdx | 2 +- .../providers/safety/remote_sambanova.mdx | 2 +- .../tool_runtime/remote_bing-search.mdx | 2 +- .../tool_runtime/remote_brave-search.mdx | 2 +- .../tool_runtime/remote_tavily-search.mdx | 2 +- .../providers/vector_io/inline_chromadb.mdx | 4 +-- .../docs/providers/vector_io/inline_faiss.mdx | 2 +- .../vector_io/inline_meta-reference.mdx | 2 +- .../providers/vector_io/inline_milvus.mdx | 6 ++-- 
.../providers/vector_io/inline_qdrant.mdx | 4 +-- .../providers/vector_io/inline_sqlite-vec.mdx | 4 +-- .../providers/vector_io/inline_sqlite_vec.mdx | 4 +-- .../providers/vector_io/remote_chromadb.mdx | 2 +- .../providers/vector_io/remote_milvus.mdx | 6 ++-- .../providers/vector_io/remote_qdrant.mdx | 6 ++-- 61 files changed, 179 insertions(+), 173 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 06eb104afa..1f7e0c7888 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,8 @@ --- -description: "Agents +description: | + Agents - APIs for creating and interacting with agentic systems." + APIs for creating and interacting with agentic systems. sidebar_label: Agents title: Agents --- @@ -12,6 +13,6 @@ title: Agents Agents - APIs for creating and interacting with agentic systems. +APIs for creating and interacting with agentic systems. This section contains documentation for all available providers for the **agents** API. 
diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx index fac9b84063..91575b5138 100644 --- a/docs/docs/providers/agents/inline_meta-reference.mdx +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of an agent system that can use tools, access ve | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `inline.agents.meta_reference.config.AgentPersistenceConfig` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f8..23b7df14b7 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,15 @@ --- -description: "The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +description: | + The Batches API enables efficient processing of multiple requests in a single operation, + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes." + Note: This API is currently under active development and may undergo changes. 
sidebar_label: Batches title: Batches --- @@ -18,14 +19,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx index 45304fbb1a..b994e0ce4e 100644 --- a/docs/docs/providers/batches/inline_reference.mdx +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -14,9 +14,9 @@ Reference implementation of batches API with KVStore persistence. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | Configuration for the key-value store backend. | -| `max_concurrent_batches` | `` | No | 1 | Maximum number of concurrent batches to process simultaneously. | -| `max_concurrent_requests_per_batch` | `` | No | 10 | Maximum number of concurrent requests to process per batch. | +| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Configuration for the key-value store backend. | +| `max_concurrent_batches` | `int` | No | 1 | Maximum number of concurrent batches to process simultaneously. 
| +| `max_concurrent_requests_per_batch` | `int` | No | 10 | Maximum number of concurrent requests to process per batch. | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx index a9363376c9..d5130abd55 100644 --- a/docs/docs/providers/datasetio/inline_localfs.mdx +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -14,7 +14,7 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx index de3ffaaa66..d8ef7cd24c 100644 --- a/docs/docs/providers/datasetio/remote_huggingface.mdx +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -14,7 +14,7 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_nvidia.mdx b/docs/docs/providers/datasetio/remote_nvidia.mdx index 35a7dacee8..97c48d8104 100644 --- a/docs/docs/providers/datasetio/remote_nvidia.mdx +++ b/docs/docs/providers/datasetio/remote_nvidia.mdx @@ -17,7 +17,7 @@ NVIDIA's dataset I/O provider for accessing datasets from NVIDIA's data platform | `api_key` | `str \| None` | No | | The NVIDIA API key. | | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `project_id` | `str \| None` | No | test-project | The NVIDIA project ID. 
| -| `datasets_url` | `` | No | http://nemo.test | Base URL for the NeMo Dataset API | +| `datasets_url` | `str` | No | http://nemo.test | Base URL for the NeMo Dataset API | ## Sample Configuration diff --git a/docs/docs/providers/eval/index.mdx b/docs/docs/providers/eval/index.mdx index 94bafe15e7..a6e35d6118 100644 --- a/docs/docs/providers/eval/index.mdx +++ b/docs/docs/providers/eval/index.mdx @@ -1,7 +1,8 @@ --- -description: "Evaluations +description: | + Evaluations - Llama Stack Evaluation API for running evaluations on model and agent candidates." + Llama Stack Evaluation API for running evaluations on model and agent candidates. sidebar_label: Eval title: Eval --- @@ -12,6 +13,6 @@ title: Eval Evaluations - Llama Stack Evaluation API for running evaluations on model and agent candidates. +Llama Stack Evaluation API for running evaluations on model and agent candidates. This section contains documentation for all available providers for the **eval** API. diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx index 2c86c18c9c..3ca105410d 100644 --- a/docs/docs/providers/eval/inline_meta-reference.mdx +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of evaluation tasks with support for multiple la | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `` | No | | | +| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/eval/remote_nvidia.mdx b/docs/docs/providers/eval/remote_nvidia.mdx index 36bb4726b4..3114967916 100644 --- a/docs/docs/providers/eval/remote_nvidia.mdx +++ b/docs/docs/providers/eval/remote_nvidia.mdx @@ -14,7 +14,7 @@ NVIDIA's evaluation provider for running evaluation tasks on NVIDIA's platform. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `evaluator_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | +| `evaluator_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the evaluator service | ## Sample Configuration diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx index 19e338035b..0540c5c3e0 100644 --- a/docs/docs/providers/files/index.mdx +++ b/docs/docs/providers/files/index.mdx @@ -1,7 +1,8 @@ --- -description: "Files +description: | + Files - This API is used to upload documents that can be used with other Llama Stack APIs." + This API is used to upload documents that can be used with other Llama Stack APIs. sidebar_label: Files title: Files --- @@ -12,6 +13,6 @@ title: Files Files - This API is used to upload documents that can be used with other Llama Stack APIs. +This API is used to upload documents that can be used with other Llama Stack APIs. This section contains documentation for all available providers for the **files** API. 
diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx index bff0c4eb94..8faf476e23 100644 --- a/docs/docs/providers/files/inline_localfs.mdx +++ b/docs/docs/providers/files/inline_localfs.mdx @@ -14,9 +14,9 @@ Local filesystem-based file storage provider for managing files and documents lo | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `storage_dir` | `` | No | | Directory to store uploaded files | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | -| `ttl_secs` | `` | No | 31536000 | | +| `storage_dir` | `str` | No | | Directory to store uploaded files | +| `metadata_store` | `llama_stack.core.storage.datatypes.SqlStoreReference` | No | | SQL store configuration for file metadata | +| `ttl_secs` | `int` | No | 31536000 | | ## Sample Configuration diff --git a/docs/docs/providers/files/remote_openai.mdx b/docs/docs/providers/files/remote_openai.mdx index 3b5c40aada..9cf82643ed 100644 --- a/docs/docs/providers/files/remote_openai.mdx +++ b/docs/docs/providers/files/remote_openai.mdx @@ -14,8 +14,8 @@ OpenAI Files API provider for managing files through OpenAI's native file storag | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `api_key` | `` | No | | OpenAI API key for authentication | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | +| `api_key` | `str` | No | | OpenAI API key for authentication | +| `metadata_store` | `llama_stack.core.storage.datatypes.SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx index 65cd545c57..3747bfa8da 100644 --- a/docs/docs/providers/files/remote_s3.mdx +++ b/docs/docs/providers/files/remote_s3.mdx @@ -14,13 +14,13 @@ AWS S3-based file storage provider for scalable cloud 
file management with metad | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `bucket_name` | `` | No | | S3 bucket name to store files | -| `region` | `` | No | us-east-1 | AWS region where the bucket is located | +| `bucket_name` | `str` | No | | S3 bucket name to store files | +| `region` | `str` | No | us-east-1 | AWS region where the bucket is located | | `aws_access_key_id` | `str \| None` | No | | AWS access key ID (optional if using IAM roles) | | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) | -| `auto_create_bucket` | `` | No | False | Automatically create the S3 bucket if it doesn't exist | -| `metadata_store` | `` | No | | SQL store configuration for file metadata | +| `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist | +| `metadata_store` | `llama_stack.core.storage.datatypes.SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 478611420a..ad050e5013 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,12 +1,13 @@ --- -description: "Inference +description: | + Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Three kinds of models are supported: - - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. 
- - Rerank models: these models reorder the documents based on their relevance to a query." + This API provides the raw interface to the underlying models. Three kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. + - Rerank models: these models reorder the documents based on their relevance to a query. sidebar_label: Inference title: Inference --- @@ -17,11 +18,11 @@ title: Inference Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. +Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Three kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. - - Rerank models: these models reorder the documents based on their relevance to a query. +This API provides the raw interface to the underlying models. Three kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. +- Rerank models: these models reorder the documents based on their relevance to a query. This section contains documentation for all available providers for the **inference** API. 
diff --git a/docs/docs/providers/inference/inline_meta-reference.mdx b/docs/docs/providers/inference/inline_meta-reference.mdx index 328586f9a2..ffa24b7540 100644 --- a/docs/docs/providers/inference/inline_meta-reference.mdx +++ b/docs/docs/providers/inference/inline_meta-reference.mdx @@ -16,12 +16,12 @@ Meta's reference implementation of inference with support for various model form |-------|------|----------|---------|-------------| | `model` | `str \| None` | No | | | | `torch_seed` | `int \| None` | No | | | -| `max_seq_len` | `` | No | 4096 | | -| `max_batch_size` | `` | No | 1 | | +| `max_seq_len` | `int` | No | 4096 | | +| `max_batch_size` | `int` | No | 1 | | | `model_parallel_size` | `int \| None` | No | | | -| `create_distributed_process_group` | `` | No | True | | +| `create_distributed_process_group` | `bool` | No | True | | | `checkpoint_dir` | `str \| None` | No | | | -| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig, annotation=NoneType, required=True, discriminator='type'` | No | | | +| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_anthropic.mdx b/docs/docs/providers/inference/remote_anthropic.mdx index 4acbbac500..e01a5dc4dd 100644 --- a/docs/docs/providers/inference/remote_anthropic.mdx +++ b/docs/docs/providers/inference/remote_anthropic.mdx @@ -14,8 +14,8 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx index b3041259ea..384c2a61a2 100644 --- a/docs/docs/providers/inference/remote_azure.mdx +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -21,10 +21,10 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `api_base` | `` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | +| `api_base` | `pydantic.networks.HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) | | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) | diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 683ec12f8a..6903e6fb1f 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -14,13 +14,13 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. 
Default use environment variable: AWS_SESSION_TOKEN | | `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | -| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `profile_name` | `str \| None` | No | tpetkos | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | | `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx index cda0be224b..4877467439 100644 --- a/docs/docs/providers/inference/remote_cerebras.mdx +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -14,10 +14,10 @@ Cerebras inference provider for running models on Cerebras Cloud platform. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `` | No | https://api.cerebras.ai | Base URL for the Cerebras API | +| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx index f14fd01755..7feb77297f 100644 --- a/docs/docs/providers/inference/remote_databricks.mdx +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -14,8 +14,8 @@ Databricks inference provider for running models on Databricks' unified analytic | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_token` | `pydantic.types.SecretStr \| None` | No | | The Databricks API token | | `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx index 71f16ccec3..a077adfc6f 100644 --- a/docs/docs/providers/inference/remote_fireworks.mdx +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -14,10 +14,10 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | +| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_gemini.mdx b/docs/docs/providers/inference/remote_gemini.mdx index 22b3c8cb77..10cf4d6a65 100644 --- a/docs/docs/providers/inference/remote_gemini.mdx +++ b/docs/docs/providers/inference/remote_gemini.mdx @@ -14,8 +14,8 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx index aaf1516cac..7646c57d51 100644 --- a/docs/docs/providers/inference/remote_groq.mdx +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -14,10 +14,10 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.groq.com | The URL for the Groq AI server | +| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_endpoint.mdx b/docs/docs/providers/inference/remote_hf_endpoint.mdx index 771b24f8da..68044a51d6 100644 --- a/docs/docs/providers/inference/remote_hf_endpoint.mdx +++ b/docs/docs/providers/inference/remote_hf_endpoint.mdx @@ -14,7 +14,7 @@ HuggingFace Inference Endpoints provider for dedicated model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `endpoint_name` | `` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. | +| `endpoint_name` | `str` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. 
| | `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_serverless.mdx b/docs/docs/providers/inference/remote_hf_serverless.mdx index 1a89b8e3e0..d3638e01d6 100644 --- a/docs/docs/providers/inference/remote_hf_serverless.mdx +++ b/docs/docs/providers/inference/remote_hf_serverless.mdx @@ -14,7 +14,7 @@ HuggingFace Inference API serverless provider for on-demand model inference. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `huggingface_repo` | `` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | +| `huggingface_repo` | `str` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | | `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx index 9769c0793e..1b8d80261e 100644 --- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -14,10 +14,10 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. 
If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `openai_compat_api_base` | `` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | +| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index 57c64ab46b..77045f4899 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -14,13 +14,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | -| `timeout` | `` | No | 60 | Timeout for the HTTP requests | -| `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. 
| -| `rerank_model_to_url` | `dict[str, str` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. | +| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | +| `timeout` | `int` | No | 60 | Timeout for the HTTP requests | +| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | +| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_ollama.mdx b/docs/docs/providers/inference/remote_ollama.mdx index e00e34e4ae..497bfed52b 100644 --- a/docs/docs/providers/inference/remote_ollama.mdx +++ b/docs/docs/providers/inference/remote_ollama.mdx @@ -14,9 +14,9 @@ Ollama inference provider for running local models through the Ollama runtime. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `url` | `` | No | http://localhost:11434 | | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `url` | `str` | No | http://localhost:11434 | | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx index 28c8ab7bfd..02b70bf507 100644 --- a/docs/docs/providers/inference/remote_openai.mdx +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -14,10 +14,10 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `base_url` | `` | No | https://api.openai.com/v1 | Base URL for OpenAI API | +| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index 7a29316900..d6e23a51c8 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -14,10 +14,10 @@ Passthrough inference provider for connecting to any external inference service | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | -| `url` | `` | No | | The URL for the passthrough endpoint | +| `url` | `str` | No | | The URL for the passthrough endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx index 3cbbd0322c..c0be428660 100644 --- a/docs/docs/providers/inference/remote_runpod.mdx +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -14,8 +14,8 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | | `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx index 0ac4600b78..d67231ef4d 100644 --- a/docs/docs/providers/inference/remote_sambanova.mdx +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -14,10 +14,10 @@ SambaNova inference provider for running models on SambaNova's dataflow architec | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_tgi.mdx b/docs/docs/providers/inference/remote_tgi.mdx index 67fe6d2370..cd5ea7661d 100644 --- a/docs/docs/providers/inference/remote_tgi.mdx +++ b/docs/docs/providers/inference/remote_tgi.mdx @@ -14,9 +14,9 @@ Text Generation Inference (TGI) provider for HuggingFace model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `url` | `` | No | | The URL for the TGI serving endpoint | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `url` | `str` | No | | The URL for the TGI serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx index c8e3bcdcff..e9ac940e4b 100644 --- a/docs/docs/providers/inference/remote_together.mdx +++ b/docs/docs/providers/inference/remote_together.mdx @@ -14,10 +14,10 @@ Together AI inference provider for open-source models and collaborative AI devel | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://api.together.xyz/v1 | The URL for the Together AI server | +| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_vertexai.mdx b/docs/docs/providers/inference/remote_vertexai.mdx index c182ed4850..59b574561a 100644 --- a/docs/docs/providers/inference/remote_vertexai.mdx +++ b/docs/docs/providers/inference/remote_vertexai.mdx @@ -53,10 +53,10 @@ Available Models: | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. 
If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `project` | `` | No | | Google Cloud project ID for Vertex AI | -| `location` | `` | No | us-central1 | Google Cloud location for Vertex AI | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | +| `project` | `str` | No | | Google Cloud project ID for Vertex AI | +| `location` | `str` | No | us-central1 | Google Cloud location for Vertex AI | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx index f844bcee04..de5ecf8569 100644 --- a/docs/docs/providers/inference/remote_vllm.mdx +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -14,11 +14,11 @@ Remote vLLM inference provider for connecting to vLLM servers. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | | `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | -| `max_tokens` | `` | No | 4096 | Maximum number of tokens to generate. | +| `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. 
| | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index 2227aa1cce..b0ea8c1756 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -14,12 +14,12 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `url` | `` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | +| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `project_id` | `str \| None` | No | | The watsonx.ai project ID | -| `timeout` | `` | No | 60 | Timeout for the HTTP requests | +| `timeout` | `int` | No | 60 | Timeout for the HTTP requests | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx index ac7644de79..4a2f6a9ab8 100644 --- a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx +++ b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx @@ -14,23 +14,23 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `device` | `` | No | cuda | | -| `distributed_backend` | `Literal['fsdp', 'deepspeed'` | No | | | -| `checkpoint_format` | `Literal['full_state', 'huggingface'` | No | huggingface | | -| `chat_template` | `` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | -| `model_specific_config` | `` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | -| `max_seq_length` | `` | No | 2048 | | -| `gradient_checkpointing` | `` | No | False | | -| `save_total_limit` | `` | No | 3 | | -| `logging_steps` | `` | No | 10 | | -| `warmup_ratio` | `` | No | 0.1 | | -| `weight_decay` | `` | No | 0.01 | | -| `dataloader_num_workers` | `` | No | 4 | | -| `dataloader_pin_memory` | `` | No | True | | -| `dpo_beta` | `` | No | 0.1 | | -| `use_reference_model` | `` | No | True | | -| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | | -| `dpo_output_dir` | `` | No | | | +| `device` | `str` | No | cuda | | +| `distributed_backend` | `Literal['fsdp', 'deepspeed'] \| None` | No | | | +| `checkpoint_format` | `Literal['full_state', 'huggingface'] \| None` | No | huggingface | | +| `chat_template` | `str` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | +| `model_specific_config` | `dict` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | +| `max_seq_length` | `int` | No | 2048 | | +| `gradient_checkpointing` | `bool` | No | False | | +| `save_total_limit` | `int` | No | 3 | | +| `logging_steps` | `int` | No | 10 | | +| `warmup_ratio` | `float` | No | 0.1 | | +| `weight_decay` | `float` | No | 0.01 | | +| `dataloader_num_workers` | `int` | No | 4 | | +| `dataloader_pin_memory` | `bool` | No | True | | +| `dpo_beta` | `float` | No | 0.1 | | +| `use_reference_model` | `bool` | No | True | | +| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair']` | No | sigmoid | | +| `dpo_output_dir` | `str` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx index f789392fc0..3dc4bb0a90 100644 --- a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | +| `checkpoint_format` | `Literal['meta', 'huggingface'] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx index bd87797af3..8bf1acb8c5 100644 --- a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | |
| -| `checkpoint_format` | `Literal['meta', 'huggingface'` | No | meta | | +| `checkpoint_format` | `Literal['meta', 'huggingface'] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/remote_nvidia.mdx b/docs/docs/providers/post_training/remote_nvidia.mdx index 448ac4c758..d0208f82ff 100644 --- a/docs/docs/providers/post_training/remote_nvidia.mdx +++ b/docs/docs/providers/post_training/remote_nvidia.mdx @@ -18,9 +18,9 @@ NVIDIA's post-training provider for fine-tuning models on NVIDIA's platform. | `dataset_namespace` | `str \| None` | No | default | The NVIDIA dataset namespace. | | `project_id` | `str \| None` | No | test-example-model@v1 | The NVIDIA project ID. | | `customizer_url` | `str \| None` | No | | Base URL for the NeMo Customizer API | -| `timeout` | `` | No | 300 | Timeout for the NVIDIA Post Training API | -| `max_retries` | `` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | -| `output_model_dir` | `` | No | test-example-model@v1 | Directory to save the output model | +| `timeout` | `int` | No | 300 | Timeout for the NVIDIA Post Training API | +| `max_retries` | `int` | No | 3 | Maximum number of retries for the NVIDIA Post Training API | +| `output_model_dir` | `str` | No | test-example-model@v1 | Directory to save the output model | ## Sample Configuration diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 4e2de4f331..e7205f4ada 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,7 +1,8 @@ --- -description: "Safety +description: | + Safety - OpenAI-compatible Moderations API." + OpenAI-compatible Moderations API. sidebar_label: Safety title: Safety --- @@ -12,6 +13,6 @@ title: Safety Safety - OpenAI-compatible Moderations API. +OpenAI-compatible Moderations API. This section contains documentation for all available providers for the **safety** API.
diff --git a/docs/docs/providers/safety/inline_llama-guard.mdx b/docs/docs/providers/safety/inline_llama-guard.mdx index 65866c9b2e..d52e7289ab 100644 --- a/docs/docs/providers/safety/inline_llama-guard.mdx +++ b/docs/docs/providers/safety/inline_llama-guard.mdx @@ -14,7 +14,7 @@ Llama Guard safety provider for content moderation and safety filtering using Me | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `excluded_categories` | `list[str` | No | [] | | +| `excluded_categories` | `list[str]` | No | [] | | ## Sample Configuration diff --git a/docs/docs/providers/safety/inline_prompt-guard.mdx b/docs/docs/providers/safety/inline_prompt-guard.mdx index c52e03e4b5..dc57f8555a 100644 --- a/docs/docs/providers/safety/inline_prompt-guard.mdx +++ b/docs/docs/providers/safety/inline_prompt-guard.mdx @@ -14,7 +14,7 @@ Prompt Guard safety provider for detecting and filtering unsafe prompts and cont | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `guard_type` | `` | No | injection | | +| `guard_type` | `str` | No | injection | | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index 663a761f03..a543864e71 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -14,13 +14,13 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | -| `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | +| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. 
If None, all models are allowed. | +| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | | `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN | | `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | -| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | | `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | diff --git a/docs/docs/providers/safety/remote_nvidia.mdx b/docs/docs/providers/safety/remote_nvidia.mdx index 0f665e60a5..ac1fd0b03a 100644 --- a/docs/docs/providers/safety/remote_nvidia.mdx +++ b/docs/docs/providers/safety/remote_nvidia.mdx @@ -14,7 +14,7 @@ NVIDIA's safety provider for content moderation and safety filtering.
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `guardrails_service_url` | `` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | +| `guardrails_service_url` | `str` | No | http://0.0.0.0:7331 | The url for accessing the Guardrails service | | `config_id` | `str \| None` | No | self-check | Guardrails configuration ID to use from the Guardrails configuration store | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_sambanova.mdx b/docs/docs/providers/safety/remote_sambanova.mdx index da70fce6cc..8c7cf4a879 100644 --- a/docs/docs/providers/safety/remote_sambanova.mdx +++ b/docs/docs/providers/safety/remote_sambanova.mdx @@ -14,7 +14,7 @@ SambaNova's safety provider for content moderation and safety filtering. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `url` | `` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | +| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | | `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_bing-search.mdx b/docs/docs/providers/tool_runtime/remote_bing-search.mdx index ec06bc20fb..f97087d9e9 100644 --- a/docs/docs/providers/tool_runtime/remote_bing-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_bing-search.mdx @@ -15,7 +15,7 @@ Bing Search tool for web search capabilities using Microsoft's search engine. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | | -| `top_k` | `` | No | 3 | | +| `top_k` | `int` | No | 3 | | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_brave-search.mdx b/docs/docs/providers/tool_runtime/remote_brave-search.mdx index 3aeed67d56..987ce0e41c 100644 --- a/docs/docs/providers/tool_runtime/remote_brave-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_brave-search.mdx @@ -15,7 +15,7 @@ Brave Search tool for web search capabilities with privacy-focused results. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Brave Search API Key | -| `max_results` | `` | No | 3 | The maximum number of results to return | +| `max_results` | `int` | No | 3 | The maximum number of results to return | ## Sample Configuration diff --git a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx index fdca31bbef..36ad636468 100644 --- a/docs/docs/providers/tool_runtime/remote_tavily-search.mdx +++ b/docs/docs/providers/tool_runtime/remote_tavily-search.mdx @@ -15,7 +15,7 @@ Tavily Search tool for AI-optimized web search with structured results. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Tavily Search API Key | -| `max_results` | `` | No | 3 | The maximum number of results to return | +| `max_results` | `int` | No | 3 | The maximum number of results to return | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx index 0be5cd5b39..fe085b1f0f 100644 --- a/docs/docs/providers/vector_io/inline_chromadb.mdx +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -78,8 +78,8 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | | -| `persistence` | `` | No | | Config for KV store backend | +| `db_path` | `str` | No | | | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx index 3a1fba055f..19f8e4e463 100644 --- a/docs/docs/providers/vector_io/inline_faiss.mdx +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -95,7 +95,7 @@ more details about Faiss in general. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx index 17fd40cf55..a0aa9f253a 100644 --- a/docs/docs/providers/vector_io/inline_meta-reference.mdx +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of a vector database. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `` | No | | | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx index 6063edab14..9225906c7a 100644 --- a/docs/docs/providers/vector_io/inline_milvus.mdx +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -16,9 +16,9 @@ Please refer to the remote provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | -| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | +| `db_path` | `str` | No | | | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | +| `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx index 057d96761b..ef8e4c3087 100644 --- a/docs/docs/providers/vector_io/inline_qdrant.mdx +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -97,8 +97,8 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `path` | `` | No | | | -| `persistence` | `` | No | | | +| `path` | `str` | No | | | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index bfa2f29de9..128e64cea5 100644 --- 
a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -407,8 +407,8 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | Path to the SQLite database file | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | +| `db_path` | `str` | No | | Path to the SQLite database file | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 67cbd00211..77c045a494 100644 --- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -16,8 +16,8 @@ Please refer to the sqlite-vec provider documentation. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `db_path` | `` | No | | Path to the SQLite database file | -| `persistence` | `` | No | | Config for KV store backend (SQLite only for now) | +| `db_path` | `str` | No | | Path to the SQLite database file | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx index 2aee3eecab..b0ee11900f 100644 --- a/docs/docs/providers/vector_io/remote_chromadb.mdx +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -78,7 +78,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | | | -| `persistence` | `` | No | | Config for KV store backend | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx index bf9935d619..2763ad063d 100644 --- a/docs/docs/providers/vector_io/remote_milvus.mdx +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -405,10 +405,10 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `uri` | `` | No | | The URI of the Milvus server | +| `uri` | `str` | No | | The URI of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server | -| `consistency_level` | `` | No | Strong | The consistency level of the Milvus server | -| `persistence` | `` | No | | Config for KV store backend | +| `consistency_level` | `str` | No | 
Strong | The consistency level of the Milvus server | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | :::note diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx index dff9642b54..d8070d63a9 100644 --- a/docs/docs/providers/vector_io/remote_qdrant.mdx +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -19,14 +19,14 @@ Please refer to the inline provider documentation. | `location` | `str \| None` | No | | | | `url` | `str \| None` | No | | | | `port` | `int \| None` | No | 6333 | | -| `grpc_port` | `` | No | 6334 | | -| `prefer_grpc` | `` | No | False | | +| `grpc_port` | `int` | No | 6334 | | +| `prefer_grpc` | `bool` | No | False | | | `https` | `bool \| None` | No | | | | `api_key` | `str \| None` | No | | | | `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | -| `persistence` | `` | No | | | +| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | ## Sample Configuration From 4ae04f6ed83c551f23428c37d36b7cb685667ee0 Mon Sep 17 00:00:00 2001 From: thepetk Date: Fri, 7 Nov 2025 21:00:59 +0000 Subject: [PATCH 4/5] Introduce extract_type_annotation method Signed-off-by: thepetk --- .../agents/inline_meta-reference.mdx | 2 +- .../providers/batches/inline_reference.mdx | 2 +- .../providers/datasetio/inline_localfs.mdx | 2 +- .../datasetio/remote_huggingface.mdx | 2 +- .../providers/eval/inline_meta-reference.mdx | 2 +- docs/docs/providers/files/inline_localfs.mdx | 2 +- docs/docs/providers/files/remote_openai.mdx | 2 +- docs/docs/providers/files/remote_s3.mdx | 2 +- .../inference/inline_meta-reference.mdx | 2 
+- .../providers/inference/remote_anthropic.mdx | 2 +- .../docs/providers/inference/remote_azure.mdx | 4 +- .../providers/inference/remote_cerebras.mdx | 2 +- .../providers/inference/remote_databricks.mdx | 2 +- .../providers/inference/remote_fireworks.mdx | 2 +- .../providers/inference/remote_gemini.mdx | 2 +- docs/docs/providers/inference/remote_groq.mdx | 2 +- .../inference/remote_hf_endpoint.mdx | 2 +- .../inference/remote_hf_serverless.mdx | 2 +- .../inference/remote_llama-openai-compat.mdx | 2 +- .../providers/inference/remote_nvidia.mdx | 2 +- .../providers/inference/remote_openai.mdx | 2 +- .../inference/remote_passthrough.mdx | 2 +- .../providers/inference/remote_runpod.mdx | 2 +- .../providers/inference/remote_sambanova.mdx | 2 +- .../providers/inference/remote_together.mdx | 2 +- docs/docs/providers/inference/remote_vllm.mdx | 2 +- .../providers/inference/remote_watsonx.mdx | 2 +- .../post_training/inline_huggingface-gpu.mdx | 6 +-- .../post_training/inline_torchtune-cpu.mdx | 2 +- .../post_training/inline_torchtune-gpu.mdx | 2 +- .../providers/safety/remote_sambanova.mdx | 2 +- .../providers/vector_io/inline_chromadb.mdx | 2 +- .../docs/providers/vector_io/inline_faiss.mdx | 2 +- .../vector_io/inline_meta-reference.mdx | 2 +- .../providers/vector_io/inline_milvus.mdx | 2 +- .../providers/vector_io/inline_qdrant.mdx | 2 +- .../providers/vector_io/inline_sqlite-vec.mdx | 2 +- .../providers/vector_io/inline_sqlite_vec.mdx | 2 +- .../providers/vector_io/remote_chromadb.mdx | 2 +- .../providers/vector_io/remote_milvus.mdx | 2 +- .../providers/vector_io/remote_pgvector.mdx | 2 +- .../providers/vector_io/remote_qdrant.mdx | 2 +- .../providers/vector_io/remote_weaviate.mdx | 2 +- scripts/provider_codegen.py | 49 ++++++++++++++----- 44 files changed, 84 insertions(+), 57 deletions(-) diff --git a/docs/docs/providers/agents/inline_meta-reference.mdx b/docs/docs/providers/agents/inline_meta-reference.mdx index 91575b5138..99a67feb44 100644 --- 
a/docs/docs/providers/agents/inline_meta-reference.mdx +++ b/docs/docs/providers/agents/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of an agent system that can use tools, access ve | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `inline.agents.meta_reference.config.AgentPersistenceConfig` | No | | | +| `persistence` | `AgentPersistenceConfig` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/batches/inline_reference.mdx b/docs/docs/providers/batches/inline_reference.mdx index b994e0ce4e..0a062c2453 100644 --- a/docs/docs/providers/batches/inline_reference.mdx +++ b/docs/docs/providers/batches/inline_reference.mdx @@ -14,7 +14,7 @@ Reference implementation of batches API with KVStore persistence. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Configuration for the key-value store backend. | +| `kvstore` | `KVStoreReference` | No | | Configuration for the key-value store backend. | | `max_concurrent_batches` | `int` | No | 1 | Maximum number of concurrent batches to process simultaneously. | | `max_concurrent_requests_per_batch` | `int` | No | 10 | Maximum number of concurrent requests to process per batch. 
| diff --git a/docs/docs/providers/datasetio/inline_localfs.mdx b/docs/docs/providers/datasetio/inline_localfs.mdx index d5130abd55..4314696c5e 100644 --- a/docs/docs/providers/datasetio/inline_localfs.mdx +++ b/docs/docs/providers/datasetio/inline_localfs.mdx @@ -14,7 +14,7 @@ Local filesystem-based dataset I/O provider for reading and writing datasets to | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/datasetio/remote_huggingface.mdx b/docs/docs/providers/datasetio/remote_huggingface.mdx index d8ef7cd24c..ede8ed631d 100644 --- a/docs/docs/providers/datasetio/remote_huggingface.mdx +++ b/docs/docs/providers/datasetio/remote_huggingface.mdx @@ -14,7 +14,7 @@ HuggingFace datasets provider for accessing and managing datasets from the Huggi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/eval/inline_meta-reference.mdx b/docs/docs/providers/eval/inline_meta-reference.mdx index 3ca105410d..f1e923ee8c 100644 --- a/docs/docs/providers/eval/inline_meta-reference.mdx +++ b/docs/docs/providers/eval/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of evaluation tasks with support for multiple la | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `kvstore` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `kvstore` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/files/inline_localfs.mdx b/docs/docs/providers/files/inline_localfs.mdx index 8faf476e23..aa3a9232b4 100644 
--- a/docs/docs/providers/files/inline_localfs.mdx +++ b/docs/docs/providers/files/inline_localfs.mdx @@ -15,7 +15,7 @@ Local filesystem-based file storage provider for managing files and documents lo | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `storage_dir` | `str` | No | | Directory to store uploaded files | -| `metadata_store` | `llama_stack.core.storage.datatypes.SqlStoreReference` | No | | SQL store configuration for file metadata | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | | `ttl_secs` | `int` | No | 31536000 | | ## Sample Configuration diff --git a/docs/docs/providers/files/remote_openai.mdx b/docs/docs/providers/files/remote_openai.mdx index 9cf82643ed..48fe2fd573 100644 --- a/docs/docs/providers/files/remote_openai.mdx +++ b/docs/docs/providers/files/remote_openai.mdx @@ -15,7 +15,7 @@ OpenAI Files API provider for managing files through OpenAI's native file storag | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str` | No | | OpenAI API key for authentication | -| `metadata_store` | `llama_stack.core.storage.datatypes.SqlStoreReference` | No | | SQL store configuration for file metadata | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/files/remote_s3.mdx b/docs/docs/providers/files/remote_s3.mdx index 3747bfa8da..857ba1819f 100644 --- a/docs/docs/providers/files/remote_s3.mdx +++ b/docs/docs/providers/files/remote_s3.mdx @@ -20,7 +20,7 @@ AWS S3-based file storage provider for scalable cloud file management with metad | `aws_secret_access_key` | `str \| None` | No | | AWS secret access key (optional if using IAM roles) | | `endpoint_url` | `str \| None` | No | | Custom S3 endpoint URL (for MinIO, LocalStack, etc.) 
| | `auto_create_bucket` | `bool` | No | False | Automatically create the S3 bucket if it doesn't exist | -| `metadata_store` | `llama_stack.core.storage.datatypes.SqlStoreReference` | No | | SQL store configuration for file metadata | +| `metadata_store` | `SqlStoreReference` | No | | SQL store configuration for file metadata | ## Sample Configuration diff --git a/docs/docs/providers/inference/inline_meta-reference.mdx b/docs/docs/providers/inference/inline_meta-reference.mdx index ffa24b7540..55b1606b06 100644 --- a/docs/docs/providers/inference/inline_meta-reference.mdx +++ b/docs/docs/providers/inference/inline_meta-reference.mdx @@ -21,7 +21,7 @@ Meta's reference implementation of inference with support for various model form | `model_parallel_size` | `int \| None` | No | | | | `create_distributed_process_group` | `bool` | No | True | | | `checkpoint_dir` | `str \| None` | No | | | -| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig` | No | | | +| `quantization` | `Bf16QuantizationConfig \| Fp8QuantizationConfig \| Int4QuantizationConfig \| None` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_anthropic.mdx b/docs/docs/providers/inference/remote_anthropic.mdx index e01a5dc4dd..14b4318941 100644 --- a/docs/docs/providers/inference/remote_anthropic.mdx +++ b/docs/docs/providers/inference/remote_anthropic.mdx @@ -16,7 +16,7 @@ Anthropic inference provider for accessing Claude models and Anthropic's AI serv |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_azure.mdx b/docs/docs/providers/inference/remote_azure.mdx index 384c2a61a2..fd22b157e0 100644 --- a/docs/docs/providers/inference/remote_azure.mdx +++ b/docs/docs/providers/inference/remote_azure.mdx @@ -23,8 +23,8 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | -| `api_base` | `pydantic.networks.HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | +| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) | | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) | | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) | diff --git a/docs/docs/providers/inference/remote_cerebras.mdx b/docs/docs/providers/inference/remote_cerebras.mdx index 4877467439..1fb9530bb9 100644 --- a/docs/docs/providers/inference/remote_cerebras.mdx +++ b/docs/docs/providers/inference/remote_cerebras.mdx @@ -16,7 +16,7 @@ Cerebras inference provider for running models on Cerebras Cloud platform. 
|-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_databricks.mdx b/docs/docs/providers/inference/remote_databricks.mdx index 7feb77297f..7a926baf44 100644 --- a/docs/docs/providers/inference/remote_databricks.mdx +++ b/docs/docs/providers/inference/remote_databricks.mdx @@ -16,7 +16,7 @@ Databricks inference provider for running models on Databricks' unified analytic |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The Databricks API token | +| `api_token` | `SecretStr \| None` | No | | The Databricks API token | | `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_fireworks.mdx b/docs/docs/providers/inference/remote_fireworks.mdx index a077adfc6f..7db74efc4b 100644 --- a/docs/docs/providers/inference/remote_fireworks.mdx +++ b/docs/docs/providers/inference/remote_fireworks.mdx @@ -16,7 +16,7 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_gemini.mdx b/docs/docs/providers/inference/remote_gemini.mdx index 10cf4d6a65..75e6b96920 100644 --- a/docs/docs/providers/inference/remote_gemini.mdx +++ b/docs/docs/providers/inference/remote_gemini.mdx @@ -16,7 +16,7 @@ Google Gemini inference provider for accessing Gemini models and Google's AI ser |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_groq.mdx b/docs/docs/providers/inference/remote_groq.mdx index 7646c57d51..3ebd6f9070 100644 --- a/docs/docs/providers/inference/remote_groq.mdx +++ b/docs/docs/providers/inference/remote_groq.mdx @@ -16,7 +16,7 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_endpoint.mdx b/docs/docs/providers/inference/remote_hf_endpoint.mdx index 68044a51d6..52b40c1f21 100644 --- a/docs/docs/providers/inference/remote_hf_endpoint.mdx +++ b/docs/docs/providers/inference/remote_hf_endpoint.mdx @@ -15,7 +15,7 @@ HuggingFace Inference Endpoints provider for dedicated model serving. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `endpoint_name` | `str` | No | | The name of the Hugging Face Inference Endpoint in the format of '{namespace}/{endpoint_name}' (e.g. 'my-cool-org/meta-llama-3-1-8b-instruct-rce'). Namespace is optional and will default to the user account if not provided. 
| -| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | +| `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_hf_serverless.mdx b/docs/docs/providers/inference/remote_hf_serverless.mdx index d3638e01d6..52280df826 100644 --- a/docs/docs/providers/inference/remote_hf_serverless.mdx +++ b/docs/docs/providers/inference/remote_hf_serverless.mdx @@ -15,7 +15,7 @@ HuggingFace Inference API serverless provider for on-demand model inference. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `huggingface_repo` | `str` | No | | The model ID of the model on the Hugging Face Hub (e.g. 'meta-llama/Meta-Llama-3.1-70B-Instruct') | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | +| `api_token` | `SecretStr \| None` | No | | Your Hugging Face user access token (will default to locally saved token if not provided) | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_llama-openai-compat.mdx b/docs/docs/providers/inference/remote_llama-openai-compat.mdx index 1b8d80261e..f67f409097 100644 --- a/docs/docs/providers/inference/remote_llama-openai-compat.mdx +++ b/docs/docs/providers/inference/remote_llama-openai-compat.mdx @@ -16,7 +16,7 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index 77045f4899..6646d8b00a 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -16,7 +16,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | | `timeout` | `int` | No | 60 | Timeout for the HTTP requests | | `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | diff --git a/docs/docs/providers/inference/remote_openai.mdx b/docs/docs/providers/inference/remote_openai.mdx index 02b70bf507..4931118fdb 100644 --- a/docs/docs/providers/inference/remote_openai.mdx +++ b/docs/docs/providers/inference/remote_openai.mdx @@ -16,7 +16,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services. 
|-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_passthrough.mdx b/docs/docs/providers/inference/remote_passthrough.mdx index d6e23a51c8..c339fef234 100644 --- a/docs/docs/providers/inference/remote_passthrough.mdx +++ b/docs/docs/providers/inference/remote_passthrough.mdx @@ -16,7 +16,7 @@ Passthrough inference provider for connecting to any external inference service |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | API Key for the passthrouth endpoint | +| `api_key` | `SecretStr \| None` | No | | API Key for the passthrouth endpoint | | `url` | `str` | No | | The URL for the passthrough endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_runpod.mdx b/docs/docs/providers/inference/remote_runpod.mdx index c0be428660..3b67e157d6 100644 --- a/docs/docs/providers/inference/remote_runpod.mdx +++ b/docs/docs/providers/inference/remote_runpod.mdx @@ -16,7 +16,7 @@ RunPod inference provider for running models on RunPod's cloud GPU platform. 
|-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | +| `api_token` | `SecretStr \| None` | No | | The API token | | `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_sambanova.mdx b/docs/docs/providers/inference/remote_sambanova.mdx index d67231ef4d..6f4c5d7f66 100644 --- a/docs/docs/providers/inference/remote_sambanova.mdx +++ b/docs/docs/providers/inference/remote_sambanova.mdx @@ -16,7 +16,7 @@ SambaNova inference provider for running models on SambaNova's dataflow architec |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_together.mdx b/docs/docs/providers/inference/remote_together.mdx index e9ac940e4b..43192cc9e2 100644 --- a/docs/docs/providers/inference/remote_together.mdx +++ b/docs/docs/providers/inference/remote_together.mdx @@ -16,7 +16,7 @@ Together AI inference provider for open-source models and collaborative AI devel |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server | ## Sample Configuration diff --git a/docs/docs/providers/inference/remote_vllm.mdx b/docs/docs/providers/inference/remote_vllm.mdx index de5ecf8569..81620dbca9 100644 --- a/docs/docs/providers/inference/remote_vllm.mdx +++ b/docs/docs/providers/inference/remote_vllm.mdx @@ -16,7 +16,7 @@ Remote vLLM inference provider for connecting to vLLM servers. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_token` | `pydantic.types.SecretStr \| None` | No | | The API token | +| `api_token` | `SecretStr \| None` | No | | The API token | | `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint | | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. | | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. | diff --git a/docs/docs/providers/inference/remote_watsonx.mdx b/docs/docs/providers/inference/remote_watsonx.mdx index b0ea8c1756..3a1dba3b4b 100644 --- a/docs/docs/providers/inference/remote_watsonx.mdx +++ b/docs/docs/providers/inference/remote_watsonx.mdx @@ -16,7 +16,7 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform |-------|------|----------|---------|-------------| | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. 
| | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider | | `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai | | `project_id` | `str \| None` | No | | The watsonx.ai project ID | | `timeout` | `int` | No | 60 | Timeout for the HTTP requests | diff --git a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx index 4a2f6a9ab8..0d4241b274 100644 --- a/docs/docs/providers/post_training/inline_huggingface-gpu.mdx +++ b/docs/docs/providers/post_training/inline_huggingface-gpu.mdx @@ -15,8 +15,8 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `device` | `str` | No | cuda | | -| `distributed_backend` | `Literal['fsdp', 'deepspeed' \| None]` | No | | | -| `checkpoint_format` | `Literal['full_state', 'huggingface' \| None]` | No | huggingface | | +| `distributed_backend` | `Literal[fsdp, deepspeed] \| None` | No | | | +| `checkpoint_format` | `Literal[full_state, huggingface] \| None` | No | huggingface | | | `chat_template` | `str` | No | `<|user|>`
`{input}`
`<|assistant|>`
`{output}` | | | `model_specific_config` | `dict` | No | `{'trust_remote_code': True, 'attn_implementation': 'sdpa'}` | | | `max_seq_length` | `int` | No | 2048 | | @@ -29,7 +29,7 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin | `dataloader_pin_memory` | `bool` | No | True | | | `dpo_beta` | `float` | No | 0.1 | | | `use_reference_model` | `bool` | No | True | | -| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair']` | No | sigmoid | | +| `dpo_loss_type` | `Literal[sigmoid, hinge, ipo, kto_pair]` | No | sigmoid | | | `dpo_output_dir` | `str` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx index 3dc4bb0a90..3e2c15d3e3 100644 --- a/docs/docs/providers/post_training/inline_torchtune-cpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-cpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface' \| None]` | No | meta | | +| `checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx index 8bf1acb8c5..ac222d8a50 100644 --- a/docs/docs/providers/post_training/inline_torchtune-gpu.mdx +++ b/docs/docs/providers/post_training/inline_torchtune-gpu.mdx @@ -15,7 +15,7 @@ TorchTune-based post-training provider for fine-tuning and optimizing models usi | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `torch_seed` | `int \| None` | No | | | -| `checkpoint_format` | `Literal['meta', 'huggingface' \| None]` | No | meta | | +| 
`checkpoint_format` | `Literal[meta, huggingface] \| None` | No | meta | | ## Sample Configuration diff --git a/docs/docs/providers/safety/remote_sambanova.mdx b/docs/docs/providers/safety/remote_sambanova.mdx index 8c7cf4a879..69712879cc 100644 --- a/docs/docs/providers/safety/remote_sambanova.mdx +++ b/docs/docs/providers/safety/remote_sambanova.mdx @@ -15,7 +15,7 @@ SambaNova's safety provider for content moderation and safety filtering. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | The SambaNova cloud API Key | +| `api_key` | `SecretStr \| None` | No | | The SambaNova cloud API Key | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_chromadb.mdx b/docs/docs/providers/vector_io/inline_chromadb.mdx index fe085b1f0f..d78a67b01f 100644 --- a/docs/docs/providers/vector_io/inline_chromadb.mdx +++ b/docs/docs/providers/vector_io/inline_chromadb.mdx @@ -79,7 +79,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `str` | No | | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_faiss.mdx b/docs/docs/providers/vector_io/inline_faiss.mdx index 19f8e4e463..c1eedf9db6 100644 --- a/docs/docs/providers/vector_io/inline_faiss.mdx +++ b/docs/docs/providers/vector_io/inline_faiss.mdx @@ -95,7 +95,7 @@ more details about Faiss in general. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_meta-reference.mdx b/docs/docs/providers/vector_io/inline_meta-reference.mdx index a0aa9f253a..9266b65b57 100644 --- a/docs/docs/providers/vector_io/inline_meta-reference.mdx +++ b/docs/docs/providers/vector_io/inline_meta-reference.mdx @@ -14,7 +14,7 @@ Meta's reference implementation of a vector database. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_milvus.mdx b/docs/docs/providers/vector_io/inline_milvus.mdx index 9225906c7a..e8408a74f3 100644 --- a/docs/docs/providers/vector_io/inline_milvus.mdx +++ b/docs/docs/providers/vector_io/inline_milvus.mdx @@ -17,7 +17,7 @@ Please refer to the remote provider documentation. 
| Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `str` | No | | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | | `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_qdrant.mdx b/docs/docs/providers/vector_io/inline_qdrant.mdx index ef8e4c3087..8f61557325 100644 --- a/docs/docs/providers/vector_io/inline_qdrant.mdx +++ b/docs/docs/providers/vector_io/inline_qdrant.mdx @@ -98,7 +98,7 @@ See the [Qdrant documentation](https://qdrant.tech/documentation/) for more deta | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `path` | `str` | No | | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx index 128e64cea5..180a8e6dce 100644 --- a/docs/docs/providers/vector_io/inline_sqlite-vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite-vec.mdx @@ -408,7 +408,7 @@ See [sqlite-vec's GitHub repo](https://github.com/asg017/sqlite-vec/tree/main) f | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `str` | No | | Path to the SQLite database file | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx 
b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx index 77c045a494..a25ff1b28e 100644 --- a/docs/docs/providers/vector_io/inline_sqlite_vec.mdx +++ b/docs/docs/providers/vector_io/inline_sqlite_vec.mdx @@ -17,7 +17,7 @@ Please refer to the sqlite-vec provider documentation. | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `db_path` | `str` | No | | Path to the SQLite database file | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_chromadb.mdx b/docs/docs/providers/vector_io/remote_chromadb.mdx index b0ee11900f..970f4420f6 100644 --- a/docs/docs/providers/vector_io/remote_chromadb.mdx +++ b/docs/docs/providers/vector_io/remote_chromadb.mdx @@ -78,7 +78,7 @@ See [Chroma's documentation](https://docs.trychroma.com/docs/overview/introducti | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `url` | `str \| None` | No | | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_milvus.mdx b/docs/docs/providers/vector_io/remote_milvus.mdx index 2763ad063d..3e8ae71cf7 100644 --- a/docs/docs/providers/vector_io/remote_milvus.mdx +++ b/docs/docs/providers/vector_io/remote_milvus.mdx @@ -408,7 +408,7 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi | `uri` | `str` | No | | The URI of the Milvus server | | `token` | `str \| None` | No | | The token of the Milvus server | | `consistency_level` | `str` | No | Strong | The consistency level of the Milvus server | -| 
`persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | Config for KV store backend | +| `persistence` | `KVStoreReference` | No | | Config for KV store backend | | `config` | `dict` | No | `{}` | This configuration allows additional fields to be passed through to the underlying Milvus client. See the [Milvus](https://milvus.io/docs/install-overview.md) documentation for more details about Milvus in general. | :::note diff --git a/docs/docs/providers/vector_io/remote_pgvector.mdx b/docs/docs/providers/vector_io/remote_pgvector.mdx index cb70f35d14..cd69e2b2f6 100644 --- a/docs/docs/providers/vector_io/remote_pgvector.mdx +++ b/docs/docs/providers/vector_io/remote_pgvector.mdx @@ -218,7 +218,7 @@ See [PGVector's documentation](https://github.com/pgvector/pgvector) for more de | `db` | `str \| None` | No | postgres | | | `user` | `str \| None` | No | postgres | | | `password` | `str \| None` | No | mysecretpassword | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_qdrant.mdx b/docs/docs/providers/vector_io/remote_qdrant.mdx index d8070d63a9..9b5117bcb3 100644 --- a/docs/docs/providers/vector_io/remote_qdrant.mdx +++ b/docs/docs/providers/vector_io/remote_qdrant.mdx @@ -26,7 +26,7 @@ Please refer to the inline provider documentation. 
| `prefix` | `str \| None` | No | | | | `timeout` | `int \| None` | No | | | | `host` | `str \| None` | No | | | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference` | No | | | +| `persistence` | `KVStoreReference` | No | | | ## Sample Configuration diff --git a/docs/docs/providers/vector_io/remote_weaviate.mdx b/docs/docs/providers/vector_io/remote_weaviate.mdx index b809bed2e3..7a29d0d48a 100644 --- a/docs/docs/providers/vector_io/remote_weaviate.mdx +++ b/docs/docs/providers/vector_io/remote_weaviate.mdx @@ -75,7 +75,7 @@ See [Weaviate's documentation](https://weaviate.io/developers/weaviate) for more |-------|------|----------|---------|-------------| | `weaviate_api_key` | `str \| None` | No | | The API key for the Weaviate instance | | `weaviate_cluster_url` | `str \| None` | No | localhost:8080 | The URL of the Weaviate cluster | -| `persistence` | `llama_stack.core.storage.datatypes.KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | +| `persistence` | `KVStoreReference \| None` | No | | Config for KV store backend (SQLite only for now) | ## Sample Configuration diff --git a/scripts/provider_codegen.py b/scripts/provider_codegen.py index e0cb35bcd9..dd03b8a4f6 100755 --- a/scripts/provider_codegen.py +++ b/scripts/provider_codegen.py @@ -5,11 +5,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import re import subprocess import sys from pathlib import Path -from typing import Any +from types import UnionType +from typing import Annotated, Any, Union, get_args, get_origin from pydantic_core import PydanticUndefined from rich.progress import Progress, SpinnerColumn, TextColumn @@ -52,6 +52,41 @@ def changed_paths(self): return self._changed_paths +def extract_type_annotation(annotation: Any) -> str: + """extract a type annotation into a clean string representation.""" + if annotation is None: + return "Any" + + if annotation is type(None): + return "None" + + origin = get_origin(annotation) + args = get_args(annotation) + + # recursive workaround for Annotated types to ignore FieldInfo part + if origin is Annotated and args: + return extract_type_annotation(args[0]) + + if origin in [Union, UnionType]: + non_none_args = [arg for arg in args if arg is not type(None)] + has_none = len(non_none_args) < len(args) + + if len(non_none_args) == 1: + formatted = extract_type_annotation(non_none_args[0]) + return f"{formatted} | None" if has_none else formatted + else: + formatted_args = [extract_type_annotation(arg) for arg in non_none_args] + result = " | ".join(formatted_args) + return f"{result} | None" if has_none else result + + if origin is not None and args: + origin_name = getattr(origin, "__name__", str(origin)) + formatted_args = [extract_type_annotation(arg) for arg in args] + return f"{origin_name}[{', '.join(formatted_args)}]" + + return annotation.__name__ if hasattr(annotation, "__name__") else str(annotation) + + def get_config_class_info(config_class_path: str) -> dict[str, Any]: """Extract configuration information from a config class.""" try: @@ -79,16 +114,8 @@ def get_config_class_info(config_class_path: str) -> dict[str, Any]: for field_name, field in config_class.model_fields.items(): if getattr(field, "exclude", False): continue - field_type = str(field.annotation) if field.annotation else "Any" - - field_type = 
field_type.replace("typing.", "") - field_type = field_type.replace("llama_stack.apis.inference.inference.", "") - field_type = field_type.replace("llama_stack.providers.", "") - field_type = re.sub(r"Optional\[([^\]]+)\]", r"\1 | None", field_type) - field_type = re.sub(r"Annotated\[([^,]+),.*?\]", r"\1", field_type) - field_type = re.sub(r"FieldInfo\([^)]*\)", "", field_type) - field_type = re.sub(r"", r"\1", field_type) + field_type = extract_type_annotation(field.annotation) default_value = field.default if field.default_factory is not None: From 0912fbeb7077dbb7d2343bf1f7c01186261aa89d Mon Sep 17 00:00:00 2001 From: thepetk Date: Sun, 9 Nov 2025 18:17:25 +0000 Subject: [PATCH 5/5] Remove profile_name from docs --- docs/docs/providers/safety/remote_bedrock.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/providers/safety/remote_bedrock.mdx b/docs/docs/providers/safety/remote_bedrock.mdx index a543864e71..716c0862c7 100644 --- a/docs/docs/providers/safety/remote_bedrock.mdx +++ b/docs/docs/providers/safety/remote_bedrock.mdx @@ -20,7 +20,7 @@ AWS Bedrock safety provider for content moderation using AWS's safety services. | `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | | `aws_session_token` | `str \| None` | No | | The AWS session token to use. 
Default use environment variable: AWS_SESSION_TOKEN | | `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | -| `profile_name` | `str \| None` | No | tpetkos | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | +| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | | `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | | `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | | `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. |