specifications

Core specification models for pipeline configuration.

These Pydantic models define the configuration contracts for all pipeline components, following the principle of separation between configuration (what to do) and execution (how to do it).

DataSourceType

Bases: str, Enum

Supported data source types.

LLMProvider

Bases: str, Enum

Supported LLM providers.

ErrorPolicy

Bases: str, Enum

Error handling policies for processing failures.

MergeStrategy

Bases: str, Enum

Output merge strategies.
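
Because each of these enums subclasses str, members compare equal to their underlying string values and serialize cleanly in Pydantic models and YAML/JSON configs. A minimal sketch (the member value is assumed from the validator error messages further below):

from ondine.core.specifications import LLMProvider

# str-based enum members double as plain strings, so specs can be
# built from either the enum member or its string value.
assert isinstance(LLMProvider.OPENAI_COMPATIBLE, str)
assert LLMProvider.OPENAI_COMPATIBLE == "openai_compatible"  # assumed value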

DatasetSpec

Bases: BaseModel

Specification for data source configuration.

validate_source_path classmethod

validate_source_path(v: str | Path | None) -> Path | None

Convert string paths to Path objects.

Source code in ondine/core/specifications.py
@field_validator("source_path")
@classmethod
def validate_source_path(cls, v: str | Path | None) -> Path | None:
    """Convert string paths to Path objects."""
    if v is None:
        return None
    return Path(v) if isinstance(v, str) else v

validate_no_overlap classmethod

validate_no_overlap(v: list[str], info: Any) -> list[str]

Ensure output columns don't overlap with input columns.

Source code in ondine/core/specifications.py
@field_validator("output_columns")
@classmethod
def validate_no_overlap(cls, v: list[str], info: Any) -> list[str]:
    """Ensure output columns don't overlap with input columns."""
    if "input_columns" in info.data:
        input_cols = set(info.data["input_columns"])
        output_cols = set(v)
        overlap = input_cols & output_cols
        if overlap:
            raise ValueError(f"Output columns overlap with input: {overlap}")
    return v
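
Together, the two validators mean a spec built from plain strings ends up holding Path objects, and a column collision fails fast at construction time. An illustrative sketch (field names are taken from the validators above; any additional required fields, such as a data source type, are elided):

from pydantic import ValidationError
from ondine.core.specifications import DatasetSpec

spec = DatasetSpec(
    source_path="data.csv",       # coerced to Path("data.csv")
    input_columns=["text"],
    output_columns=["result"],
)

try:
    DatasetSpec(
        source_path="data.csv",
        input_columns=["text"],
        output_columns=["text"],  # collides with input_columns
    )
except ValidationError as exc:
    print(exc)                    # Output columns overlap with input: {'text'}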

PromptSpec

Bases: BaseModel

Specification for prompt template configuration.

validate_template classmethod

validate_template(v: str) -> str

Validate template has at least one variable.

Source code in ondine/core/specifications.py
@field_validator("template")
@classmethod
def validate_template(cls, v: str) -> str:
    """Validate template has at least one variable."""
    if "{" not in v or "}" not in v:
        raise ValueError(
            "Template must contain at least one variable in {var} format"
        )
    return v

validate_response_format classmethod

validate_response_format(v: str) -> str

Validate response format is supported.

Source code in ondine/core/specifications.py
@field_validator("response_format")
@classmethod
def validate_response_format(cls, v: str) -> str:
    """Validate response format is supported."""
    allowed = ["raw", "json", "regex"]
    if v not in allowed:
        raise ValueError(f"response_format must be one of {allowed}, got '{v}'")
    return v
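
A quick sketch of both checks (only the two validated fields are shown; any other PromptSpec fields are assumed to have defaults):

from pydantic import ValidationError
from ondine.core.specifications import PromptSpec

PromptSpec(template="Process: {text}", response_format="json")  # accepted

try:
    PromptSpec(template="no variables here", response_format="json")
except ValidationError as exc:
    print(exc)  # Template must contain at least one variable in {var} format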

LLMSpec

Bases: BaseModel

Specification for LLM provider configuration.

validate_base_url_format classmethod

validate_base_url_format(v: str | None) -> str | None

Validate base_url is a valid HTTP(S) URL with a host.

Source code in ondine/core/specifications.py
@field_validator("base_url")
@classmethod
def validate_base_url_format(cls, v: str | None) -> str | None:
    """Validate base_url is a valid HTTP(S) URL with a host."""
    if v is None:
        return v
    from urllib.parse import urlparse

    parsed = urlparse(v)
    if parsed.scheme not in {"http", "https"}:
        raise ValueError("base_url must start with http:// or https://")
    if not parsed.netloc:
        raise ValueError(
            "base_url must include a host (e.g., localhost, api.example.com)"
        )
    return v
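
This rejects a malformed endpoint before any network call is attempted. A hedged sketch (provider and model are assumed to be required fields and carry placeholder values):

from pydantic import ValidationError
from ondine.core.specifications import LLMProvider, LLMSpec

try:
    LLMSpec(
        provider=LLMProvider.OPENAI_COMPATIBLE,
        model="mistral-7b-instruct",
        base_url="localhost:8000/v1",  # missing http:// or https:// scheme
    )
except ValidationError as exc:
    print(exc)  # base_url must start with http:// or https://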

validate_azure_config classmethod

validate_azure_config(v: str | None, info: Any) -> str | None

Validate Azure-specific configuration.

Source code in ondine/core/specifications.py
@field_validator("azure_endpoint", "azure_deployment")
@classmethod
def validate_azure_config(cls, v: str | None, info: Any) -> str | None:
    """Validate Azure-specific configuration."""
    if info.data.get("provider") == LLMProvider.AZURE_OPENAI and v is None:
        field_name = info.field_name
        raise ValueError(f"{field_name} required for Azure OpenAI provider")
    return v

validate_provider_requirements

validate_provider_requirements() -> LLMSpec

Validate provider-specific requirements.

Source code in ondine/core/specifications.py
@model_validator(mode="after")
def validate_provider_requirements(self) -> "LLMSpec":
    """Validate provider-specific requirements."""
    # Check openai_compatible requires base_url
    if self.provider == LLMProvider.OPENAI_COMPATIBLE and self.base_url is None:
        raise ValueError("base_url required for openai_compatible provider")
    return self
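
Combined with validate_azure_config above, this gives each provider a fail-fast configuration check: Azure OpenAI requires an endpoint and deployment, and openai_compatible requires a base_url. A sketch of the latter (placeholder model name; remaining fields assumed optional):

from pydantic import ValidationError
from ondine.core.specifications import LLMProvider, LLMSpec

try:
    LLMSpec(provider=LLMProvider.OPENAI_COMPATIBLE, model="mistral-7b-instruct")
except ValidationError as exc:
    print(exc)  # base_url required for openai_compatible provider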

ProcessingSpec

Bases: BaseModel

Specification for processing parameters.

validate_checkpoint_dir classmethod

validate_checkpoint_dir(v: str | Path) -> Path

Convert string paths to Path objects.

Source code in ondine/core/specifications.py
@field_validator("checkpoint_dir")
@classmethod
def validate_checkpoint_dir(cls, v: str | Path) -> Path:
    """Convert string paths to Path objects."""
    return Path(v) if isinstance(v, str) else v

OutputSpec

Bases: BaseModel

Specification for output configuration.

validate_destination_path classmethod

validate_destination_path(v: str | Path | None) -> Path | None

Convert string paths to Path objects.

Source code in ondine/core/specifications.py
@field_validator("destination_path")
@classmethod
def validate_destination_path(cls, v: str | Path | None) -> Path | None:
    """Convert string paths to Path objects."""
    if v is None:
        return None
    return Path(v) if isinstance(v, str) else v

PipelineSpecifications

Bases: BaseModel

Container for all pipeline specifications.

LLMProviderPresets

Pre-configured LLM provider specifications for common use cases.

These presets provide convenient access to popular LLM providers with correct base URLs, pricing, and configuration. API keys must be provided at runtime via environment variables or explicit overrides.

Example

Use preset with env var API key:

from ondine.core.specifications import LLMProviderPresets

pipeline = (
    PipelineBuilder.create()
    .from_csv("data.csv", input_columns=["text"], output_columns=["result"])
    .with_prompt("Process: {text}")
    .with_llm_spec(LLMProviderPresets.TOGETHER_AI_LLAMA_70B)
    .build()
)

Override API key:

spec = LLMProviderPresets.TOGETHER_AI_LLAMA_70B.model_copy(
    update={"api_key": "your-key"}  # pragma: allowlist secret
)
pipeline.with_llm_spec(spec)

Security Note

All presets have api_key=None by default. You must provide API keys at runtime via environment variables or explicit overrides.
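
One common pattern is to read the key from the environment and apply it with model_copy, as in the override example above (the environment variable name here is illustrative, not one the library reads automatically):

import os

from ondine.core.specifications import LLMProviderPresets

spec = LLMProviderPresets.TOGETHER_AI_LLAMA_70B.model_copy(
    update={"api_key": os.environ["TOGETHER_API_KEY"]}  # hypothetical env var name
)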

create_custom_openai_compatible classmethod

create_custom_openai_compatible(
    provider_name: str,
    model: str,
    base_url: str,
    input_cost_per_1k: float = 0.0,
    output_cost_per_1k: float = 0.0,
    **kwargs,
) -> LLMSpec

Factory method for custom OpenAI-compatible providers.

Use this for providers like vLLM, LocalAI, Anyscale, or any custom OpenAI-compatible API endpoint.

Parameters:

provider_name (str): Display name for the provider (for logging/metrics). Required.
model (str): Model identifier. Required.
base_url (str): API endpoint URL (e.g., http://localhost:8000/v1). Required.
input_cost_per_1k (float): Input token cost per 1K tokens. Default: 0.0.
output_cost_per_1k (float): Output token cost per 1K tokens. Default: 0.0.
**kwargs: Additional LLMSpec parameters (temperature, max_tokens, etc.).

Returns:

LLMSpec: Configured LLMSpec for the custom provider.

Example

spec = LLMProviderPresets.create_custom_openai_compatible(
    provider_name="My vLLM Server",
    model="mistral-7b-instruct",
    base_url="http://my-server:8000/v1",
    temperature=0.7,
)

Source code in ondine/core/specifications.py
@classmethod
def create_custom_openai_compatible(
    cls,
    provider_name: str,
    model: str,
    base_url: str,
    input_cost_per_1k: float = 0.0,
    output_cost_per_1k: float = 0.0,
    **kwargs,
) -> LLMSpec:
    """
    Factory method for custom OpenAI-compatible providers.

    Use this for providers like vLLM, LocalAI, Anyscale, or any custom
    OpenAI-compatible API endpoint.

    Args:
        provider_name: Display name for the provider (for logging/metrics)
        model: Model identifier
        base_url: API endpoint URL (e.g., http://localhost:8000/v1)
        input_cost_per_1k: Input token cost per 1K tokens (default: 0.0)
        output_cost_per_1k: Output token cost per 1K tokens (default: 0.0)
        **kwargs: Additional LLMSpec parameters (temperature, max_tokens, etc.)

    Returns:
        Configured LLMSpec for the custom provider

    Example:
        spec = LLMProviderPresets.create_custom_openai_compatible(
            provider_name="My vLLM Server",
            model="mistral-7b-instruct",
            base_url="http://my-server:8000/v1",
            temperature=0.7
        )
    """
    return LLMSpec(
        provider=LLMProvider.OPENAI_COMPATIBLE,
        provider_name=provider_name,
        model=model,
        base_url=base_url,
        input_cost_per_1k_tokens=Decimal(str(input_cost_per_1k)),
        output_cost_per_1k_tokens=Decimal(str(output_cost_per_1k)),
        **kwargs,
    )
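
Note that the per-1K costs are routed through Decimal(str(...)): converting via str keeps a float argument like 0.1 from carrying binary floating-point error into downstream cost accounting. The returned spec is a plain LLMSpec, so it can be handed to the builder exactly like a preset, e.g. via .with_llm_spec(spec) as shown in the preset example above.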