converters

Class info¶

Classes¶

Name	Children	Inherits
BaseConverterConfig llmling_agent.models.converters Base configuration for document converters.	DoclingConverterConfig MarkItDownConfig YouTubeConverterConfig LocalWhisperConfig WhisperAPIConfig GoogleSpeechConfig PlainConverterConfig	BaseModel
ConversionConfig llmling_agent.models.converters Global conversion configuration.		BaseModel
DoclingConverterConfig llmling_agent.models.converters Configuration for docling-based converter.		BaseConverterConfig
`DocumentConverter` llmling_agent_converters.base Base class for document converters.		ABC
GoogleSpeechConfig llmling_agent.models.converters Configuration for Google Cloud Speech-to-Text.		BaseConverterConfig
LocalWhisperConfig llmling_agent.models.converters Configuration for local Whisper model.		BaseConverterConfig
MarkItDownConfig llmling_agent.models.converters Configuration for MarkItDown-based converter.		BaseConverterConfig
PlainConverterConfig llmling_agent.models.converters Configuration for plain text fallback converter.		BaseConverterConfig
WhisperAPIConfig llmling_agent.models.converters Configuration for OpenAI's Whisper API.		BaseConverterConfig
YouTubeConverterConfig llmling_agent.models.converters Configuration for YouTube transcript converter.		BaseConverterConfig

🛈 DocStrings¶

BaseConverterConfig ¶

Bases: BaseModel

Base configuration for document converters.

Source code in src/llmling_agent/models/converters.py

# Common base for the converter configuration models. The concrete configs
# derived from it narrow `type` to a Literal tag and override get_converter().
class BaseConverterConfig(BaseModel):
    """Base configuration for document converters."""

    # NOTE(review): pydantic documents Field(init=...) as applying to pydantic
    # dataclasses only -- confirm it has the intended effect on a BaseModel.
    type: str = Field(init=False)
    """Type discriminator for converter configs."""

    enabled: bool = True
    """Whether this converter is currently active."""

    # frozen: instances are immutable; extra="forbid": unknown keys are rejected;
    # use_attribute_docstrings: the string literal under each field becomes that
    # field's description, so those strings are runtime data, not mere comments.
    model_config = ConfigDict(frozen=True, use_attribute_docstrings=True, extra="forbid")

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Raises:
            NotImplementedError: Always; the base class builds no converter.
        """
        raise NotImplementedError

enabled `class-attribute` `instance-attribute` ¶

enabled: bool = True

Whether this converter is currently active.

type `class-attribute` `instance-attribute` ¶

type: str = Field(init=False)

Type discriminator for converter configs.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Get the converter instance.

    Raises:
        NotImplementedError: Always; the base class builds no converter.
    """
    raise NotImplementedError

ConversionConfig ¶

Bases: BaseModel

Global conversion configuration.

Source code in src/llmling_agent/models/converters.py

# Top-level conversion settings: an optional provider list, an optional
# default provider name and an optional global size limit. All default to None.
class ConversionConfig(BaseModel):
    """Global conversion configuration."""

    # NOTE(review): ConverterConfig is defined outside this listing; presumably
    # a union of the concrete converter config models -- confirm.
    providers: list[ConverterConfig] | None = None
    """List of configured converter providers."""

    # NOTE(review): how this name is matched against `providers` is not shown here.
    default_provider: str | None = None
    """Name of default provider for conversions."""

    # NOTE(review): the unit is not stated here; the per-converter `max_size`
    # fields are documented in bytes -- confirm this one matches.
    max_size: int | None = None
    """Global size limit for all converters."""

    # frozen: instances are immutable; extra="forbid": unknown keys are rejected;
    # use_attribute_docstrings: the string literal under each field becomes that
    # field's description.
    model_config = ConfigDict(frozen=True, use_attribute_docstrings=True, extra="forbid")

default_provider `class-attribute` `instance-attribute` ¶

default_provider: str | None = None

Name of default provider for conversions.

max_size `class-attribute` `instance-attribute` ¶

max_size: int | None = None

Global size limit for all converters.

providers `class-attribute` `instance-attribute` ¶

providers: list[ConverterConfig] | None = None

List of configured converter providers.

DoclingConverterConfig ¶

Bases: BaseConverterConfig

Configuration for docling-based converter.

Source code in src/llmling_agent/models/converters.py

# Settings for the docling converter; model_config is inherited from
# BaseConverterConfig.
class DoclingConverterConfig(BaseConverterConfig):
    """Configuration for docling-based converter."""

    # Fixed tag: "docling" is both the only accepted value and the default.
    type: Literal["docling"] = Field("docling", init=False)
    """Type discriminator for docling converter."""

    max_size: int | None = None
    """Optional size limit in bytes."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``DoclingConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.docling import DoclingConverter

        return DoclingConverter(self)

max_size `class-attribute` `instance-attribute` ¶

max_size: int | None = None

Optional size limit in bytes.

type `class-attribute` `instance-attribute` ¶

type: Literal['docling'] = Field('docling', init=False)

Type discriminator for docling converter.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Build the docling converter described by this configuration.

    Returns:
        A ``DoclingConverter`` that was handed this config object.
    """
    # The converter module is loaded only at call time.
    from llmling_agent_converters.docling import DoclingConverter

    converter = DoclingConverter(self)
    return converter

GoogleSpeechConfig ¶

Bases: BaseConverterConfig

Configuration for Google Cloud Speech-to-Text.

Source code in src/llmling_agent/models/converters.py

# Settings for the Google Cloud Speech-to-Text converter; model_config is
# inherited from BaseConverterConfig.
class GoogleSpeechConfig(BaseConverterConfig):
    """Configuration for Google Cloud Speech-to-Text."""

    # Fixed tag: "google_speech" is both the only accepted value and the default.
    type: Literal["google_speech"] = Field("google_speech", init=False)
    """Type discriminator for converter config."""

    language: str = "en-US"
    """Language code for transcription."""

    model: str = "default"
    """Speech model to use."""

    # Only these three encoding names pass validation.
    encoding: Literal["LINEAR16", "FLAC", "MP3"] = "LINEAR16"
    """Audio encoding format."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``GoogleSpeechConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.google_speech import GoogleSpeechConverter

        return GoogleSpeechConverter(self)

encoding `class-attribute` `instance-attribute` ¶

encoding: Literal['LINEAR16', 'FLAC', 'MP3'] = 'LINEAR16'

Audio encoding format.

language `class-attribute` `instance-attribute` ¶

language: str = 'en-US'

Language code for transcription.

model `class-attribute` `instance-attribute` ¶

model: str = 'default'

Speech model to use.

type `class-attribute` `instance-attribute` ¶

type: Literal['google_speech'] = Field('google_speech', init=False)

Type discriminator for converter config.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Build the Google speech converter described by this configuration.

    Returns:
        A ``GoogleSpeechConverter`` that was handed this config object.
    """
    # The converter module is loaded only at call time.
    from llmling_agent_converters.google_speech import GoogleSpeechConverter

    converter = GoogleSpeechConverter(self)
    return converter

LocalWhisperConfig ¶

Bases: BaseConverterConfig

Configuration for local Whisper model.

Source code in src/llmling_agent/models/converters.py

# Settings for the locally executed Whisper converter; model_config is
# inherited from BaseConverterConfig.
class LocalWhisperConfig(BaseConverterConfig):
    """Configuration for local Whisper model."""

    # Fixed tag: "local_whisper" is both the only accepted value and the default.
    type: Literal["local_whisper"] = Field("local_whisper", init=False)
    """Type discriminator for converter config."""

    # NOTE(review): how `model` interacts with `model_size` is decided by the
    # converter implementation, which is not shown here.
    model: str | None = None
    """Optional model name."""

    model_size: Literal["tiny", "base", "small", "medium", "large"] = "base"
    """Size of the Whisper model to use."""

    device: Literal["cpu", "cuda"] | None = None
    """Device to run model on (None for auto-select)."""

    compute_type: Literal["float32", "float16"] = "float16"
    """Compute precision to use."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``LocalWhisperConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.local_whisper import LocalWhisperConverter

        return LocalWhisperConverter(self)

compute_type `class-attribute` `instance-attribute` ¶

compute_type: Literal['float32', 'float16'] = 'float16'

Compute precision to use.

device `class-attribute` `instance-attribute` ¶

device: Literal['cpu', 'cuda'] | None = None

Device to run model on (None for auto-select).

model `class-attribute` `instance-attribute` ¶

model: str | None = None

Optional model name.

model_size `class-attribute` `instance-attribute` ¶

model_size: Literal['tiny', 'base', 'small', 'medium', 'large'] = 'base'

Size of the Whisper model to use.

type `class-attribute` `instance-attribute` ¶

type: Literal['local_whisper'] = Field('local_whisper', init=False)

Type discriminator for converter config.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Build the local Whisper converter described by this configuration.

    Returns:
        A ``LocalWhisperConverter`` that was handed this config object.
    """
    # The converter module is loaded only at call time.
    from llmling_agent_converters.local_whisper import LocalWhisperConverter

    converter = LocalWhisperConverter(self)
    return converter

MarkItDownConfig ¶

Bases: BaseConverterConfig

Configuration for MarkItDown-based converter.

Source code in src/llmling_agent/models/converters.py

# Settings for the MarkItDown converter; model_config is inherited from
# BaseConverterConfig.
class MarkItDownConfig(BaseConverterConfig):
    """Configuration for MarkItDown-based converter."""

    # Fixed tag: "markitdown" is both the only accepted value and the default.
    type: Literal["markitdown"] = Field("markitdown", init=False)
    """Type discriminator for MarkItDown converter."""

    max_size: int | None = None
    """Optional size limit in bytes."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``MarkItDownConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.markitdown_converter import MarkItDownConverter

        return MarkItDownConverter(self)

max_size `class-attribute` `instance-attribute` ¶

max_size: int | None = None

Optional size limit in bytes.

type `class-attribute` `instance-attribute` ¶

type: Literal['markitdown'] = Field('markitdown', init=False)

Type discriminator for MarkItDown converter.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Build the MarkItDown converter described by this configuration.

    Returns:
        A ``MarkItDownConverter`` that was handed this config object.
    """
    # The converter module is loaded only at call time.
    from llmling_agent_converters.markitdown_converter import MarkItDownConverter

    converter = MarkItDownConverter(self)
    return converter

PlainConverterConfig ¶

Bases: BaseConverterConfig

Configuration for plain text fallback converter.

Source code in src/llmling_agent/models/converters.py

# Settings for the plain-text fallback converter; model_config is inherited
# from BaseConverterConfig.
class PlainConverterConfig(BaseConverterConfig):
    """Configuration for plain text fallback converter."""

    # Fixed tag: "plain" is both the only accepted value and the default.
    type: Literal["plain"] = Field("plain", init=False)
    """Type discriminator for plain text converter."""

    # NOTE(review): which file types are attempted when this is False is
    # decided by the converter implementation, which is not shown here.
    force: bool = False
    """Whether to attempt converting any file type."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``PlainConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.plain_converter import PlainConverter

        return PlainConverter(self)

force `class-attribute` `instance-attribute` ¶

force: bool = False

Whether to attempt converting any file type.

type `class-attribute` `instance-attribute` ¶

type: Literal['plain'] = Field('plain', init=False)

Type discriminator for plain text converter.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Build the plain-text converter described by this configuration.

    Returns:
        A ``PlainConverter`` that was handed this config object.
    """
    # The converter module is loaded only at call time.
    from llmling_agent_converters.plain_converter import PlainConverter

    converter = PlainConverter(self)
    return converter

WhisperAPIConfig ¶

Bases: BaseConverterConfig

Configuration for OpenAI's Whisper API.

Source code in src/llmling_agent/models/converters.py

# Settings for the OpenAI Whisper API converter; model_config is inherited
# from BaseConverterConfig.
class WhisperAPIConfig(BaseConverterConfig):
    """Configuration for OpenAI's Whisper API."""

    # Fixed tag: "whisper_api" is both the only accepted value and the default.
    type: Literal["whisper_api"] = Field("whisper_api", init=False)
    """Type discriminator for converter config."""

    model: str | None = None
    """Optional model name."""

    # SecretStr keeps the key out of repr()/str() output.
    # NOTE(review): what the converter falls back to when this is None is not
    # shown here.
    api_key: SecretStr | None = None
    """OpenAI API key."""

    language: str | None = None
    """Optional language code."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``WhisperAPIConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.whisper_api import WhisperAPIConverter

        return WhisperAPIConverter(self)

api_key `class-attribute` `instance-attribute` ¶

api_key: SecretStr | None = None

OpenAI API key.

language `class-attribute` `instance-attribute` ¶

language: str | None = None

Optional language code.

model `class-attribute` `instance-attribute` ¶

model: str | None = None

Optional model name.

type `class-attribute` `instance-attribute` ¶

type: Literal['whisper_api'] = Field('whisper_api', init=False)

Type discriminator for converter config.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Build the Whisper API converter described by this configuration.

    Returns:
        A ``WhisperAPIConverter`` that was handed this config object.
    """
    # The converter module is loaded only at call time.
    from llmling_agent_converters.whisper_api import WhisperAPIConverter

    converter = WhisperAPIConverter(self)
    return converter

YouTubeConverterConfig ¶

Bases: BaseConverterConfig

Configuration for YouTube transcript converter.

Source code in src/llmling_agent/models/converters.py

# Settings for the YouTube transcript converter; model_config is inherited
# from BaseConverterConfig.
class YouTubeConverterConfig(BaseConverterConfig):
    """Configuration for YouTube transcript converter."""

    # Fixed tag: "youtube" is both the only accepted value and the default.
    type: Literal["youtube"] = Field("youtube", init=False)
    """Type discriminator for converter config."""

    # default_factory builds the ["en"] list when no value is supplied.
    languages: list[str] = Field(default_factory=lambda: ["en"])
    """Preferred language codes in priority order. Defaults to ['en']."""

    # NOTE(review): FormatterType is defined outside this listing; presumably a
    # Literal of the four names in the description below -- confirm.
    format: FormatterType = "text"
    """Output format. One of: text, json, vtt, srt."""

    preserve_formatting: bool = False
    """Whether to keep HTML formatting elements like <i> and <b>."""

    cookies_path: str | None = None
    """Optional path to cookies file for age-restricted videos."""

    # The documented URL format can embed credentials, yet the field is a plain
    # str rather than a SecretStr.
    https_proxy: str | None = None
    """Optional HTTPS proxy URL (format: https://user:pass@domain:port)."""

    max_retries: int = 3
    """Maximum number of retries for failed requests."""

    timeout: int = 30
    """Request timeout in seconds."""

    def get_converter(self) -> DocumentConverter:
        """Get the converter instance.

        Returns:
            A ``YouTubeTranscriptConverter`` constructed with this config object.
        """
        # Imported inside the method, so the converter module is only loaded
        # when a converter is actually requested.
        from llmling_agent_converters.youtubeconverter import YouTubeTranscriptConverter

        return YouTubeTranscriptConverter(self)

cookies_path `class-attribute` `instance-attribute` ¶

cookies_path: str | None = None

Optional path to cookies file for age-restricted videos.

format `class-attribute` `instance-attribute` ¶

format: FormatterType = 'text'

Output format. One of: text, json, vtt, srt.

https_proxy `class-attribute` `instance-attribute` ¶

https_proxy: str | None = None

Optional HTTPS proxy URL (format: https://user:pass@domain:port).

languages `class-attribute` `instance-attribute` ¶

languages: list[str] = Field(default_factory=lambda: ['en'])

Preferred language codes in priority order. Defaults to ['en'].

max_retries `class-attribute` `instance-attribute` ¶

max_retries: int = 3

Maximum number of retries for failed requests.

preserve_formatting `class-attribute` `instance-attribute` ¶

preserve_formatting: bool = False

Whether to keep HTML formatting elements like `<i>` and `<b>`.

timeout `class-attribute` `instance-attribute` ¶

timeout: int = 30

Request timeout in seconds.

type `class-attribute` `instance-attribute` ¶

type: Literal['youtube'] = Field('youtube', init=False)

Type discriminator for converter config.

get_converter ¶

get_converter() -> DocumentConverter

Get the converter instance.

Source code in src/llmling_agent/models/converters.py

def get_converter(self) -> DocumentConverter:
    """Get the converter instance.

    Returns:
        A ``YouTubeTranscriptConverter`` constructed with this config object.
    """
    # Imported inside the method, so the converter module is only loaded when
    # a converter is actually requested.
    from llmling_agent_converters.youtubeconverter import YouTubeTranscriptConverter

    return YouTubeTranscriptConverter(self)

converters

Class info¶

Classes¶

🛈 DocStrings¶

BaseConverterConfig ¶

enabled class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

ConversionConfig ¶

default_provider class-attribute instance-attribute ¶

max_size class-attribute instance-attribute ¶

providers class-attribute instance-attribute ¶

DoclingConverterConfig ¶

max_size class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

GoogleSpeechConfig ¶

encoding class-attribute instance-attribute ¶

language class-attribute instance-attribute ¶

model class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

LocalWhisperConfig ¶

compute_type class-attribute instance-attribute ¶

device class-attribute instance-attribute ¶

model class-attribute instance-attribute ¶

model_size class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

MarkItDownConfig ¶

max_size class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

PlainConverterConfig ¶

force class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

WhisperAPIConfig ¶

api_key class-attribute instance-attribute ¶

language class-attribute instance-attribute ¶

model class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

YouTubeConverterConfig ¶

cookies_path class-attribute instance-attribute ¶

format class-attribute instance-attribute ¶

https_proxy class-attribute instance-attribute ¶

languages class-attribute instance-attribute ¶

max_retries class-attribute instance-attribute ¶

preserve_formatting class-attribute instance-attribute ¶

timeout class-attribute instance-attribute ¶

type class-attribute instance-attribute ¶

get_converter ¶

enabled `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

default_provider `class-attribute` `instance-attribute` ¶

max_size `class-attribute` `instance-attribute` ¶

providers `class-attribute` `instance-attribute` ¶

max_size `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

encoding `class-attribute` `instance-attribute` ¶

language `class-attribute` `instance-attribute` ¶

model `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

compute_type `class-attribute` `instance-attribute` ¶

device `class-attribute` `instance-attribute` ¶

model `class-attribute` `instance-attribute` ¶

model_size `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

max_size `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

force `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

api_key `class-attribute` `instance-attribute` ¶

language `class-attribute` `instance-attribute` ¶

model `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶

cookies_path `class-attribute` `instance-attribute` ¶

format `class-attribute` `instance-attribute` ¶

https_proxy `class-attribute` `instance-attribute` ¶

languages `class-attribute` `instance-attribute` ¶

max_retries `class-attribute` `instance-attribute` ¶

preserve_formatting `class-attribute` `instance-attribute` ¶

timeout `class-attribute` `instance-attribute` ¶

type `class-attribute` `instance-attribute` ¶