count_tokens

Token counting utilities with fallback strategies.

batch_count_tokens

batch_count_tokens(texts: Sequence[str], model: str | None = None) -> list[int]

Count tokens for multiple texts.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| texts | Sequence[str] | Sequence of texts to count | required |
| model | str \| None | Optional model name for tiktoken | None |

Returns:

| Type | Description |
| --- | --- |
| list[int] | List of token counts |

Source code in src/llmling_agent/utils/count_tokens.py
def batch_count_tokens(texts: Sequence[str], model: str | None = None) -> list[int]:
    """Count tokens for multiple texts.

    Args:
        texts: Sequence of texts to count
        model: Optional model name for tiktoken

    Returns:
        List of token counts
    """
    if has_tiktoken():
        import tiktoken

        encoding = tiktoken.encoding_for_model(model or "gpt-3.5-turbo")
        return [len(encoding.encode(text)) for text in texts]

    return [count_tokens(text) for text in texts]
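
Example

A quick usage sketch, not taken from the package docs: it assumes the module is importable as llmling_agent.utils.count_tokens (matching the source path above). Exact counts depend on whether tiktoken is installed and which model encoding is selected.

from llmling_agent.utils.count_tokens import batch_count_tokens

texts = [
    "Hello, world!",
    "Token counting is only an estimate without tiktoken.",
]

# With tiktoken installed, counts come from the model's BPE encoding;
# otherwise each text falls back to the heuristic in count_tokens().
counts = batch_count_tokens(texts, model="gpt-3.5-turbo")
for text, n in zip(texts, counts):
    print(f"{n:>4} tokens: {text!r}")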

count_tokens

count_tokens(text: str, model: str | None = None) -> int

Count tokens in text with a fallback strategy.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| text | str | Text to count tokens for | required |
| model | str \| None | Optional model name for tiktoken (ignored in fallback) | None |

Returns:

| Type | Description |
| --- | --- |
| int | Estimated token count |

Source code in src/llmling_agent/utils/count_tokens.py
def count_tokens(text: str, model: str | None = None) -> int:
    """Count tokens in text with fallback strategy.

    Args:
        text: Text to count tokens for
        model: Optional model name for tiktoken (ignored in fallback)

    Returns:
        Estimated token count
    """
    if has_tiktoken():
        import tiktoken

        encoding = tiktoken.encoding_for_model(model or "gpt-3.5-turbo")
        return len(encoding.encode(text))

    # Fallback: very rough approximation
    # Strategies could be:
    # 1. ~4 chars per token (quick but rough)
    # 2. Word count * 1.3 (better for English)
    # 3. Split on common token boundaries
    return len(text.split()) + len(text) // 4
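
Example

The comments above list three possible fallback strategies. The sketch below is illustrative and not part of the library; it assumes plain English input and simply computes each heuristic side by side, including the one the fallback branch actually ships (word count plus a quarter of the character count).

def fallback_estimates(text: str) -> dict[str, int]:
    """Compare the fallback heuristics mentioned in the source comments."""
    words = len(text.split())
    chars = len(text)
    return {
        "chars_per_4": chars // 4,        # strategy 1: ~4 chars per token
        "words_x_1.3": int(words * 1.3),  # strategy 2: word count * 1.3
        "shipped": words + chars // 4,    # what count_tokens() returns without tiktoken
    }

print(fallback_estimates("Token counting is only an estimate without tiktoken."))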

has_tiktoken (cached)

has_tiktoken() -> bool

Check if tiktoken is available.

Source code in src/llmling_agent/utils/count_tokens.py
@lru_cache
def has_tiktoken() -> bool:
    """Check if tiktoken is available."""
    return bool(find_spec("tiktoken"))