Manages document conversion using configured providers.
To keep things simple, all converters are implemented as synchronous code.
The manager will handle async I/O and thread pooling.
Source code in src/llmling_agent/prompts/conversion_manager.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
class ConversionManager:
    """Manage document conversion using the configured converters.

    To keep things simple, all converters are implemented as synchronous code.
    The manager offers async entry points and runs the converter calls in a
    thread pool with three workers.
    """

    def __init__(self, config: ConversionConfig | list[DocumentConverter]):
        """Set up the converters and the worker pool.

        Args:
            config: Either a ``ConversionConfig`` from which the converters are
                built, or a ready-made list of converters that is used as-is
                (a default ``ConversionConfig`` is stored in that case).
        """
        # Create the pool first so that __del__ always finds it, even when
        # building the config or the converters below raises.
        self._executor = ThreadPoolExecutor(max_workers=3)
        if isinstance(config, list):
            self.config = ConversionConfig()
            self._converters = config
        else:
            self.config = config
            self._converters = self._setup_converters()

    def __del__(self):
        """Shut the thread pool down without waiting for pending work."""
        # The attribute is missing when __init__ failed early or was bypassed;
        # finalization must not raise in that case.
        executor = getattr(self, "_executor", None)
        if executor is not None:
            executor.shutdown(wait=False)

    def supports_file(self, path: JoinablePathLike) -> bool:
        """Check if any converter supports the file."""
        return any(c.supports_file(path) for c in self._converters)

    def supports_content(self, content: Any, mime_type: str | None = None) -> bool:
        """Check if any converter supports the content."""
        return any(c.supports_content(content, mime_type) for c in self._converters)

    def _setup_converters(self) -> list[DocumentConverter]:
        """Create converter instances from config.

        Returns:
            One converter per enabled provider, in config order, followed by
            a ``PlainConverter``.
        """
        from llmling_agent_converters.plain_converter import PlainConverter

        providers = self.config.providers or []
        converters = [i.get_converter() for i in providers if i.enabled]
        # Always add PlainConverter as fallback. Converters are tried in list
        # order, so a user-configured one gets preference over this one.
        converters.append(PlainConverter())
        return converters

    async def convert_file(self, path: JoinablePathLike) -> str:
        """Convert file using first supporting converter.

        The file is read once in ``"rb"`` mode via ``read_path``; the first
        converter whose ``supports_file(path)`` is truthy converts the content,
        together with the MIME type guessed from the path.

        Args:
            path: Location of the file to convert.

        Returns:
            The converter's result, or the file content as text when no
            converter supports the file.
        """
        import mimetypes

        loop = asyncio.get_running_loop()
        content = await read_path(path, "rb")
        for converter in self._converters:
            # Run support check in thread pool
            supports = await loop.run_in_executor(
                self._executor, converter.supports_file, path
            )
            if not supports:
                continue
            # Run conversion in thread pool
            typ = mimetypes.guess_type(str(path))[0]
            return await loop.run_in_executor(
                self._executor,
                converter.convert_content,
                content,
                typ,
            )
        # Fallback for unsupported files: str() on bytes would yield the
        # "b'...'" repr, so decode binary content instead.
        if isinstance(content, (bytes, bytearray)):
            return content.decode("utf-8", errors="replace")
        return str(content)

    async def convert_content(self, content: Any, mime_type: str | None = None) -> str:
        """Convert content using first supporting converter.

        Args:
            content: The content to convert.
            mime_type: Optional MIME type passed through to the converters.

        Returns:
            The converter's result, or ``str(content)`` when no converter
            supports the content.
        """
        loop = asyncio.get_running_loop()
        for converter in self._converters:
            # Run support check in thread pool
            supports = await loop.run_in_executor(
                self._executor, converter.supports_content, content, mime_type
            )
            if not supports:
                continue
            # Run conversion in thread pool
            return await loop.run_in_executor(
                self._executor, converter.convert_content, content, mime_type
            )
        return str(content)  # Fallback for unsupported content
|
convert_content
async
convert_content(content: Any, mime_type: str | None = None) -> str
Convert content using first supporting converter.
Source code in src/llmling_agent/prompts/conversion_manager.py
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100 | async def convert_content(self, content: Any, mime_type: str | None = None) -> str:
"""Convert content using first supporting converter."""
loop = asyncio.get_running_loop()
for converter in self._converters:
# Run support check in thread pool
supports = await loop.run_in_executor(
self._executor, converter.supports_content, content, mime_type
)
if not supports:
continue
# Run conversion in thread pool
return await loop.run_in_executor(
self._executor, converter.convert_content, content, mime_type
)
return str(content) # Fallback for unsupported content
|
convert_file
async
convert_file(path: JoinablePathLike) -> str
Convert file using first supporting converter.
Source code in src/llmling_agent/prompts/conversion_manager.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
async def convert_file(self, path: JoinablePathLike) -> str:
    """Convert file using first supporting converter.

    The file is read once in ``"rb"`` mode via ``read_path``; the first
    converter whose ``supports_file(path)`` is truthy converts the content,
    together with the MIME type guessed from the path.

    Args:
        path: Location of the file to convert.

    Returns:
        The converter's result, or the file content as text when no
        converter supports the file.
    """
    import mimetypes

    loop = asyncio.get_running_loop()
    content = await read_path(path, "rb")
    for converter in self._converters:
        # Run support check in thread pool
        supports = await loop.run_in_executor(
            self._executor, converter.supports_file, path
        )
        if not supports:
            continue
        # Run conversion in thread pool
        typ = mimetypes.guess_type(str(path))[0]
        return await loop.run_in_executor(
            self._executor,
            converter.convert_content,
            content,
            typ,
        )
    # Fallback for unsupported files: str() on bytes would yield the
    # "b'...'" repr, so decode binary content instead.
    if isinstance(content, (bytes, bytearray)):
        return content.decode("utf-8", errors="replace")
    return str(content)
|
supports_content
supports_content(content: Any, mime_type: str | None = None) -> bool
Check if any converter supports the content.
Source code in src/llmling_agent/prompts/conversion_manager.py
| def supports_content(self, content: Any, mime_type: str | None = None) -> bool:
"""Check if any converter supports the file."""
return any(c.supports_content(content, mime_type) for c in self._converters)
|
supports_file
supports_file(path: JoinablePathLike) -> bool
Check if any converter supports the file.
Source code in src/llmling_agent/prompts/conversion_manager.py
| def supports_file(self, path: JoinablePathLike) -> bool:
"""Check if any converter supports the file."""
return any(c.supports_file(path) for c in self._converters)
|