Source code for council.llm.providers.ollama.ollama_llm

from __future__ import annotations

from typing import Any, List, Mapping, Optional, Sequence, Union

from council.contexts import Consumption, LLMContext
from council.llm import LLMBase, LLMMessage, LLMResult
from council.utils.utils import DurationManager
from ollama import Client
from ollama._types import Message, Options

from .ollama_llm_configuration import OllamaLLMConfiguration
from .ollama_llm_cost import OllamaConsumptionCalculator


class OllamaLLM(LLMBase[OllamaLLMConfiguration]):
    """LLM implementation that sends chat requests through an Ollama ``Client``."""

    def __init__(self, config: OllamaLLMConfiguration) -> None:
        """
        Initialize a new instance.

        Args:
            config (OllamaLLMConfiguration): configuration for the instance
        """
        super().__init__(name=self.__class__.__name__, configuration=config)
        self._client = Client()

    @property
    def client(self) -> Client:
        """
        Ollama Client.

        `_post_chat_request()` only covers chat-based LLM interactions. The client
        itself can be used for broader model management, such as listing, pulling,
        and deleting models, generating completions and embeddings, etc.

        See https://github.com/ollama/ollama/blob/main/docs/api.md
        """
        return self._client

    def pull(self) -> Mapping[str, Any]:
        """Download the model from the ollama library."""
        return self.client.pull(model=self.model_name)

    def load(self, keep_alive: Optional[Union[float, str]] = None) -> Mapping[str, Any]:
        """
        Load LLM in memory.

        Args:
            keep_alive: value forwarded as `keep_alive` to the chat call; when None,
                the configuration's `keep_alive_value` is used instead.

        Returns:
            The response of the chat call issued with an empty message list.
        """
        if keep_alive is None:
            keep_alive = self._configuration.keep_alive_value
        return self.client.chat(model=self.model_name, messages=[], keep_alive=keep_alive)

    def unload(self) -> Mapping[str, Any]:
        """Unload LLM from memory (chat call with no messages and `keep_alive=0`)."""
        return self.client.chat(model=self.model_name, messages=[], keep_alive=0)

    def _post_chat_request(self, context: LLMContext, messages: Sequence[LLMMessage], **kwargs: Any) -> LLMResult:
        """
        Send the messages to the model and wrap the response in an `LLMResult`.

        `context` and `**kwargs` are accepted but not used by this implementation.
        """
        payload = self._build_messages_payload(messages)
        configuration = self._configuration

        # Only the chat call itself is timed.
        with DurationManager() as timer:
            response = self.client.chat(
                model=self.model_name,
                messages=payload,
                stream=False,
                keep_alive=configuration.keep_alive_value,
                format=configuration.format,
                options=Options(**configuration.params_to_options()),  # type: ignore
            )

        # Read the duration only after the timing context has exited.
        choices = self._to_choices(response)
        consumptions = self._to_consumptions(timer.duration, response)
        return LLMResult(choices=choices, consumptions=consumptions, raw_response=dict(response))

    @staticmethod
    def _build_messages_payload(messages: Sequence[LLMMessage]) -> List[Message]:
        """Convert each `LLMMessage` into an ollama `Message` (role value + content)."""
        payload: List[Message] = []
        for item in messages:
            payload.append(Message(role=item.role.value, content=item.content))
        return payload

    @staticmethod
    def _to_choices(response: Mapping[str, Any]) -> List[str]:
        """Return the response message content as a single-element list."""
        message = response["message"]
        return [message["content"]]

    @staticmethod
    def _to_consumptions(duration: float, response: Mapping[str, Any]) -> Sequence[Consumption]:
        """Compute consumptions for the response using the model named in it."""
        return OllamaConsumptionCalculator(response["model"]).get_consumptions(duration, response)