# Source code for council.scorers.llm_similarity_scorer

from typing import Any, Dict, List, Optional

from council.contexts import ChatMessage, ContextBase, ScorerContext
from council.llm import LLMAnswer, LLMBase, LLMMessage, MonitoredLLM, llm_property

from ..llm.llm_answer import LLMParsingException, llm_class_validator
from ..utils import Option
from . import ScorerException
from .scorer_base import ScorerBase


class SimilarityScore:
    """
    Similarity score together with its justification.

    The raw score is stored as given and validated against the 0-100 range;
    the `score` property exposes it divided by 100.

    NOTE(review): the docstrings of the `llm_property` members read like
    prompt text and are presumably consumed by `LLMAnswer` when building the
    prompt - confirm before rewording them.
    """

    def __init__(self, score: float, justification: str) -> None:
        """
        Initialize a new instance

        Parameters:
            score (float): the raw score, validated against the 0-100 range
            justification (str): the explanation given for the score
        """
        self._score = score
        self._justification = justification

    @llm_property
    def score(self) -> float:
        """Your similarity Score"""
        return self._score / 100.0

    @llm_property
    def justification(self) -> str:
        """Short, helpful and specific explanation your score"""
        return self._justification

    def __str__(self) -> str:
        return f"Similarity score is {self.score} with the justification: {self._justification}"

    @llm_class_validator
    def validate(self) -> None:
        # Negated chained comparison on purpose: NaN compares False against
        # every bound, so `score < 0 or score > 100` would let it through.
        if not (0 <= self._score <= 100):
            raise LLMParsingException(f"Similarity Score `{self._score}` is invalid, value must be between 0 and 100.")


class LLMSimilarityScorer(ScorerBase):
    """
    Using an LLM to compute a similarity score between two messages.
    """

    def __init__(self, llm: LLMBase, expected: str) -> None:
        """
        Initialize a new instance

        Parameters:
            llm (LLMBase): the LLM to be used
            expected (str): the expected text message
        """
        super().__init__()
        self._llm = self.register_monitor(MonitoredLLM("llm", llm))
        self._expected = expected
        self._llm_answer = LLMAnswer(SimilarityScore)
        self._system_message = self._build_system_message()
        # Maximum number of LLM calls attempted by `_score` for one message.
        self._retry = 3

    def to_dict(self) -> Dict[str, Any]:
        """
        Return the base class dictionary extended with the expected message
        under the `expected` key.
        """
        result = super().to_dict()
        result["expected"] = self._expected
        return result

    def _score(self, context: ScorerContext, message: ChatMessage) -> float:
        """
        Ask the LLM for a similarity score, retrying on unusable answers.

        After a failed attempt, the faulty answer and a fix request are
        appended to the conversation before the LLM is called again.

        Parameters:
            context (ScorerContext): context used for the LLM call and logging
            message (ChatMessage): the actual message to compare to the expected one

        Returns:
            float: the `score` property of the parsed `SimilarityScore`

        Raises:
            ScorerException: when no attempt produced a usable answer; the
                last underlying error is attached as its cause
        """
        retry = self._retry
        messages = self._build_llm_messages(message)
        new_messages: List[LLMMessage] = []
        last_error: Optional[Exception] = None

        while retry > 0:
            messages = messages + new_messages
            llm_result = self._llm.post_chat_request(context, messages)
            response = llm_result.first_choice
            context.logger.debug(f"llm response: {response}")
            retry -= 1
            try:
                similarity_score = self._parse_response(context, response)
                return similarity_score.score
            except LLMParsingException as e:
                # `e` is unbound once the except clause ends, so keep a reference.
                last_error = e
                assistant_message = f"Your response is not correctly formatted:\n{response}"
                new_messages = self._handle_error(e, assistant_message, context)
            except ScorerException as e:
                last_error = e
                assistant_message = f"Your response raised an exception:\n{response}"
                new_messages = self._handle_error(e, assistant_message, context)

        # Chain the last failure so the root cause stays visible in tracebacks.
        raise ScorerException(f"LLMSimilarityScorer failed to execute after {self._retry} retries.") from last_error

    @staticmethod
    def _handle_error(e: Exception, assistant_message: str, context: ContextBase) -> List[LLMMessage]:
        """
        Log the error as a warning and build the follow-up messages for the LLM.

        Returns:
            List[LLMMessage]: an assistant message carrying `assistant_message`
                followed by a user message asking to fix the error
        """
        error = f"{e.__class__.__name__}: `{e}`"
        context.logger.warning(f"Exception occurred: {error}")
        return [LLMMessage.assistant_message(assistant_message), LLMMessage.user_message(f"Fix:\n{error}")]

    def _build_llm_messages(self, message: ChatMessage) -> List[LLMMessage]:
        """
        Build the initial conversation: the system message followed by a user
        message holding the actual and the expected texts.
        """
        user_prompt = [
            "Please give the similarity score of the actual message compared to the expected one.",
            "Actual message:",
            message.message,
            "Expected message:",
            self._expected,
        ]
        return [self._system_message, LLMMessage.user_message("\n".join(user_prompt))]

    def _build_system_message(self) -> LLMMessage:
        """
        Build the system message: role, instructions and the answer format
        given by `self._llm_answer.to_prompt()`.
        """
        system_prompt = [
            "# ROLE",
            "You are an expert specialized in evaluating how similar an expected message and an actual message are.",
            "\n# INSTRUCTIONS",
            "1. Compare the {expected} message and the {actual} message.",
            "2. Score 0 (the 2 messages are unrelated) to 100 (the 2 messages have the same content).",
            "3. Your score must be fair.",
            "\n# FORMATTING",
            "1. Your response is precisely formatted as:",
            self._llm_answer.to_prompt(),
        ]
        return LLMMessage.system_message("\n".join(system_prompt))

    def _parse_response(self, context: ContextBase, response: str) -> SimilarityScore:
        """
        Parse the LLM response line by line and return the first score found.

        Raises:
            LLMParsingException: when no line of the response could be parsed
        """
        parsed = [self._parse_line(line) for line in response.strip().splitlines()]
        filtered = [r.unwrap() for r in parsed if r.is_some()]
        if not filtered:
            raise LLMParsingException("None of your response could be parsed. Follow exactly formatting instructions.")

        similarity_score = filtered[0]
        context.logger.debug(f"{similarity_score}")
        return similarity_score

    def _parse_line(self, line: str) -> Option[SimilarityScore]:
        """
        Try to parse a single line into a `SimilarityScore`.

        Returns an empty option when the line does not contain the field
        separator or when `to_object` yields `None`.
        """
        if LLMAnswer.field_separator() not in line:
            return Option.none()

        similarity_score: Optional[SimilarityScore] = self._llm_answer.to_object(line)
        if similarity_score is not None:
            return Option.some(similarity_score)
        return Option.none()