Ollama Provider API Reference

The Ollama `Provider` class implements the `AIProvider` interface for the Ollama service. It provides methods for text generation and chat using locally hosted models served through Ollama.

Class Definition

Bases: AIProvider

Ollama-specific implementation of the AIProvider abstract base class.

This class provides methods to interact with Ollama's models for text generation and chat functionality.

Attributes:

| Name | Type | Description |
|------|------|-------------|
| `client` | `OllamaClientProtocol` | The Ollama client used for making API calls. |

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `host` | `Optional[str]` | The host address for the Ollama server. If not provided, the default Ollama client will be used. | `None` |

Raises:

| Type | Description |
|------|-------------|
| `ImportError` | If the Ollama package is not installed. |

Examples:

Initialize the Ollama provider:

```python
provider = Provider(host="http://localhost:11434")
```
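
For a quick end-to-end check, the provider can be constructed and used directly. A minimal sketch based on the examples on this page, assuming an Ollama server is running locally, a model such as `llama2` has already been pulled, and that `Provider` is importable from the module path shown below (`clientai.ollama.provider`):

```python
from clientai.ollama.provider import Provider  # import path assumed from the source location below

# Connect to a local Ollama server; omitting host falls back to the default Ollama client.
provider = Provider(host="http://localhost:11434")

# Single-shot text generation; returns only the generated text by default.
text = provider.generate_text(
    "Explain the concept of machine learning",
    model="llama2",
)
print(text)
```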

Source code in clientai/ollama/provider.py
class Provider(AIProvider):
    """
    Ollama-specific implementation of the AIProvider abstract base class.

    This class provides methods to interact with Ollama's models for
    text generation and chat functionality.

    Attributes:
        client: The Ollama client used for making API calls.

    Args:
        host: The host address for the Ollama server.
            If not provided, the default Ollama client will be used.

    Raises:
        ImportError: If the Ollama package is not installed.

    Examples:
        Initialize the Ollama provider:
        ```python
        provider = Provider(host="http://localhost:11434")
        ```
    """

    def __init__(self, host: Optional[str] = None):
        if not OLLAMA_INSTALLED or Client is None:
            raise ImportError(
                "The ollama package is not installed. "
                "Please install it with 'pip install clientai[ollama]'."
            )
        self.client: OllamaClientProtocol = cast(
            OllamaClientProtocol, Client(host=host) if host else ollama
        )

    def _stream_generate_response(
        self,
        stream: Iterator[OllamaStreamResponse],
        return_full_response: bool,
    ) -> Iterator[Union[str, OllamaStreamResponse]]:
        """
        Process the streaming response from Ollama API for text generation.

        Args:
            stream: The stream of responses from Ollama API.
            return_full_response: If True, yield full response objects.

        Yields:
            Union[str, OllamaStreamResponse]: Processed content or
                                              full response objects.
        """
        for chunk in stream:
            if return_full_response:
                yield chunk
            else:
                yield chunk["response"]

    def _stream_chat_response(
        self,
        stream: Iterator[OllamaChatResponse],
        return_full_response: bool,
    ) -> Iterator[Union[str, OllamaChatResponse]]:
        """
        Process the streaming response from Ollama API for chat.

        Args:
            stream: The stream of responses from Ollama API.
            return_full_response: If True, yield full response objects.

        Yields:
            Union[str, OllamaChatResponse]: Processed content or
                                            full response objects.
        """
        for chunk in stream:
            if return_full_response:
                yield chunk
            else:
                yield chunk["message"]["content"]

    def _map_exception_to_clientai_error(self, e: Exception) -> ClientAIError:
        """
        Maps an Ollama exception to the appropriate ClientAI exception.

        Args:
            e (Exception): The exception caught during the API call.

        Returns:
            ClientAIError: An instance of the appropriate ClientAI exception.
        """
        message = str(e)

        if isinstance(e, ollama.RequestError):
            if "authentication" in message.lower():
                return AuthenticationError(
                    message, status_code=401, original_error=e
                )
            elif "rate limit" in message.lower():
                return RateLimitError(
                    message, status_code=429, original_error=e
                )
            elif "not found" in message.lower():
                return ModelError(message, status_code=404, original_error=e)
            else:
                return InvalidRequestError(
                    message, status_code=400, original_error=e
                )
        elif isinstance(e, ollama.ResponseError):
            if "timeout" in message.lower() or "timed out" in message.lower():
                return TimeoutError(message, status_code=408, original_error=e)
            else:
                return APIError(message, status_code=500, original_error=e)
        else:
            return ClientAIError(message, status_code=500, original_error=e)

    def generate_text(
        self,
        prompt: str,
        model: str,
        return_full_response: bool = False,
        stream: bool = False,
        **kwargs: Any,
    ) -> OllamaGenericResponse:
        """
        Generate text based on a given prompt using a specified Ollama model.

        Args:
            prompt: The input prompt for text generation.
            model: The name or identifier of the Ollama model to use.
            return_full_response: If True, return the full response object.
                If False, return only the generated text. Defaults to False.
            stream: If True, return an iterator for streaming responses.
                Defaults to False.
            **kwargs: Additional keyword arguments to pass to the Ollama API.

        Returns:
            OllamaGenericResponse: The generated text, full response object,
            or an iterator for streaming responses.

        Examples:
            Generate text (text only):
            ```python
            response = provider.generate_text(
                "Explain the concept of machine learning",
                model="llama2",
            )
            print(response)
            ```

            Generate text (full response):
            ```python
            response = provider.generate_text(
                "Explain the concept of machine learning",
                model="llama2",
                return_full_response=True
            )
            print(response["response"])
            ```

            Generate text (streaming):
            ```python
            for chunk in provider.generate_text(
                "Explain the concept of machine learning",
                model="llama2",
                stream=True
            ):
                print(chunk, end="", flush=True)
            ```
        """
        try:
            response = self.client.generate(
                model=model, prompt=prompt, stream=stream, **kwargs
            )

            if stream:
                return cast(
                    OllamaGenericResponse,
                    self._stream_generate_response(
                        cast(Iterator[OllamaStreamResponse], response),
                        return_full_response,
                    ),
                )
            else:
                response = cast(OllamaResponse, response)
                if return_full_response:
                    return response
                else:
                    return response["response"]

        except Exception as e:
            raise self._map_exception_to_clientai_error(e)

    def chat(
        self,
        messages: List[Message],
        model: str,
        return_full_response: bool = False,
        stream: bool = False,
        **kwargs: Any,
    ) -> OllamaGenericResponse:
        """
        Engage in a chat conversation using a specified Ollama model.

        Args:
            messages: A list of message dictionaries, each containing
                      'role' and 'content'.
            model: The name or identifier of the Ollama model to use.
            return_full_response: If True, return the full response object.
                If False, return only the generated text. Defaults to False.
            stream: If True, return an iterator for streaming responses.
                Defaults to False.
            **kwargs: Additional keyword arguments to pass to the Ollama API.

        Returns:
            OllamaGenericResponse: The chat response, full response object,
            or an iterator for streaming responses.

        Examples:
            Chat (message content only):
            ```python
            messages = [
                {"role": "user", "content": "What is the capital of Japan?"},
                {"role": "assistant", "content": "The capital is Tokyo."},
                {"role": "user", "content": "What is its population?"}
            ]
            response = provider.chat(
                messages,
                model="llama2",
            )
            print(response)
            ```

            Chat (full response):
            ```python
            response = provider.chat(
                messages,
                model="llama2",
                return_full_response=True
            )
            print(response["message"]["content"])
            ```

            Chat (streaming):
            ```python
            for chunk in provider.chat(
                messages,
                model="llama2",
                stream=True
            ):
                print(chunk, end="", flush=True)
            ```
        """
        try:
            response = self.client.chat(
                model=model, messages=messages, stream=stream, **kwargs
            )

            if stream:
                return cast(
                    OllamaGenericResponse,
                    self._stream_chat_response(
                        cast(Iterator[OllamaChatResponse], response),
                        return_full_response,
                    ),
                )
            else:
                response = cast(OllamaChatResponse, response)
                if return_full_response:
                    return response
                else:
                    return response["message"]["content"]

        except Exception as e:
            raise self._map_exception_to_clientai_error(e)
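
Because `_map_exception_to_clientai_error` translates Ollama exceptions into the ClientAI error hierarchy, callers can handle failures from `generate_text` and `chat` uniformly. A hedged sketch, assuming the error classes used in the source above are importable from a `clientai.exceptions` module (the exact import path is not shown on this page):

```python
# Hypothetical import path for the error classes referenced in the source above.
from clientai.exceptions import ClientAIError, ModelError, RateLimitError

try:
    reply = provider.generate_text("Hello", model="llama2")
except ModelError as e:
    # Raised when Ollama reports the model was not found (mapped to status 404).
    print(f"Model missing: {e}")
except RateLimitError as e:
    # Raised when the request was rate limited (mapped to status 429).
    print(f"Rate limited: {e}")
except ClientAIError as e:
    # Base class for all mapped provider errors.
    print(f"Ollama call failed: {e}")
```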

chat(messages, model, return_full_response=False, stream=False, **kwargs)

Engage in a chat conversation using a specified Ollama model.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `messages` | `List[Message]` | A list of message dictionaries, each containing 'role' and 'content'. | *required* |
| `model` | `str` | The name or identifier of the Ollama model to use. | *required* |
| `return_full_response` | `bool` | If True, return the full response object. If False, return only the generated text. | `False` |
| `stream` | `bool` | If True, return an iterator for streaming responses. | `False` |
| `**kwargs` | `Any` | Additional keyword arguments to pass to the Ollama API. | `{}` |

Returns:

| Type | Description |
|------|-------------|
| `OllamaGenericResponse` | The chat response, full response object, or an iterator for streaming responses. |

Examples:

Chat (message content only):

```python
messages = [
    {"role": "user", "content": "What is the capital of Japan?"},
    {"role": "assistant", "content": "The capital is Tokyo."},
    {"role": "user", "content": "What is its population?"}
]
response = provider.chat(
    messages,
    model="llama2",
)
print(response)
```

Chat (full response):

```python
response = provider.chat(
    messages,
    model="llama2",
    return_full_response=True
)
print(response["message"]["content"])
```

Chat (streaming):

```python
for chunk in provider.chat(
    messages,
    model="llama2",
    stream=True
):
    print(chunk, end="", flush=True)
```
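
Since `chat` is stateless, conversation history is carried entirely in the `messages` list. A minimal sketch of a multi-turn exchange that appends each reply before the next call, assuming the default text-only return shown above:

```python
messages = [{"role": "user", "content": "What is the capital of Japan?"}]

# First turn: the reply is plain text because return_full_response defaults to False.
reply = provider.chat(messages, model="llama2")
messages.append({"role": "assistant", "content": reply})

# Second turn: earlier turns stay in the list, so the model sees the full history.
messages.append({"role": "user", "content": "What is its population?"})
followup = provider.chat(messages, model="llama2")
print(followup)
```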

Source code in clientai/ollama/provider.py
def chat(
    self,
    messages: List[Message],
    model: str,
    return_full_response: bool = False,
    stream: bool = False,
    **kwargs: Any,
) -> OllamaGenericResponse:
    """
    Engage in a chat conversation using a specified Ollama model.

    Args:
        messages: A list of message dictionaries, each containing
                  'role' and 'content'.
        model: The name or identifier of the Ollama model to use.
        return_full_response: If True, return the full response object.
            If False, return only the generated text. Defaults to False.
        stream: If True, return an iterator for streaming responses.
            Defaults to False.
        **kwargs: Additional keyword arguments to pass to the Ollama API.

    Returns:
        OllamaGenericResponse: The chat response, full response object,
        or an iterator for streaming responses.

    Examples:
        Chat (message content only):
        ```python
        messages = [
            {"role": "user", "content": "What is the capital of Japan?"},
            {"role": "assistant", "content": "The capital is Tokyo."},
            {"role": "user", "content": "What is its population?"}
        ]
        response = provider.chat(
            messages,
            model="llama2",
        )
        print(response)
        ```

        Chat (full response):
        ```python
        response = provider.chat(
            messages,
            model="llama2",
            return_full_response=True
        )
        print(response["message"]["content"])
        ```

        Chat (streaming):
        ```python
        for chunk in provider.chat(
            messages,
            model="llama2",
            stream=True
        ):
            print(chunk, end="", flush=True)
        ```
    """
    try:
        response = self.client.chat(
            model=model, messages=messages, stream=stream, **kwargs
        )

        if stream:
            return cast(
                OllamaGenericResponse,
                self._stream_chat_response(
                    cast(Iterator[OllamaChatResponse], response),
                    return_full_response,
                ),
            )
        else:
            response = cast(OllamaChatResponse, response)
            if return_full_response:
                return response
            else:
                return response["message"]["content"]

    except Exception as e:
        raise self._map_exception_to_clientai_error(e)

generate_text(prompt, model, return_full_response=False, stream=False, **kwargs)

Generate text based on a given prompt using a specified Ollama model.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `prompt` | `str` | The input prompt for text generation. | *required* |
| `model` | `str` | The name or identifier of the Ollama model to use. | *required* |
| `return_full_response` | `bool` | If True, return the full response object. If False, return only the generated text. | `False` |
| `stream` | `bool` | If True, return an iterator for streaming responses. | `False` |
| `**kwargs` | `Any` | Additional keyword arguments to pass to the Ollama API. | `{}` |

Returns:

| Type | Description |
|------|-------------|
| `OllamaGenericResponse` | The generated text, full response object, or an iterator for streaming responses. |

Examples:

Generate text (text only):

```python
response = provider.generate_text(
    "Explain the concept of machine learning",
    model="llama2",
)
print(response)
```

Generate text (full response):

```python
response = provider.generate_text(
    "Explain the concept of machine learning",
    model="llama2",
    return_full_response=True
)
print(response["response"])
```

Generate text (streaming):

```python
for chunk in provider.generate_text(
    "Explain the concept of machine learning",
    model="llama2",
    stream=True
):
    print(chunk, end="", flush=True)
```
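
Extra keyword arguments are forwarded to the Ollama API unchanged, and streaming can also yield the raw chunk objects instead of plain text. A sketch assuming the underlying Ollama client accepts an `options` mapping with keys like `temperature` (that parameter comes from the Ollama API, not from this page):

```python
# "options" and "temperature" are assumed to be accepted by the underlying
# Ollama client; they are passed straight through **kwargs by generate_text.
for chunk in provider.generate_text(
    "Explain the concept of machine learning",
    model="llama2",
    stream=True,
    return_full_response=True,  # yield the full chunk dicts instead of plain text
    options={"temperature": 0.2},
):
    # Each streamed chunk exposes the generated text under the "response" key.
    print(chunk["response"], end="", flush=True)
```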

Source code in clientai/ollama/provider.py
def generate_text(
    self,
    prompt: str,
    model: str,
    return_full_response: bool = False,
    stream: bool = False,
    **kwargs: Any,
) -> OllamaGenericResponse:
    """
    Generate text based on a given prompt using a specified Ollama model.

    Args:
        prompt: The input prompt for text generation.
        model: The name or identifier of the Ollama model to use.
        return_full_response: If True, return the full response object.
            If False, return only the generated text. Defaults to False.
        stream: If True, return an iterator for streaming responses.
            Defaults to False.
        **kwargs: Additional keyword arguments to pass to the Ollama API.

    Returns:
        OllamaGenericResponse: The generated text, full response object,
        or an iterator for streaming responses.

    Examples:
        Generate text (text only):
        ```python
        response = provider.generate_text(
            "Explain the concept of machine learning",
            model="llama2",
        )
        print(response)
        ```

        Generate text (full response):
        ```python
        response = provider.generate_text(
            "Explain the concept of machine learning",
            model="llama2",
            return_full_response=True
        )
        print(response["response"])
        ```

        Generate text (streaming):
        ```python
        for chunk in provider.generate_text(
            "Explain the concept of machine learning",
            model="llama2",
            stream=True
        ):
            print(chunk, end="", flush=True)
        ```
    """
    try:
        response = self.client.generate(
            model=model, prompt=prompt, stream=stream, **kwargs
        )

        if stream:
            return cast(
                OllamaGenericResponse,
                self._stream_generate_response(
                    cast(Iterator[OllamaStreamResponse], response),
                    return_full_response,
                ),
            )
        else:
            response = cast(OllamaResponse, response)
            if return_full_response:
                return response
            else:
                return response["response"]

    except Exception as e:
        raise self._map_exception_to_clientai_error(e)