Source code for camel.agents.tool_agents.hugging_face_tool_agent

# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
from typing import Any, Optional

from camel.agents.tool_agents import BaseToolAgent


# flake8: noqa :E501
class HuggingFaceToolAgent(BaseToolAgent):
    r"""Tool agent for calling HuggingFace models. This agent is a wrapper
    around agents from the `transformers` library. For more information about
    the available models, please see the `transformers` documentation at
    https://huggingface.co/docs/transformers/transformers_agents.

    Args:
        name (str): The name of the agent.
        *args (Any): Additional positional arguments to pass to the underlying
            Agent class.
        remote (bool, optional): Flag indicating whether to run the agent
            remotely. (default: :obj:`True`)
        **kwargs (Any): Additional keyword arguments to pass to the underlying
            Agent class.
    """

    def __init__(
        self,
        name: str,
        *args: Any,
        remote: bool = True,
        **kwargs: Any,
    ) -> None:
        try:
            # TODO: Support other tool agents
            import transformers
            from packaging import version

            if version.parse(transformers.__version__) < version.parse(
                    "4.31.0"):
                raise ValueError(
                    "The version of the \"transformers\" package should be "
                    ">= 4.31.0")
            from transformers.tools import OpenAiAgent
            from transformers.tools.agent_types import AgentImage
        except (ImportError, ValueError):
            raise ValueError(
                "Could not import transformers tool agents. "
                "Please set up the environment with "
                "pip install huggingface_hub==0.14.1 transformers==4.31.0 diffusers accelerate==0.20.3 datasets torch soundfile sentencepiece opencv-python"
            )
        self.agent_image_type = AgentImage
        self.agent = OpenAiAgent(*args, **kwargs)
        description = f"""The `{name}` is a tool agent that can perform a variety of tasks including:
- Document question answering: given a document (such as a PDF) in image format, answer a question on this document
- Text question answering: given a long text and a question, answer the question in the text
- Unconditional image captioning: caption the image!
- Image question answering: given an image, answer a question on this image
- Image segmentation: given an image and a prompt, output the segmentation mask of that prompt
- Speech to text: given an audio recording of a person talking, transcribe the speech into text
- Text to speech: convert text to speech
- Zero-shot text classification: given a text and a list of labels, identify to which label the text corresponds the most
- Text summarization: summarize a long text in one or a few sentences
- Translation: translate the text into a given language
- Text downloading: download a text from a web URL
- Text to image: generate an image according to a prompt, leveraging stable diffusion
- Image transformation: modify an image given an initial image and a prompt, leveraging instruct pix2pix stable diffusion
- Text to video: generate a small video according to a prompt

Here are some python code examples of what you can do with this agent:

Single execution (step) mode, using the step() method of the agent:
```
# Text to image
rivers_and_lakes_image = {name}.step("Draw me a picture of rivers and lakes.")
rivers_and_lakes_image.save("./rivers_and_lakes_image.png")

# Text to image -> Image transformation
sea_add_island_image = {name}.step("Draw me a picture of the sea then transform the picture to add an island")
sea_add_island_image.save("./sea_add_island_image.png")

# If you'd like to keep a state across executions or to pass non-text objects to the agent,
# you can do so by specifying variables that you would like the agent to use. For example,
# you could generate the first image of rivers and lakes, and ask the model to update that
# picture to add an island by doing the following:
picture = {name}.step("Generate a picture of rivers and lakes.")
picture.save("./picture.png")
updated_picture = {name}.step("Transform the image in `picture` to add an island to it.", picture=picture)
updated_picture.save("./updated_picture.png")

capybara_sea_image = {name}.step("Draw me a picture of the `prompt`", prompt="a capybara swimming in the sea")
capybara_sea_image.save("./capybara_sea_image.png")

# Document question answering
answer = {name}.step(
    "In the following `document`, where will the TRRF Scientific Advisory Council Meeting take place?",
    document=document,
)
print(answer)

# Text to image
boat_image = {name}.step("Generate an image of a boat in the water")
boat_image.save("./boat_image.png")

# Unconditional image captioning
boat_image_caption = {name}.step("Can you caption the `boat_image`?", boat_image=boat_image)
print(boat_image_caption)

# Text to image -> Unconditional image captioning -> Text to speech
boat_audio = {name}.step("Can you generate an image of a boat? Please read out loud the contents of the image afterwards")

# Text downloading
document = {name}.step("Download the text from http://hf.co")
print(document)

# Text summarization
summary = {name}.step("Summarize the following text: `document`", document=document)
print(summary)

# Text downloading -> Text summarization -> Text to speech
audio = {name}.step("Read out loud the summary of http://hf.co")
```

Chat-based execution (chat) mode, using the chat() method of the agent:
```
# Clean the chat history
{name}.reset()

# Text to image
capybara_image = {name}.chat("Show me an image of a capybara")
capybara_image.save("./capybara_image.png")

# Image transformation
transformed_capybara_image = {name}.chat("Transform the image so that it snows")
transformed_capybara_image.save("./transformed_capybara_image.png")

# Image segmentation
segmented_transformed_capybara_image = {name}.chat("Show me a mask of the snowy capybaras")
segmented_transformed_capybara_image.save("./segmented_transformed_capybara_image.png")
```
"""
        super(HuggingFaceToolAgent, self).__init__(name, description)
        self.remote = remote

    def reset(self) -> None:
        r"""Resets the chat history of the agent."""
        self.agent.prepare_for_new_chat()

    def step(
        self,
        *args: Any,
        remote: Optional[bool] = None,
        **kwargs: Any,
    ) -> Any:
        r"""Runs the agent in single execution mode.

        Args:
            *args (Any): Positional arguments to pass to the agent.
            remote (bool, optional): Flag indicating whether to run the agent
                remotely. Overrides the default setting.
                (default: :obj:`None`)
            **kwargs (Any): Keyword arguments to pass to the agent.

        Returns:
            Any: The response from the agent.
        """
        if remote is None:
            remote = self.remote
        agent_output = self.agent.run(*args, remote=remote, **kwargs)
        if isinstance(agent_output, self.agent_image_type):
            agent_output = agent_output.to_raw()
        return agent_output

    def chat(
        self,
        *args: Any,
        remote: Optional[bool] = None,
        **kwargs: Any,
    ) -> Any:
        r"""Runs the agent in a chat conversation mode.

        Args:
            *args (Any): Positional arguments to pass to the agent.
            remote (bool, optional): Flag indicating whether to run the agent
                remotely. Overrides the default setting.
                (default: :obj:`None`)
            **kwargs (Any): Keyword arguments to pass to the agent.

        Returns:
            Any: The response from the agent.
        """
        if remote is None:
            remote = self.remote
        agent_output = self.agent.chat(*args, remote=remote, **kwargs)
        if isinstance(agent_output, self.agent_image_type):
            agent_output = agent_output.to_raw()
        return agent_output
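

# A minimal usage sketch (not part of the original module). It assumes the
# extra packages listed in the ImportError message above are installed and
# that an OpenAI key is available; the model name and the OPENAI_API_KEY
# environment variable below are illustrative choices passed through to
# transformers' OpenAiAgent, not requirements of this class.
if __name__ == "__main__":
    import os

    agent = HuggingFaceToolAgent(
        "hugging_face_tool_agent",  # agent name used in its description
        model="gpt-3.5-turbo",  # forwarded to OpenAiAgent via *args/**kwargs
        api_key=os.environ["OPENAI_API_KEY"],
    )

    # Single execution mode: image outputs are converted from AgentImage to
    # their raw form inside step(), so `picture` behaves like a PIL image.
    picture = agent.step("Draw me a picture of rivers and lakes.")
    picture.save("./rivers_and_lakes.png")

    # Chat mode keeps state across calls; reset() clears that history first.
    agent.reset()
    capybara_image = agent.chat("Show me an image of a capybara")
    snowy_image = agent.chat("Transform the image so that it snows")
    snowy_image.save("./snowy_capybara.png")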