###### tags: `LangChain` `LLM` `FormosaFoundationModel`
# LLM wrapper for FormosaFoundationModel in LangChain
## LLM class
The LLM class is the standard interface for interacting with LLM providers (such as OpenAI, Cohere, Hugging Face, etc.). At minimum, you must implement:
* the `_call` method: the business logic for calling the model service, typically **string in, string out**.
Optionally, you can also implement the `_identifying_params` property to return the parameters used when calling the model service.
Reference: [LangChain Custom_llm](https://python.langchain.com/docs/modules/model_io/models/llms/custom_llm)
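For orientation, the sketch below shows the smallest possible custom LLM in the style of the guide linked above: a toy model that echoes back the first `n` characters of the prompt. The class name and the `n` field are purely illustrative, not part of the FFM API.
```python
from typing import Any, List, Mapping, Optional
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import LLM

class EchoLLM(LLM):
    """Toy LLM: echoes the first `n` characters of the prompt (illustration only)."""
    n: int = 10  # hypothetical parameter, surfaced via _identifying_params

    @property
    def _llm_type(self) -> str:
        return "echo"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        # "String in, string out": a real wrapper would call the model service here.
        return prompt[: self.n]

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Parameters that identify this model instance."""
        return {"n": self.n}
```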
## Custom LLM Wrapper in LangChain
```python
import json
from typing import Any, Dict, List, Mapping, Optional

import requests
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.llms.base import BaseLLM
from langchain.schema import Generation, LLMResult
from langchain.schema.language_model import BaseLanguageModel

class _FormosaFoundationCommon(BaseLanguageModel):
base_url: str = "http://localhost:12345"
"""Base url the model is hosted under."""
model: str = "Llama-2-7b-chat-hf"
"""Model name to use."""
temperature: Optional[float]
"""The temperature of the model. Increasing the temperature will
make the model answer more creatively."""
stop: Optional[List[str]]
"""Sets the stop tokens to use."""
top_k: int = 50
"""Reduces the probability of generating nonsense. A higher value (e.g. 100)
will give more diverse answers, while a lower value (e.g. 10)
will be more conservative. (Default: 50)"""
top_p: float = 1
"""Works together with top-k. A higher value (e.g., 0.95) will lead
to more diverse text, while a lower value (e.g., 0.5) will
generate more focused and conservative text. (Default: 1)"""
max_new_tokens: int = 350
"""The maximum number of tokens to generate in the completion.
-1 returns as many tokens as possible given the prompt and
    the model's maximal context size."""
frequence_penalty: float = 1
"""Penalizes repeated tokens according to frequency."""
model_kwargs: Dict[str, Any] = {}
"""Holds any model parameters valid for `create` call not explicitly specified."""
ffm_api_key: Optional[str] = None
@property
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling FFM API."""
normal_params = {
"temperature": self.temperature,
"max_new_tokens": self.max_new_tokens,
"top_p": self.top_p,
"frequence_penalty": self.frequence_penalty,
"top_k": self.top_k,
}
return {**normal_params, **self.model_kwargs}
    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
if self.stop is not None and stop is not None:
raise ValueError("`stop` found in both the input and default params.")
elif self.stop is not None:
stop = self.stop
elif stop is None:
stop = []
params = {**self._default_params, "stop": stop, **kwargs}
parameter_payload = {"parameters": params, "inputs": prompt, "model": self.model}
# HTTP headers for authorization
headers = {
"X-API-KEY": self.ffm_api_key,
"Content-Type": "application/json",
}
endpoint_url = f"{self.base_url}/api/models/generate"
# send request
try:
response = requests.post(
url=endpoint_url,
headers=headers,
data=json.dumps(parameter_payload, ensure_ascii=False).encode("utf8"),
stream=False,
)
response.encoding = "utf-8"
generated_text = response.json()
if response.status_code != 200:
detail = generated_text.get("detail")
raise ValueError(
f"FormosaFoundationModel endpoint_url: {endpoint_url}\n"
f"error raised with status code {response.status_code}\n"
f"Details: {detail}\n"
)
        except requests.exceptions.RequestException as e:
            raise ValueError(
                f"FormosaFoundationModel error raised by inference endpoint: {e}\n"
            )
if generated_text.get("detail") is not None:
detail = generated_text["detail"]
raise ValueError(
f"FormosaFoundationModel endpoint_url: {endpoint_url}\n"
f"error raised by inference API: {detail}\n"
)
if generated_text.get("generated_text") is None:
raise ValueError(
f"FormosaFoundationModel endpoint_url: {endpoint_url}\n"
f"Response format error: {generated_text}\n"
)
return generated_text
class FormosaFoundationModel(BaseLLM, _FormosaFoundationCommon):
"""Formosa Foundation Model
Example:
.. code-block:: python
            ffm = FormosaFoundationModel(model="Llama-2-7b-chat-hf")
"""
@property
def _llm_type(self) -> str:
return "FormosaFoundationModel"
@property
def _identifying_params(self) -> Mapping[str, Any]:
"""Get the identifying parameters."""
return {
**{
"model": self.model,
"base_url": self.base_url
},
**self._default_params
}
def _generate(
self,
prompts: List[str],
stop: Optional[List[str]] = None,
run_manager: Optional[CallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> LLMResult:
"""Call out to FormosaFoundationModel's generate endpoint.
        Args:
            prompts: The list of prompts to pass into the model.
            stop: Optional list of stop words to use when generating.
        Returns:
            An LLMResult with one generation per prompt.
        Example:
            .. code-block:: python
                response = ffm.generate(["Tell me a joke."])
        """
generations = []
token_usage = 0
for prompt in prompts:
final_chunk = super()._call(
prompt,
stop=stop,
**kwargs,
)
            generations.append(
                [
                    Generation(
                        text=final_chunk["generated_text"],
                        generation_info=dict(
                            finish_reason=final_chunk["finish_reason"]
                        ),
                    )
                ]
            )
token_usage += final_chunk["generated_tokens"]
llm_output = {"token_usage": token_usage, "model": self.model}
return LLMResult(generations=generations, llm_output=llm_output)
```
### LangChain usage
With the wrapper above in place, FormosaFoundationModel can be used directly in LangChain to handle large language model tasks:
```python
MODEL_NAME = "Llama-2-7b-chat-hf"
API_KEY = "00000000-0000-0000-0000-000000000000"
API_URL = "https://{DOMAIN_URL}/text-generation"
ffm = FormosaFoundationModel(
base_url = API_URL,
max_new_tokens = 350,
temperature = 0.5,
top_k = 50,
top_p = 1.0,
frequence_penalty = 1.0,
ffm_api_key = API_KEY,
    model = MODEL_NAME
)
print(ffm("請問台灣最高的山是?"))
```
> 台灣最高的山是玉山,海拔3952公尺。
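The wrapper then works anywhere LangChain expects an LLM. As a minimal sketch, reusing the `ffm` instance above (the prompt template itself is illustrative), it can drive an `LLMChain`:
```python
from langchain import LLMChain, PromptTemplate

prompt = PromptTemplate(
    input_variables=["mountain"],
    template="請用一句話介紹{mountain}。",
)
chain = LLMChain(llm=ffm, prompt=prompt)
print(chain.run(mountain="玉山"))
```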
## Custom Chat Model Wrapper in LangChain
```python
"""Wrapper LLM conversation APIs."""
import json
from typing import Any, Dict, List, Mapping, Optional

import requests
from langchain.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
from langchain.chat_models.base import BaseChatModel
from langchain.schema import (
    AIMessage,
    BaseMessage,
    ChatGeneration,
    ChatMessage,
    ChatResult,
    HumanMessage,
    SystemMessage,
)
from langchain.schema.language_model import BaseLanguageModel

class _ChatFormosaFoundationCommon(BaseLanguageModel):
base_url: str = "http://localhost:12345"
"""Base url the model is hosted under."""
model: str = "Llama-2-7b-chat-hf"
"""Model name to use."""
temperature: Optional[float]
"""The temperature of the model. Increasing the temperature will
make the model answer more creatively."""
stop: Optional[List[str]]
"""Sets the stop tokens to use."""
top_k: int = 50
"""Reduces the probability of generating nonsense. A higher value (e.g. 100)
will give more diverse answers, while a lower value (e.g. 10)
will be more conservative. (Default: 50)"""
top_p: float = 1
"""Works together with top-k. A higher value (e.g., 0.95) will lead
to more diverse text, while a lower value (e.g., 0.5) will
generate more focused and conservative text. (Default: 1)"""
max_new_tokens: int = 350
"""The maximum number of tokens to generate in the completion.
-1 returns as many tokens as possible given the prompt and
    the model's maximal context size."""
frequence_penalty: float = 1
"""Penalizes repeated tokens according to frequency."""
model_kwargs: Dict[str, Any] = {}
"""Holds any model parameters valid for `create` call not explicitly specified."""
ffm_api_key: Optional[str] = None
@property
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling FFM API."""
normal_params = {
"temperature": self.temperature,
"max_new_tokens": self.max_new_tokens,
"top_p": self.top_p,
"frequence_penalty": self.frequence_penalty,
"top_k": self.top_k,
}
return {**normal_params, **self.model_kwargs}
    def _call(
        self,
        prompt: List[Dict[str, str]],
        stop: Optional[List[str]] = None,
        **kwargs: Any,
    ) -> Dict[str, Any]:
if self.stop is not None and stop is not None:
raise ValueError("`stop` found in both the input and default params.")
elif self.stop is not None:
stop = self.stop
elif stop is None:
stop = []
params = {**self._default_params, "stop": stop, **kwargs}
parameter_payload = {"parameters": params, "messages": prompt, "model": self.model}
# HTTP headers for authorization
headers = {
"X-API-KEY": self.ffm_api_key,
"Content-Type": "application/json",
}
endpoint_url = f"{self.base_url}/api/models/conversation"
# send request
try:
response = requests.post(
url=endpoint_url,
headers=headers,
data=json.dumps(parameter_payload, ensure_ascii=False).encode("utf8"),
stream=False,
)
response.encoding = "utf-8"
generated_text = response.json()
if response.status_code != 200:
detail = generated_text.get("detail")
raise ValueError(
f"FormosaFoundationModel endpoint_url: {endpoint_url}\n"
f"error raised with status code {response.status_code}\n"
f"Details: {detail}\n"
)
        except requests.exceptions.RequestException as e:
            raise ValueError(
                f"FormosaFoundationModel error raised by inference endpoint: {e}\n"
            )
if generated_text.get("detail") is not None:
detail = generated_text["detail"]
raise ValueError(
f"FormosaFoundationModel endpoint_url: {endpoint_url}\n"
f"error raised by inference API: {detail}\n"
)
if generated_text.get("generated_text") is None:
raise ValueError(
f"FormosaFoundationModel endpoint_url: {endpoint_url}\n"
f"Response format error: {generated_text}\n"
)
return generated_text
class ChatFormosaFoundationModel(BaseChatModel, _ChatFormosaFoundationCommon):
"""`FormosaFoundation` Chat large language models API.
    Provide your API key via the ``ffm_api_key`` field.
    Example:
        .. code-block:: python
            ffm = ChatFormosaFoundationModel(model="Llama-2-7b-chat-hf")
    """
@property
def _llm_type(self) -> str:
return "ChatFormosaFoundationModel"
@property
def lc_serializable(self) -> bool:
return True
def _convert_message_to_dict(self, message: BaseMessage) -> dict:
if isinstance(message, ChatMessage):
message_dict = {"role": message.role, "content": message.content}
elif isinstance(message, HumanMessage):
message_dict = {"role": "human", "content": message.content}
elif isinstance(message, AIMessage):
message_dict = {"role": "assistant", "content": message.content}
elif isinstance(message, SystemMessage):
message_dict = {"role": "system", "content": message.content}
else:
raise ValueError(f"Got unknown type {message}")
return message_dict
    def _create_chat_result(self, response: Mapping[str, Any]) -> ChatResult:
        chat_generation = ChatGeneration(
            message=AIMessage(content=response.get("generated_text")),
            generation_info={
                "token_usage": response.get("generated_tokens"),
                "model": self.model,
            },
        )
        return ChatResult(generations=[chat_generation])
    def _generate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        message_dicts = self._create_message_dicts(messages)
        # `_call` merges the default params, stop tokens, and kwargs into the payload.
        response = self._call(prompt=message_dicts, stop=stop, **kwargs)
        return self._create_chat_result(response)
    async def _agenerate(
        self,
        messages: List[BaseMessage],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> ChatResult:
        raise NotImplementedError("Async generation is not supported yet.")
    def _create_message_dicts(
        self, messages: List[BaseMessage]
    ) -> List[Dict[str, Any]]:
        return [self._convert_message_to_dict(m) for m in messages]
```
### LangChain usage
With the wrapper above in place, ChatFormosaFoundationModel can be used directly in LangChain to handle chat model tasks:
```python
MODEL_NAME = "Llama-2-7b-chat-hf"
API_KEY = "00000000-0000-0000-0000-000000000000"
API_URL = "https://{DOMAIN_URL}/text-generation"
chat_ffm = ChatFormosaFoundationModel(
base_url = API_URL,
max_new_tokens = 350,
temperature = 0.5,
top_k = 50,
top_p = 1.0,
frequence_penalty = 1.0,
ffm_api_key = API_KEY,
    model = MODEL_NAME
)
messages = [
HumanMessage(content="人口最多的國家是?"),
AIMessage(content="人口最多的國家是印度。"),
HumanMessage(content="主要宗教為?")
]
result = chat_ffm(messages)
print(result.content)
```
> According to the CIA World Factbook, the main religion in India is Hinduism, which is practiced by about 80% of the population. Other significant religions in India include Islam, Christianity, Sikhism, and Buddhism.
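Because `_convert_message_to_dict` also maps `SystemMessage` to the API's `system` role, the conversation can be steered with a system prompt. A short sketch, reusing the `chat_ffm` instance above (the prompt content is illustrative):
```python
from langchain.schema import HumanMessage, SystemMessage

messages = [
    SystemMessage(content="你是一位簡潔的助理,請用一句話回答。"),
    HumanMessage(content="請問台灣最高的山是?"),
]
print(chat_ffm(messages).content)
```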
## Custom Embedding Model Wrapper in LangChain
```python
"""Wrapper Embedding model APIs."""
import json
from typing import List

import requests
from langchain.embeddings.base import Embeddings
from pydantic import BaseModel

class CustomEmbeddingModel(BaseModel, Embeddings):
base_url: str = "http://localhost:12345"
api_key: str = ""
    def get_embeddings(self, payload: str) -> List[List[float]]:
        endpoint_url = f"{self.base_url}/embeddings/api/embeddings"
embeddings = []
headers = {
"Content-type": "application/json",
"accept": "application/json",
"X-API-KEY": self.api_key,
}
        response = requests.post(endpoint_url, headers=headers, data=payload)
        response.raise_for_status()  # surface HTTP errors instead of a confusing parse failure
        body = response.json()
        for data in body["data"]:
            embeddings.append(data["embedding"])
        return embeddings
def embed_documents(self, texts: List[str]) -> List[List[float]]:
payload = json.dumps({"input": texts})
return self.get_embeddings(payload)
    def embed_query(self, text: str) -> List[float]:
payload = json.dumps({"input": [text]})
emb = self.get_embeddings(payload)
return emb[0]
```
### LangChain usage
With the wrapper above in place, CustomEmbeddingModel can be used directly in LangChain to handle embedding tasks:
* To embed a single string, use the *embed_query()* function, which returns that string's embedding.
```python
API_KEY = "00000000-0000-0000-0000-000000000000"
API_URL = "https://{DOMAIN_URL}"
embeddings = CustomEmbeddingModel(
base_url = API_URL,
api_key = API_KEY,
)
print(embeddings.embed_query("請問台灣最高的山是?"))
```
> [-1.1431972, -4.723901, 2.3445783, -2.19996, ......, 1.0784563, -3.4114947, -2.5193133]
* To embed multiple strings, use the *embed_documents()* function, which returns all of the embeddings at once.
```python
API_KEY = "00000000-0000-0000-0000-000000000000"
API_URL = "https://{DOMAIN_URL}"
embeddings = CustomEmbeddingModel(
base_url = API_URL,
api_key = API_KEY,
)
print(embeddings.embed_documents(["test1", "test2", "test3"]))
```
> [[-0.14880371, ......, 0.7011719], [-0.023590088, ...... , 0.49320474], [-0.86242676, ......, 0.22867839]]
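The embeddings also plug directly into LangChain vector stores. A sketch, assuming the `faiss-cpu` package is installed and reusing the `embeddings` instance above (the sample texts are illustrative):
```python
from langchain.vectorstores import FAISS

texts = ["玉山海拔3952公尺。", "台北101高508公尺。"]
db = FAISS.from_texts(texts, embeddings)
print(db.similarity_search("台灣最高的山", k=1)[0].page_content)
```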