Merge branch 'main' of github.com:abetlen/llama_cpp_python into main · aniljava/llama-cpp-python@9d7c830 · GitHub
Skip to content

Commit 9d7c830

Browse files
committed
Merge branch 'main' of github.com:abetlen/llama_cpp_python into main
2 parents 3dc21b2 + b8438f7 commit 9d7c830

3 files changed

Lines changed: 54 additions & 2 deletions

File tree

llama_cpp/llama.py

Lines changed: 28 additions & 2 deletions

llama_cpp/llama_chat_format.py

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,8 @@ def __call__(
2626
temperature: float = 0.2,
2727
top_p: float = 0.95,
2828
top_k: int = 40,
29+
min_p: float = 0.05,
30+
typical_p: float = 1.0,
2931
stream: bool = False,
3032
stop: Optional[Union[str, List[str]]] = [],
3133
seed: Optional[int] = None,
@@ -287,6 +289,8 @@ def basic_create_chat_completion(
287289
temperature: float = 0.2,
288290
top_p: float = 0.95,
289291
top_k: int = 40,
292+
min_p: float = 0.05,
293+
typical_p: float = 1.0,
290294
stream: bool = False,
291295
stop: Optional[Union[str, List[str]]] = [],
292296
seed: Optional[int] = None,
@@ -330,6 +334,8 @@ def basic_create_chat_completion(
330334
temperature=temperature,
331335
top_p=top_p,
332336
top_k=top_k,
337+
min_p=min_p,
338+
typical_p=typical_p,
333339
stream=stream,
334340
stop=stop,
335341
seed=seed,
@@ -579,6 +585,8 @@ def functionary_chat_handler(
579585
temperature: float = 0.2,
580586
top_p: float = 0.95,
581587
top_k: int = 40,
588+
min_p: float = 0.05,
589+
typical_p: float = 1.0,
582590
stream: bool = False,
583591
stop: Optional[Union[str, List[str]]] = [],
584592
response_format: Optional[llama_types.ChatCompletionRequestResponseFormat] = None,
@@ -761,6 +769,8 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
761769
temperature=temperature,
762770
top_p=top_p,
763771
top_k=top_k,
772+
min_p=min_p,
773+
typical_p=typical_p,
764774
stream=stream,
765775
stop=["user:", "</s>"],
766776
max_tokens=max_tokens,
@@ -831,6 +841,8 @@ def message_to_str(msg: llama_types.ChatCompletionRequestMessage):
831841
temperature=temperature,
832842
top_p=top_p,
833843
top_k=top_k,
844+
min_p=min_p,
845+
typical_p=typical_p,
834846
presence_penalty=presence_penalty,
835847
frequency_penalty=frequency_penalty,
836848
repeat_penalty=repeat_penalty,
@@ -929,6 +941,8 @@ def __call__(
929941
temperature: float = 0.2,
930942
top_p: float = 0.95,
931943
top_k: int = 40,
944+
min_p: float = 0.05,
945+
typical_p: float = 1.0,
932946
stream: bool = False,
933947
stop: Optional[Union[str, List[str]]] = [],
934948
response_format: Optional[
@@ -1045,6 +1059,8 @@ def __call__(
10451059
temperature=temperature,
10461060
top_p=top_p,
10471061
top_k=top_k,
1062+
min_p=min_p,
1063+
typical_p=typical_p,
10481064
stream=stream,
10491065
stop=stop,
10501066
max_tokens=max_tokens,

llama_cpp/server/app.py

Lines changed: 10 additions & 0 deletions

0 commit comments

Comments (0)