Add mkdocs · aniljava/llama-cpp-python@df15caa · GitHub
Skip to content

Commit df15caa

Browse files
committed
Add mkdocs
1 parent a61fd3b commit df15caa

5 files changed

Lines changed: 890 additions & 341 deletions

File tree

docs/index.md

Lines changed: 23 additions & 0 deletions

llama_cpp/llama.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99

1010
class Llama:
11+
"""High-level Python wrapper for a llama.cpp model."""
12+
1113
def __init__(
1214
self,
1315
model_path: str,
@@ -18,7 +20,25 @@ def __init__(
1820
logits_all: bool = False,
1921
vocab_only: bool = False,
2022
n_threads: Optional[int] = None,
21-
):
23+
) -> "Llama":
24+
"""Load a llama.cpp model from `model_path`.
25+
26+
Args:
27+
model_path: Path to the model file.
28+
n_ctx: Maximum size of the context window, in tokens.
29+
n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
30+
seed: Random seed.
31+
f16_kv: Use half-precision for key/value matrices.
32+
logits_all: Return logits for all tokens, not just the last token.
33+
vocab_only: Only load the vocabulary, not the model weights.
34+
n_threads: Number of threads to use. If None, the number of threads is automatically determined.
35+
36+
Raises:
37+
ValueError: If the model path does not exist.
38+
39+
Returns:
40+
A Llama instance.
41+
"""
2242
self.model_path = model_path
2343

2444
self.last_n = 64
@@ -56,6 +76,27 @@ def __call__(
5676
repeat_penalty: float = 1.1,
5777
top_k: int = 40,
5878
):
79+
"""Generate text from a prompt.
80+
81+
Args:
82+
prompt: The prompt to generate text from.
83+
suffix: A suffix to append to the generated text. If None, no suffix is appended.
84+
max_tokens: The maximum number of tokens to generate.
85+
temperature: The temperature to use for sampling.
86+
top_p: The top-p value to use for sampling.
87+
logprobs: The number of logprobs to return. If None, no logprobs are returned.
88+
echo: Whether to echo the prompt.
89+
stop: A list of strings to stop generation when encountered.
90+
repeat_penalty: The penalty to apply to repeated tokens.
91+
top_k: The top-k value to use for sampling.
92+
93+
Raises:
94+
ValueError: If the requested tokens exceed the context window.
95+
RuntimeError: If the prompt fails to tokenize or the model fails to evaluate the prompt.
96+
97+
Returns:
98+
Response object containing the generated text.
99+
"""
59100
text = b""
60101
finish_reason = "length"
61102
completion_tokens = 0

mkdocs.yml

Lines changed: 10 additions & 0 deletions

0 commit comments

Comments
 (0)