temperature=1.0, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
temperature=0.6, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=0.0, repetition_penalty=1.0
temperature=0.7, top_p=0.8, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
temperature=1.0, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0

curl -fsSL https://unsloth.ai/install.sh | sh
irm https://unsloth.ai/install.ps1 | iex
unsloth studio -H 0.0.0.0 -p 8888

apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp

export LLAMA_CACHE="unsloth/Qwen3.5-35B-A3B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-35B-A3B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-35B-A3B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.7 \
--top-p 0.8 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'
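Once the server is up (llama-server listens on port 8080 by default, since no --port is given above), the same sampling settings can be exercised over its OpenAI-compatible endpoint. A minimal sketch, assuming your llama-server build accepts its native top_k and min_p fields in the request body; the prompt text is only an illustration:

from openai import OpenAI

# Assumes the llama-server started above, on its default port 8080.
client = OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="sk-no-key-required")

completion = client.chat.completions.create(
    model="unsloth/Qwen3.5-35B-A3B-GGUF",
    messages=[{"role": "user", "content": "Write a haiku about quantization."}],
    temperature=0.7,  # matches the non-thinking settings above
    top_p=0.8,
    # top_k and min_p are llama.cpp-specific, not part of the OpenAI spec,
    # so they go through extra_body; assumes the server honors them.
    extra_body={"top_k": 20, "min_p": 0.0},
)
print(completion.choices[0].message.content)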
export LLAMA_CACHE="unsloth/Qwen3.5-35B-A3B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-35B-A3B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'

hf download unsloth/Qwen3.5-35B-A3B-GGUF \
--local-dir unsloth/Qwen3.5-35B-A3B-GGUF \
--include "*mmproj-F16*" \
--include "*UD-Q4_K_XL*" # Use "*UD-Q2_K_XL*" for Dynamic 2bit./llama.cpp/llama-cli \
./llama.cpp/llama-cli \
--model unsloth/Qwen3.5-35B-A3B-GGUF/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf \
--mmproj unsloth/Qwen3.5-35B-A3B-GGUF/mmproj-F16.gguf \
--temp 1.0 \
--top-p 0.95 \
--min-p 0.00 \
--top-k 20

apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp

export LLAMA_CACHE="unsloth/Qwen3.5-9B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--alias "unsloth/Qwen3.5-9B-GGUF" \
--port 8001 \
--chat-template-kwargs '{"enable_thinking":true}'
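The thinking toggle does not have to be fixed at server start: recent llama-server builds also accept chat_template_kwargs per request. Treat that as an assumption and check your build if the override below has no effect. A sketch against the server above (port 8001):

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:8001/v1", api_key="sk-no-key-required")

completion = client.chat.completions.create(
    model="unsloth/Qwen3.5-9B-GGUF",
    messages=[{"role": "user", "content": "What is 2+2?"}],
    # Per-request template override; assumes a llama-server build that
    # supports chat_template_kwargs in the request body.
    extra_body={"chat_template_kwargs": {"enable_thinking": False}},
)
print(completion.choices[0].message.content)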
export LLAMA_CACHE="unsloth/Qwen3.5-9B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--alias "unsloth/Qwen3.5-9B-GGUF" \
--port 8001 \
--chat-template-kwargs '{"enable_thinking":true}'

export LLAMA_CACHE="unsloth/Qwen3.5-9B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.7 \
--top-p 0.8 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-9B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-9B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

hf download unsloth/Qwen3.5-9B-GGUF \
--local-dir unsloth/Qwen3.5-9B-GGUF \
--include "*mmproj-F16*" \
--include "*UD-Q4_K_XL*" # Use "*UD-Q2_K_XL*" for Dynamic 2bit

./llama.cpp/llama-cli \
--model unsloth/Qwen3.5-9B-GGUF/Qwen3.5-9B-UD-Q4_K_XL.gguf \
--mmproj unsloth/Qwen3.5-9B-GGUF/mmproj-F16.gguf \
--temp 1.0 \
--top-p 0.95 \
--min-p 0.00 \
--top-k 20

apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp

export LLAMA_CACHE="unsloth/Qwen3.5-27B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-27B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-27B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-27B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-27B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-27B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.7 \
--top-p 0.8 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'

export LLAMA_CACHE="unsloth/Qwen3.5-27B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-27B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'

hf download unsloth/Qwen3.5-27B-GGUF \
--local-dir unsloth/Qwen3.5-27B-GGUF \
--include "*mmproj-F16*" \
--include "*UD-Q4_K_XL*" # Use "*UD-Q2_K_XL*" for Dynamic 2bit

./llama.cpp/llama-cli \
--model unsloth/Qwen3.5-27B-GGUF/Qwen3.5-27B-UD-Q4_K_XL.gguf \
--mmproj unsloth/Qwen3.5-27B-GGUF/mmproj-F16.gguf \
--temp 1.0 \
--top-p 0.95 \
--min-p 0.00 \
--top-k 20

apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp

export LLAMA_CACHE="unsloth/Qwen3.5-122B-A10B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-122B-A10B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-122B-A10B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.7 \
--top-p 0.8 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'

export LLAMA_CACHE="unsloth/Qwen3.5-122B-A10B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-122B-A10B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 1.0 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'

hf download unsloth/Qwen3.5-122B-A10B-GGUF \
--local-dir unsloth/Qwen3.5-122B-A10B-GGUF \
--include "*mmproj-F16*" \
--include "*UD-Q4_K_XL*" # Use "*UD-Q2_K_XL*" for Dynamic 2bit

./llama.cpp/llama-cli \
--model unsloth/Qwen3.5-122B-A10B-GGUF/UD-Q4_K_XL/Qwen3.5-122B-A10B-UD-Q4_K_XL-00001-of-00003.gguf \
--mmproj unsloth/Qwen3.5-122B-A10B-GGUF/mmproj-F16.gguf \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00
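For split GGUFs like this one, llama.cpp only needs the path to the first shard (the -00001-of-... file); the remaining parts are loaded automatically from the same directory. A small sketch to locate that shard after downloading:

from pathlib import Path

# Find the first shard of the split GGUF downloaded above.
shards = sorted(Path("unsloth/Qwen3.5-122B-A10B-GGUF").rglob("*UD-Q4_K_XL*-00001-of-*.gguf"))
print(shards[0] if shards else "first shard not found")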
apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp

export LLAMA_CACHE="unsloth/Qwen3.5-397B-A17B-GGUF"
./llama.cpp/llama-cli \
-hf unsloth/Qwen3.5-397B-A17B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

export LLAMA_CACHE="unsloth/Qwen3.5-397B-A17B-GGUF"
./llama.cpp/llama-server \
-hf unsloth/Qwen3.5-397B-A17B-GGUF:UD-Q4_K_XL \
--ctx-size 16384 \
--temp 0.7 \
--top-p 0.8 \
--top-k 20 \
--min-p 0.00 \
--chat-template-kwargs '{"enable_thinking":false}'

hf download unsloth/Qwen3.5-397B-A17B-GGUF \
--local-dir unsloth/Qwen3.5-397B-A17B-GGUF \
--include "*mmproj-F16*" \
--include "*UD-Q4_K_XL" # Use "*UD-Q2_K_XL*" for Dynamic 2bit./llama.cpp/llama-cli \
--model unsloth/Qwen3.5-397B-A17B-GGUF/UD-Q4_K_XL/Qwen3.5-397B-A17B-UD-Q4_K_XL-00001-of-00006.gguf \
--mmproj unsloth/Qwen3.5-397B-A17B-GGUF/mmproj-F16.gguf \
--ctx-size 16384 \
--temp 0.6 \
--top-p 0.95 \
--top-k 20 \
--min-p 0.00

lms get unsloth/qwen3.5-4b

./llama.cpp/llama-server \
--model unsloth/Qwen3.5-35B-A3B-GGUF/Qwen3.5-35B-A3B-UD-Q4_K_XL.gguf \
--mmproj unsloth/Qwen3.5-35B-A3B-GGUF/mmproj-F16.gguf \
--alias "unsloth/Qwen3.5-35B-A3B" \
--temp 0.6 \
--top-p 0.95 \
--ctx-size 16384 \
--top-k 20 \
--min-p 0.00 \
--port 8001

from openai import OpenAI
openai_client = OpenAI(
    base_url="http://127.0.0.1:8001/v1",
    api_key="sk-no-key-required",
)
completion = openai_client.chat.completions.create(
    model="unsloth/Qwen3.5-35B-A3B",
    messages=[{"role": "user", "content": "Create a Snake game."}],
)
print(completion.choices[0].message.content)

--chat-template-kwargs '{"enable_thinking":false}'
--chat-template-kwargs '{"enable_thinking":true}'

./llama.cpp/llama-server \
--model unsloth/Qwen3.5-9B-GGUF/Qwen3.5-9B-BF16.gguf \
--alias "unsloth/Qwen3.5-9B-GGUF" \
--temp 0.6 \
--top-p 0.95 \
--ctx-size 16384 \
--top-k 20 \
--min-p 0.00 \
--port 8001 \
--chat-template-kwargs '{"enable_thinking":true}'

from openai import OpenAI
openai_client = OpenAI(
    base_url="http://127.0.0.1:8001/v1",
    api_key="sk-no-key-required",
)
completion = openai_client.chat.completions.create(
    model="unsloth/Qwen3.5-9B-GGUF",
    messages=[{"role": "user", "content": "What is 2+2?"}],
)
print(completion.choices[0].message.content)
print(completion.choices[0].message.reasoning_content)
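For longer generations it is usually nicer to stream tokens as they arrive. A minimal sketch against the same server, using the standard OpenAI streaming interface:

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:8001/v1", api_key="sk-no-key-required")

stream = client.chat.completions.create(
    model="unsloth/Qwen3.5-9B-GGUF",
    messages=[{"role": "user", "content": "What is 2+2?"}],
    stream=True,
)
for chunk in stream:
    # Guard against empty chunks; the final chunk may carry no content.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()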