Was this helpful?
<|im_start|>system\n<|im_end|>\n<|im_start|>user\nWhat is 1+1?<|im_end|>\n<|im_start|>assistant\n<think></think>2<|im_end|>\n<|im_start|>user\nWhat is 2+2?<|im_end|>\n<|im_start|>assistant\n<think>\n
curl -fsSL https://unsloth.ai/install.sh | sh
irm https://unsloth.ai/install.ps1 | iex
unsloth studio -H 0.0.0.0 -p 8888
apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp
./llama.cpp/llama-cli \
-hf unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF:Q8_0 \
--ctx-size 16384 \
--temp 1.0 --top-p 1.0
./llama.cpp/llama-cli \
-hf unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF:Q8_0 \
--ctx-size 32768 \
--temp 0.6 --top-p 0.95
# !pip install huggingface_hub hf_transfer
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
from huggingface_hub import snapshot_download
snapshot_download(
repo_id = "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF",
local_dir = "unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF",
allow_patterns = ["*Q8_0*"],
)
./llama.cpp/llama-cli \
--model unsloth/NVIDIA-Nemotron-3-Nano-4B-GGUF/NVIDIA-Nemotron-3-Nano-4B-Q8_0.gguf \
--ctx-size 16384 \
--seed 3407 \
--prio 2 \
--temp 0.6 \
--top-p 0.95
curl -fsSL https://unsloth.ai/main/install.sh | sh
irm https://unsloth.ai/install.ps1 | iex
source unsloth_studio/bin/activate
unsloth studio -H 0.0.0.0 -p 8888
& .\unsloth_studio\Scripts\unsloth.exe studio -H 0.0.0.0 -p 8888
apt-get update
apt-get install pciutils build-essential cmake curl libcurl4-openssl-dev -y
git clone https://github.com/ggml-org/llama.cpp
cmake llama.cpp -B llama.cpp/build \
-DBUILD_SHARED_LIBS=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON
cmake --build llama.cpp/build --config Release -j --clean-first --target llama-cli llama-mtmd-cli llama-server llama-gguf-split
cp llama.cpp/build/bin/llama-* llama.cpp
./llama.cpp/llama-cli \
-hf unsloth/Nemotron-3-Nano-30B-A3B-GGUF:UD-Q4_K_XL \
--ctx-size 32768 \
--temp 1.0 --top-p 1.0
./llama.cpp/llama-cli \
-hf unsloth/Nemotron-3-Nano-30B-A3B-GGUF:UD-Q4_K_XL \
--ctx-size 32768 \
--temp 0.6 --top-p 0.95
# !pip install huggingface_hub hf_transfer
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
from huggingface_hub import snapshot_download
snapshot_download(
repo_id = "unsloth/Nemotron-3-Nano-30B-A3B-GGUF",
local_dir = "unsloth/Nemotron-3-Nano-30B-A3B-GGUF",
allow_patterns = ["*UD-Q4_K_XL*"],
)
./llama.cpp/llama-cli \
--model unsloth/Nemotron-3-Nano-30B-A3B-GGUF/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
--ctx-size 16384 \
--seed 3407 \
--prio 2 \
--temp 0.6 \
--top-p 0.95
./llama.cpp/llama-server \
--model unsloth/Nemotron-3-Nano-30B-A3B-GGUF/Nemotron-3-Nano-30B-A3B-UD-Q4_K_XL.gguf \
--alias "unsloth/Nemotron-3-Nano-30B-A3B" \
--prio 3 \
--min_p 0.01 \
--temp 0.6 \
--top-p 0.95 \
--ctx-size 16384 \
--port 8001
from openai import OpenAI
import json
# Point an OpenAI-compatible client at the locally running llama-server
# (started above on port 8001). llama.cpp does not validate the API key,
# so any placeholder string works.
openai_client = OpenAI(
base_url = "http://127.0.0.1:8001/v1",
api_key = "sk-no-key-required",
)
# Single-turn chat completion against the served model; `model` must match
# the --alias passed to llama-server.
completion = openai_client.chat.completions.create(
model = "unsloth/Nemotron-3-Nano-30B-A3B",
messages = [{"role": "user", "content": "What is 2+2?"},],
)
print(completion.choices[0].message.content)
User asks a simple question: "What is 2+2?" The answer is 4. Provide answer.
2 + 2 = 4.









