iframe-proxy | Sunbelt Computer Software

895 lines (893 loc) · 26.2 KB
# ASSETS.yaml — External asset manifest
# Auto-generated from per-eval eval.yaml external_assets fields.
# Regenerate: python tools/generate_asset_manifest.py
# This is a read-only manifest (like uv.lock but for external assets).
# To update, edit the external_assets section in the relevant eval.yaml,
# then regenerate this file.
  abstention_bench:
  - type: direct_url
    source: https://huggingface.co/datasets/rajpurkar/squad_v2/resolve/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/stellalisy/mediQ/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/nyu-mll/BBQ/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/yinzhangyue/SelfAware/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/thunlp/FalseQA/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/Yuki-Asuuna/UMWP/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/suzgunmirac/BIG-Bench-Hard/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://github.com/facebookresearch/worldsense/raw/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://drive.google.com/uc?id=12aLKsSKe85G0u5bBTq0X0aKICsdxpaFL
    fetch_method: gdown_and_verify
    state: pinned
  - type: direct_url
    source: https://drive.google.com/uc?id=1q-6FIEGufKVBE3s6OdFoLWL2iHQPJh8h
    fetch_method: gdown_and_verify
    state: pinned
  - type: direct_url
    source: https://drive.google.com/uc?id={multiple}
    fetch_method: gdown_and_verify
    state: pinned
  - type: huggingface
    source: tasksource/bigbench
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: allenai/coconot
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: Idavidrein/gpqa
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: openai/gsm8k
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: amayuelas/KUQ
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: cais/mmlu
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: ninoscherrer/moralchoice
    fetch_method: load_dataset
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/mikejqzhang/SituatedQA/{SHA}/data/qa_data/
    fetch_method: other
    state: pinned
    comment: Custom HF builder downloads via dl_manager
  - type: direct_url
    source: https://qasper-dataset.s3.us-west-2.amazonaws.com/qasper-{train-dev,test}-v0.3.tgz
    fetch_method: other
    state: pinned
    comment: Custom HF builder downloads via dl_manager
  agent_bench:
  - type: git_clone
    source: https://github.com/THUDM/AgentBench
    fetch_method: git_clone
    state: pinned
  agentdojo:
  - type: git_dependency
    source: https://github.com/ethz-spylab/agentdojo
    fetch_method: pyproject_toml
    state: pinned
  agentharm:
  - type: huggingface
    source: ai-safety-institute/AgentHarm
    fetch_method: snapshot_download
    state: pinned
  agentic_misalignment: []
  - type: direct_url
    source: https://raw.githubusercontent.com/ruixiangcui/AGIEval/{SHA}/data/v1_1/
    fetch_method: other
    state: pinned
    comment: Multiple task files fetched via fsspec (load_json_dataset)
  - type: huggingface
    source: sentientfutures/ahb
    fetch_method: hf_dataset
    state: pinned
  aime2024:
  - type: huggingface
    source: Maxwell-Jia/AIME_2024
    fetch_method: hf_dataset
    state: pinned
  aime2025:
  - type: huggingface
    source: math-ai/aime25
    fetch_method: hf_dataset
    state: pinned
  aime2026:
  - type: huggingface
    source: math-ai/aime26
    fetch_method: hf_dataset
    state: pinned
  air_bench:
  - type: huggingface
    source: stanford-crfm/air-bench-2024
    fetch_method: hf_dataset
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/AlignmentResearch/AttemptPersuadeEval/{SHA}/src/topics/
    fetch_method: other
    state: pinned
    comment: Fetched via fsspec (load_json_dataset)
  - type: huggingface
    source: codeparrot/apps
    fetch_method: snapshot_download
    state: pinned
  - type: huggingface
    source: allenai/ai2_arc
    fetch_method: hf_dataset
    state: pinned
  assistant_bench:
  - type: huggingface
    source: AssistantBench/AssistantBench
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: Lakera/b3-agent-security-benchmark-weak
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: BBEH/bbeh
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: Joschka/big_bench_hard
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: Joschka/big_bench_hard
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: heegyu/bbq
    fetch_method: snapshot_download
    state: pinned
  - type: git_clone
    source: https://github.com/ShishirPatil/gorilla.git
    fetch_method: git_clone
    state: pinned
  bigcodebench:
  - type: huggingface
    source: bigcode/bigcodebench
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: AmazonScience/bold
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: google/boolq
    fetch_method: hf_dataset
    state: pinned
  browse_comp:
  - type: direct_url
    source: https://openaipublic.blob.core.windows.net/simple-evals/browse_comp_test_set.csv
    fetch_method: download_and_verify
    state: pinned
  chembench:
  - type: huggingface
    source: jablonkagroup/ChemBench
    fetch_method: hf_dataset
    state: pinned
  class_eval:
  - type: huggingface
    source: FudanSELab/ClassEval
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: allenai/coconot
    fetch_method: hf_dataset
    state: pinned
  commonsense_qa:
  - type: huggingface
    source: tau/commonsense_qa
    fetch_method: hf_dataset
    state: pinned
  compute_eval:
  - type: huggingface
    source: nvidia/compute-eval
    fetch_method: hf_dataset
    state: pinned
  core_bench:
  - type: direct_url
    source: https://corebench.cs.princeton.edu/capsules/{id}.tar.gz
    fetch_method: download_and_verify
    state: pinned
  - type: huggingface
    source: siegelz/core-bench
    fetch_method: hf_hub_download
    state: pinned
  cti_realm:
  - type: huggingface
    source: arjun180-new/cti_realm
    fetch_method: hf_hub_download
    state: pinned
  cve_bench:
  - type: git_dependency
    source: https://github.com/Scott-Simmons/cve-bench.git
    fetch_method: pyproject_toml
    state: pinned
  - type: direct_url
    source: https://www.haproxy.org/download/2.8/src/haproxy-2.8.1.tar.gz
    fetch_method: wget
    state: pinned
  cybergym:
  - type: huggingface
    source: sunblaze-ucb/cybergym
    fetch_method: snapshot_download
    state: pinned
  cybermetric:
  - type: direct_url
    source: https://raw.githubusercontent.com/cybermetric/CyberMetric/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  cyberseceval_2:
  - type: direct_url
    source: https://nodejs.org/dist/v20.18.3/node-v20.18.3-linux-x64.tar.xz
    fetch_method: curl
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/meta-llama/PurpleLlama/{SHA}/CybersecurityBenchmarks/datasets/
    fetch_method: other
    state: pinned
    comment: interpreter.json and prompt_injection.json; fetched via fsspec
  cyberseceval_3:
  - type: huggingface
    source: facebook/cyberseceval3-visual-prompt-injection
    fetch_method: snapshot_download
    state: pinned
  - type: huggingface
    source: facebook/cyberseceval3-visual-prompt-injection
    fetch_method: hf_dataset
    state: pinned
  cyberseceval_4:
  - type: direct_url
    source: https://raw.githubusercontent.com/meta-llama/PurpleLlama/fe05293b610dabc3967443f2dd4dc35c4e8971b6/CybersecurityBenchmarks/datasets/
    fetch_method: other
    state: pinned
    comment: instruct.json, autocomplete.json, mitre_benchmark_100_per_category_with_augmentation.json, mitre_frr.json, and multiturn_phishing_challenges.json; fetched via fsspec (load_json_dataset)
  - type: direct_url
    source: https://raw.githubusercontent.com/meta-llama/PurpleLlama/fe05293b610dabc3967443f2dd4dc35c4e8971b6/CybersecurityBenchmarks/datasets/prompt_injection/prompt_injection_multilingual_machine_translated.json
    fetch_method: other
    state: pinned
    comment: fetched via fsspec (load_json_dataset)
  - type: direct_url
    source: https://github.com/CrowdStrike/CyberSOCEval_data/archive/ce7daa5bc7da51559ca97476d2277be02631783e.zip
    fetch_method: other
    state: pinned
    comment: malware-analysis and threat-intelligence report data; downloaded via download_command_with_progress (wget/curl)
  - type: direct_url
    source: https://media.defense.gov/2024/Feb/26/2003399756/-1/-1/0/CSA-SVR-ADAPT-TACTICS-FOR-INITIAL-CLOUD-ACCESS.PDF
    fetch_method: requests
    state: pinned
    comment: threat-intelligence fallback PDF when the pinned CrowdStrike archive lacks this report
  - type: direct_url
    source: https://www.ncsc.gov.uk/sites/default/files/pdfs/news/svr-cyber-actors-adapt-tactics-for-initial-cloud-access.pdf
    fetch_method: requests
    state: pinned
    comment: alternate mirror for the threat-intelligence fallback PDF
  - type: huggingface
    source: lmms-lab/DocVQA
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: EleutherAI/drop
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: xlangai/DS-1000
    fetch_method: hf_dataset
    state: pinned
  fortress:
  - type: huggingface
    source: ScaleAI/fortress_public
    fetch_method: hf_dataset
    state: pinned
  frontier_cs:
  - type: direct_url
    source: https://github.com/FrontierCS/Frontier-CS/archive/{SHA}.tar.gz
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://raw.githubusercontent.com/MikeMirzayanov/testlib/{SHA}/testlib.h
    fetch_method: wget
    state: pinned
  - type: direct_url
    source: https://julialang-s3.julialang.org/bin/linux/x64/1.11/julia-1.11.3-linux-x86_64.tar.gz
    fetch_method: wget
    state: pinned
  - type: huggingface
    source: FrontierCS/Frontier-CS
    fetch_method: hf_dataset
    state: pinned
  frontierscience:
  - type: huggingface
    source: openai/frontierscience
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: gaia-benchmark/GAIA
    fetch_method: hf_dataset
    state: pinned
  gdm_in_house_ctf:
  - type: direct_url
    source: https://hub.docker.com/u/marshw
    fetch_method: other
    state: floating
    comment: Per-challenge Docker images (marshw/cmd_injection, marshw/db_3, marshw/grafana, etc.) pulled at eval runtime
  gdm_intercode_ctf:
  - type: direct_url
    source: https://github.com/princeton-nlp/intercode/archive/{SHA}.zip
    fetch_method: download_and_verify
    state: pinned
  gdm_self_proliferation:
  - type: git_clone
    source: https://github.com/mistralai/mistral-src.git
    fetch_method: git_clone
    state: pinned
    comment: Pinned to commit 2557e12d0e1878a1562ece30434820f80aa3e12a in mistral.Dockerfile
  - type: direct_url
    source: https://files.mistral-7b-v0-1.mistral.ai/mistral-7B-v0.1.tar
    fetch_method: wget
    state: pinned
  - type: direct_url
    source: pip install git+https://github.com/huggingface/{transformers,peft,accelerate}.git
    fetch_method: pip_install_git
    state: pinned
    comment: Pinned to commit SHAs in mistral.Dockerfile
  - type: direct_url
    source: https://raw.githubusercontent.com/UKGovernmentBEIS/inspect_evals/{version}/...secrets.zip
    fetch_method: requests
    state: controlled
  gdm_self_reasoning: []
  gdm_stealth: []
  - type: huggingface
    source: openai/gdpval
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: openai/gdpval
    fetch_method: load_dataset
    state: pinned
  - type: direct_url
    source: https://openaipublic.blob.core.windows.net/simple-evals/gpqa_diamond.csv
    fetch_method: download_and_verify
    state: pinned
  - type: huggingface
    source: openai/gsm8k
    fetch_method: hf_dataset
    state: pinned
  healthbench:
  - type: direct_url
    source: https://openaipublic.blob.core.windows.net/simple-evals/healthbench/{file}.jsonl
    fetch_method: other
    state: pinned
    comment: 4 files with timestamp-encoded filenames; fetched via fsspec (load_json_dataset)
  hellaswag:
  - type: huggingface
    source: Rowan/hellaswag
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: cais/hle
    fetch_method: hf_dataset
    state: pinned
  humaneval:
  - type: huggingface
    source: openai/openai_humaneval
    fetch_method: hf_dataset
    state: pinned
  - type: git_dependency
    source: https://github.com/josejg/instruction_following_eval
    fetch_method: pyproject_toml
    state: pinned
  - type: huggingface
    source: google/IFEval
    fetch_method: hf_dataset
    state: pinned
  ifevalcode:
  - type: huggingface
    source: Multilingual-Multimodal-NLP/IfEvalCode-testset
    fetch_method: hf_dataset
    state: pinned
  infinite_bench:
  - type: huggingface
    source: xinrongzhang2022/InfiniteBench
    fetch_method: hf_dataset
    state: pinned
  instrumentaleval:
  - type: direct_url
    source: https://raw.githubusercontent.com/yf-he/InstrumentalEval/{SHA}/benchmark/
    fetch_method: download_and_verify
    state: pinned
  ipi_coding_agent: []
  kernelbench:
  - type: direct_url
    source: https://astral.sh/uv/0.9.9/install.sh
    fetch_method: curl
    state: pinned
  - type: git_dependency
    source: https://github.com/ScalingIntelligence/KernelBench
    fetch_method: pyproject_toml
    state: pinned
  - type: huggingface
    source: ScalingIntelligence/KernelBench
    fetch_method: hf_dataset
    state: pinned
  lab_bench:
  - type: huggingface
    source: futurehouse/lab-bench
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: ambean/lingOly
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: jkhouja/LingOly-TOO
    fetch_method: hf_dataset
    state: pinned
  livebench:
  - type: git_dependency
    source: https://github.com/LiveBench/LiveBench.git
    fetch_method: pyproject_toml
    state: pinned
  - type: huggingface
    source: livebench/{math,reasoning,coding,language,data_analysis,instruction_following}
    fetch_method: hf_dataset
    state: pinned
  livecodebench_pro:
  - type: huggingface
    source: QAQAQAQAQ/LiveCodeBench-Pro
    fetch_method: hf_dataset
    state: pinned
  make_me_pay: []
  makemesay:
  - type: direct_url
    source: https://github.com/openai/evals/raw/{SHA}/evals/registry/data/make_me_say/
    fetch_method: download_and_verify
    state: pinned
  - type: huggingface
    source: cais/MASK
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: DigitalLearningGmbH/MATH-lighteval
    fetch_method: hf_dataset
    state: pinned
  mathvista:
  - type: huggingface
    source: AI4Math/MathVista
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: google-research-datasets/mbpp
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: google-research-datasets/mbpp
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: bigbio/med_qa
    fetch_method: snapshot_download
    state: pinned
  - type: direct_url
    source: https://openaipublic.blob.core.windows.net/simple-evals/mgsm_{lang}.tsv
    fetch_method: download_and_verify
    state: pinned
    comment: 11 language files
  mind2web:
  - type: direct_url
    source: https://huggingface.co/datasets/osunlp/Mind2Web/resolve/{SHA}/scores_all_data.pkl
    fetch_method: download_and_verify
    state: pinned
  - type: huggingface
    source: osunlp/Multimodal-Mind2Web
    fetch_method: hf_dataset
    state: pinned
  mind2web_sc: []
  mle_bench:
  - type: git_clone
    source: https://github.com/openai/mle-bench.git
    fetch_method: git_clone
    state: pinned
    comment: Pinned to commit 2451bcb in Dockerfile
  - type: direct_url
    source: https://github.com/conda-forge/miniforge/releases/download/24.11.3-0/Miniforge3-Linux-x86_64.sh
    fetch_method: wget
    state: pinned
  - type: direct_url
    source: https://github.com/git-lfs/git-lfs/releases/download/v3.6.1/git-lfs-linux-amd64-v3.6.1.tar.gz
    fetch_method: curl
    state: pinned
  - type: git_dependency
    source: https://github.com/openai/mle-bench.git
    fetch_method: pyproject_toml
    state: pinned
  mlrc_bench:
  - type: direct_url
    source: https://github.com/yunx-z/MLRC-Bench/archive/{SHA}.zip
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://huggingface.co/datasets/FanqingM/MMIU-Benchmark/resolve/{SHA}/{file}.zip
    fetch_method: download_and_verify
    state: pinned
  - type: huggingface
    source: FanqingM/MMIU-Benchmark
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: openai/MMMLU
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: cais/mmlu
    fetch_method: hf_dataset
    state: pinned
  mmlu_pro:
  - type: huggingface
    source: TIGER-Lab/MMLU-Pro
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: MMMU/MMMU
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: sentientfutures/moru-benchmark
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: sentientfutures/moru-benchmark-dimensions
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: TAUR-Lab/MuSR
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: opencompass/NeedleBench
    fetch_method: load_dataset
    state: pinned
  novelty_bench:
  - type: huggingface
    source: yimingzhang/novelty-bench
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: openthaigpt/thai-onet-m6-exam
    fetch_method: hf_dataset
    state: pinned
  - type: git_clone
    source: https://github.com/xlang-ai/OSWorld.git
    fetch_method: git_clone
    state: pinned
    comment: Dockerfile sparse clone at a revision pinned to the commit matching OSWORLD_PINNED_COMMIT
  - type: git_clone
    source: https://github.com/xlang-ai/OSWorld.git
    fetch_method: git_clone
    state: pinned
    comment: Runtime sparse-checkout pinned to OSWORLD_PINNED_COMMIT
  - type: direct_url
    source: https://raw.githubusercontent.com/epatey/fonts/main/fonts.tar.gz
    fetch_method: curl
    state: floating
  paperbench:
  - type: huggingface
    source: josancamon/paperbench
    fetch_method: load_dataset
    state: pinned
  - type: huggingface
    source: google-research-datasets/paws
    fetch_method: hf_dataset
    state: pinned
  persistbench: []
  personality:
  - type: direct_url
    source: https://raw.githubusercontent.com/guiem/personality-tests/{SHA}/
    fetch_method: download_and_verify
    state: pinned
  - type: huggingface
    source: mirlab/TRAIT
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: ybisk/piqa
    fetch_method: snapshot_download
    state: pinned
  - type: direct_url
    source: https://storage.googleapis.com/ai2-mosaic/public/physicaliqa/physicaliqa-train-dev.zip
    fetch_method: other
    state: floating
    comment: Fetched by bundled HF builder via download_and_prepare
  - type: direct_url
    source: https://yonatanbisk.com/piqa/data/tests.jsonl
    fetch_method: other
    state: floating
    comment: Fetched by bundled HF builder (test split)
  pre_flight:
  - type: huggingface
    source: AirsideLabs/pre-flight-06
    fetch_method: hf_dataset
    state: pinned
  pubmedqa:
  - type: huggingface
    source: qiaojin/PubMedQA
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: ehovy/race
    fetch_method: hf_dataset
    state: pinned
  - type: direct_url
    source: https://api.github.com/repos/LRudL/sad/contents/{path}?ref={SHA}
    fetch_method: download_and_verify
    state: pinned
  scbench: []
  - type: direct_url
    source: https://raw.githubusercontent.com/scicode-bench/SciCode/{SHA}/eval/data/
    fetch_method: download_and_verify
    state: pinned
  - type: direct_url
    source: https://drive.google.com/uc?id=17G_k65N_6yFFZ2O-jQH00Lh6iaw3z-AW
    fetch_method: gdown_and_verify
    state: pinned
  sciknoweval:
  - type: direct_url
    source: https://drive.google.com/uc?id={file_id}
    fetch_method: gdown_and_verify
    state: pinned
  - type: huggingface
    source: hicai-zju/SciKnowEval
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: zefang-liu/secqa
    fetch_method: hf_dataset
    state: pinned
  sevenllm:
  - type: direct_url
    source: https://huggingface.co/datasets/Multilingual-Multimodal-NLP/SEVENLLM-Dataset/raw/{SHA}/
    fetch_method: other
    state: pinned
    comment: Raw HF URL fetched via fsspec (load_json_dataset), not via HF datasets API
  simpleqa:
  - type: huggingface
    source: codelion/SimpleQA-Verified
    fetch_method: hf_dataset
    state: pinned
  - type: direct_url
    source: https://openaipublic.blob.core.windows.net/simple-evals/simple_qa_test_set.csv
    fetch_method: download_and_verify
    state: pinned
    comment: Used by simpleqa task
  sosbench:
  - type: huggingface
    source: SOSBench/SOSBench
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: rajpurkar/squad_v2
    fetch_method: hf_dataset
    state: pinned
  stereoset:
  - type: huggingface
    source: McGill-NLP/stereoset
    fetch_method: hf_dataset
    state: pinned
  strong_reject:
  - type: direct_url
    source: https://raw.githubusercontent.com/alexandrasouly/strongreject/{SHA}/strongreject_dataset/
    fetch_method: other
    state: pinned
    comment: Fetched via fsspec (load_csv_dataset)
  swe_bench:
  - type: git_clone
    source: https://github.com/swe-bench/experiments
    fetch_method: git_clone
    state: pinned
    comment: Pinned to commit 559cf877c1095c2e244af73b2de42bd53bd0c9d5 in download_baselines.sh
  - type: huggingface
    source: princeton-nlp/SWE-bench_Verified
    fetch_method: load_dataset
    state: pinned
  swe_lancer:
  - type: direct_url
    source: https://hub.docker.com/u/swelancer
    fetch_method: other
    state: floating
    comment: Per-task images (swelancer/swelancer_x86_{issue_id}:releasev1) and monolith image pulled at eval runtime
  sycophancy:
  - type: direct_url
    source: https://raw.githubusercontent.com/meg-tong/sycophancy-eval/{SHA}/datasets/
    fetch_method: other
    state: pinned
    comment: Fetched via fsspec (load_json_dataset)
  - type: huggingface
    source: sentientfutures/tac
    fetch_method: hf_hub_download
    state: pinned
  theagentcompany: []
  threecb: []
  truthfulqa:
  - type: huggingface
    source: truthfulqa/truthful_qa
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: CraneAILabs/UCCB
    fetch_method: hf_dataset
    state: pinned
  - type: direct_url
    source: https://drive.usercontent.google.com/download?id=1z5ODOJMqyer1QxzYtEUZ2hbAx-7nU8Vi
    fetch_method: gdown_and_verify
    state: pinned
  vimgolf_challenges:
  - type: huggingface
    source: cybergod-kevin/vimgolf-public-challenges-inspect-eval
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: flaviagiammarino/vqa-rad
    fetch_method: hf_dataset
    state: pinned
  vstar_bench:
  - type: huggingface
    source: craigwu/vstar_bench
    fetch_method: hf_dataset
    state: pinned
  winogrande:
  - type: huggingface
    source: allenai/winogrande
    fetch_method: hf_dataset
    state: pinned
  - type: huggingface
    source: cais/wmdp
    fetch_method: hf_dataset
    state: pinned
  worldsense:
  - type: direct_url
    source: https://github.com/facebookresearch/worldsense/raw/{SHA}/data/
    fetch_method: download_and_verify
    state: pinned
  writingbench: []
  - type: huggingface
    source: walledai/XSTest
    fetch_method: hf_dataset
    state: pinned
  zerobench:
  - type: huggingface
    source: jonathan-roberts1/zerobench
    fetch_method: hf_dataset
    state: pinned
large_local_files_threshold: '>1 MB'
large_local_files:
  gdm_self_proliferation:
  - path: data/sp08/secrets.zip
    size: 52.5M
  writingbench:
  - path: benchmark_all.jsonl
    size: 14.0M
  swe_lancer:
  - path: data/all_swelancer_tasks.csv
    size: 8.0M
  - path: challenges/network_tools/resources/nettools
    size: 5.3M
  - path: challenges/network_tools/images/victim/share/nettools
    size: 5.3M
  - path: challenges/delulu/images/victim/challenge/glibc/libc.so.6
    size: 2.1M
  - path: challenges/rpgo/resources/rpgo
    size: 1.3M
  - path: challenges/robust_cbc/images/victim/app/server
    size: 1.3M
  - path: challenges/flecks_of_gold/solution/flecks_patched
    size: 1.0M
  - path: challenges/flecks_of_gold/resources/flecks
    size: 1.0M
  - path: uccb_evaluation_results/x-ai_grok-4/detailed_results.json
    size: 2.7M
  - path: uccb_evaluation_results/cohere_command-a/detailed_results.json
    size: 2.6M
  - path: uccb_evaluation_results/google_gemini-2.0-flash-001/detailed_results.json
    size: 2.5M
  - path: uccb_evaluation_results/anthropic_claude-sonnet-4.5/detailed_results.json
    size: 2.5M
  - path: uccb_evaluation_results/openai_gpt-5/detailed_results.json
    size: 1.1M
  - path: uccb_evaluation_results/google_gemini-2.5-pro/detailed_results.json
    size: 1.1M
  agentdojo:
  - path: data/suites/workspace_plus/terminal/emma-computer/home-dir/Downloads/1706.03762v7.pdf
    size: 2.1M
  gdm_stealth:
  - path: data/ai_secretary/register.json
    size: 2.0M
  mind2web_sc:
  - path: data/seeact/sample_labeled_all.json
    size: 1.2M
Sunbelt Computer Software

PL/B Language Development and Support

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Sunbelt Computer Software

PL/B Language Development and Support

FilesExpand file tree

ASSETS.yaml

Latest commit

History

ASSETS.yaml

File metadata and controls