Hugging Face Publish

ctx publishes the GitHub repository as the public Hugging Face dataset Stevesolun/ctx. The dataset is a clean snapshot of the git ls-files output: it includes the shipped graph tarball and catalog artifacts but excludes local review reports and ignored caches.

What gets uploaded

  • Tracked source, docs, tests, and packaging files.
  • graph/wiki-graph.tar.gz.
  • graph/skills-sh-catalog.json.gz.
  • Tracked graph visualizations under graph/.

Ignored local reports, review notes, raw ingest caches, coverage files, site/, and .pytest_cache/ are not uploaded because they are not tracked by git.
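
To preview exactly what will be published, inspect the tracked file list before running the upload; these are standard git commands:

git ls-files graph/
git check-ignore -v site/ .pytest_cache/

The first command should list the tarball, catalog, and visualizations; the second prints the ignore rule that keeps each local path out of the snapshot.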

Publish command

Set the token in the process environment. Do not pass it on a command line that will be saved in shell history.

$env:HF_TOKEN = "<hugging-face-write-token>"
python -m pip install --upgrade huggingface_hub
git lfs install
git lfs pull --include="graph/wiki-graph.tar.gz"
@'
from __future__ import annotations

import os
import shutil
import subprocess
import tempfile
from pathlib import Path

from huggingface_hub import HfApi

root = Path.cwd()
token = os.environ["HF_TOKEN"]
api = HfApi(token=token)
owner = api.whoami()["name"]
repo_id = f"{owner}/ctx"
repo_type = "dataset"
# Short commit hash for the upload commit message.
sha = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"], text=True).strip()
# Enumerate tracked files exactly as git sees them; -z (NUL-separated output)
# is safe for paths containing spaces or unusual characters.
files = [
    Path(raw.decode("utf-8"))
    for raw in subprocess.check_output(["git", "ls-files", "-z"], cwd=root).split(b"\0")
    if raw
]
# Refuse to publish if the tarball is still an LFS pointer stub: pointers are
# a few hundred bytes, while the hydrated artifact is far larger.
graph_tar = root / "graph" / "wiki-graph.tar.gz"
if not graph_tar.is_file() or graph_tar.stat().st_size < 100_000_000:
    raise SystemExit(
        "graph/wiki-graph.tar.gz is not hydrated; run git lfs pull before publishing"
    )

api.create_repo(repo_id=repo_id, repo_type=repo_type, private=False, exist_ok=True, token=token)
# Copy the tracked files into a temporary staging directory so upload_folder
# sees only the clean snapshot, never ignored local files.
staging = Path(tempfile.mkdtemp(prefix="ctx-hf-upload-"))
try:
    for rel in files:
        src = root / rel
        if not src.is_file():
            continue
        dst = staging / rel
        dst.parent.mkdir(parents=True, exist_ok=True)
        shutil.copy2(src, dst)
    # Upload the entire staging tree as a single commit.
    api.upload_folder(
        repo_id=repo_id,
        repo_type=repo_type,
        folder_path=staging,
        commit_message=f"Publish ctx snapshot {sha}",
        token=token,
    )
finally:
    shutil.rmtree(staging, ignore_errors=True)
'@ | python -
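
If a large upload is interrupted, newer releases of huggingface_hub also provide upload_large_folder, a resumable variant of upload_folder. A minimal sketch, assuming your installed version includes it; ctx-hf-staging is a hypothetical directory populated the same way as the temporary staging tree above:

@'
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
# Resumable, multi-commit upload; ctx-hf-staging is a placeholder path, not
# part of the publish flow above.
api.upload_large_folder(
    repo_id=f"{api.whoami()['name']}/ctx",
    repo_type="dataset",
    folder_path="ctx-hf-staging",
)
'@ | python -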

Then upload a dataset-card wrapper that prepends the Hugging Face YAML metadata to README.md:

$env:HF_TOKEN = "<hugging-face-write-token>"
@'
from __future__ import annotations

import os
import tempfile
from pathlib import Path

from huggingface_hub import HfApi

token = os.environ["HF_TOKEN"]
api = HfApi(token=token)
repo_id = f"{api.whoami()['name']}/ctx"
# YAML frontmatter that the Hub parses as the dataset card metadata.
frontmatter = """---
license: mit
tags:
- agents
- mcp
- skills
- knowledge-graph
- llm-wiki
- recommendation-system
- harness
- codex
- claude-code
pretty_name: ctx
---

"""
# Write frontmatter plus the existing README.md to a temp file, upload it as
# the dataset's README.md, then clean up.
with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".md", delete=False) as fh:
    path = Path(fh.name)
    fh.write(frontmatter)
    fh.write(Path("README.md").read_text(encoding="utf-8"))
try:
    api.upload_file(
        repo_id=repo_id,
        repo_type="dataset",
        path_or_fileobj=path,
        path_in_repo="README.md",
        commit_message="Add Hugging Face dataset card metadata",
        token=token,
    )
finally:
    path.unlink(missing_ok=True)
'@ | python -
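
huggingface_hub also ships DatasetCard and DatasetCardData helpers that render the same frontmatter; a roughly equivalent sketch, assuming the license and tags above:

@'
import os
from pathlib import Path

from huggingface_hub import DatasetCard, DatasetCardData, HfApi

token = os.environ["HF_TOKEN"]
repo_id = f"{HfApi(token=token).whoami()['name']}/ctx"
data = DatasetCardData(
    license="mit",
    pretty_name="ctx",
    tags=["agents", "mcp", "skills", "knowledge-graph", "llm-wiki",
          "recommendation-system", "harness", "codex", "claude-code"],
)
# Render the YAML block, then append the repository README beneath it.
body = Path("README.md").read_text(encoding="utf-8")
card = DatasetCard(f"---\n{data.to_yaml()}\n---\n\n{body}")
card.push_to_hub(repo_id, repo_type="dataset", token=token)
'@ | python -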

Verify

@'
from huggingface_hub import HfApi

api = HfApi()  # no token needed: the dataset is public
info = api.repo_info(repo_id="Stevesolun/ctx", repo_type="dataset")
print(info.id, info.sha)
'@ | python -
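
To also confirm the tarball landed as a hydrated LFS object rather than a pointer, request per-file metadata; anonymous access is again enough:

@'
from huggingface_hub import HfApi

api = HfApi()
# files_metadata=True populates size and LFS details for each file entry.
info = api.repo_info(repo_id="Stevesolun/ctx", repo_type="dataset", files_metadata=True)
tar = next(s for s in info.siblings if s.rfilename == "graph/wiki-graph.tar.gz")
print(tar.rfilename, tar.size, "lfs" if tar.lfs else "regular")
'@ | python -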

The dataset page should show the MIT license and the tags from the metadata wrapper.
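
The same metadata can be checked programmatically by loading the card back:

@'
from huggingface_hub import DatasetCard

card = DatasetCard.load("Stevesolun/ctx")
print(card.data.license, card.data.tags)
'@ | python -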