Setup repo with Phi 3
5
tools/README.md
Normal file
@@ -0,0 +1,5 @@
# Setup

```
pip install pyyaml huggingface_hub
```
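All of the tools in this commit revolve around a per-model `model.yaml`. As a point of reference, a minimal file that `tools/download.py` can consume might look like the sketch below; the repo URL and variant id are illustrative, and `files` / `size_bytes` are filled in by the tooling after download:

```
model:
  name: Phi-3-mini-4k-instruct
  formats:
    - type: gguf
      variants:
        - id: phi-3-mini-4k-instruct-q4    # illustrative variant id
          hf_repo: https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
          files: []        # populated by tools/download.py
          size_bytes: 0    # populated by tools/download.py
```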
49
tools/cleanup.sh
Executable file
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
set -euo pipefail

# cleanup.sh - Commits, pushes, and prunes LFS files.
#
# - Detects *untracked* files (git status --porcelain), so we don't skip commits.
# - Uses 'git add --renormalize .' so new/changed .gitattributes rules convert
#   existing files into LFS pointers on re-add.
# - Keeps the prune step to free local disk space after a successful push.
#
# Usage: ./tools/cleanup.sh <commit-message>

if [ "$#" -ne 1 ]; then
  echo "Usage: $0 <commit-message>" >&2
  exit 1
fi

COMMIT_MESSAGE="$1"

# Detect any changes, including untracked.
if [[ -z "$(git status --porcelain=v1)" ]]; then
  echo "No new files or changes to commit. Skipping commit and push."
  exit 0
fi

echo "Committing and pushing changes..."

# Make sure .gitattributes changes are included and normalization runs,
# so LFS filters rewrite eligible files as pointers. Note that
# 'git add --renormalize' implies -u and only touches tracked files,
# so stage untracked files explicitly with 'git add -A' first.
git add .gitattributes || true
git add -A
git add --renormalize .

# If nothing ended up staged (e.g. only ignored files changed), exit gracefully.
if git diff --cached --quiet; then
  echo "No staged changes after normalization. Skipping commit and push."
  exit 0
fi

git commit -m "$COMMIT_MESSAGE"
git push

# Optional but useful: ensure all LFS objects are on the remote.
# Uncomment if you want belt-and-suspenders uploads.
# git lfs push origin --all

echo "Pruning local LFS files..."
git lfs prune --force

echo "✅ Cleanup complete."
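For context on the renormalize step, here is a minimal by-hand sketch of the sequence cleanup.sh automates (the pattern is illustrative): once a pattern is tracked, re-adding with `--renormalize` is what rewrites already-committed files as LFS pointers.

```
git lfs track "*.safetensors"   # writes the rule to .gitattributes
git add --renormalize .         # re-runs clean filters on tracked files
git diff --cached --stat        # rewritten files now stage as small pointer files
```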
167
tools/download.py
Normal file
@@ -0,0 +1,167 @@
#!/usr/bin/env python3
"""
download.py - Download/repair model files and update model.yaml metadata.

Usage:
    ./tools/download.py models/llama-2-7b-chat/model.yaml

- Always (re)runs snapshot_download with resume support, so partially
  fetched directories get completed instead of being skipped.
- Updates YAML after each variant with a fresh file list + total size.
- Tracks LFS via sensible patterns (plus a size-threshold fallback).
- Emits clear logs so you can see progress per variant.
"""

import subprocess
import sys
from pathlib import Path
from typing import Iterable

import yaml
from huggingface_hub import snapshot_download

LFS_PATTERNS: list[str] = [
    # Extensions commonly used for model artifacts
    "*.safetensors",
    "*.bin",
    "*.pt",
    "*.gguf",
    "*.onnx",
    "*.ckpt",
    "*.tensors",
    "*.npz",
    "*.tar",
    "*.tar.gz",
    "*.zip",
]

SIZE_THRESHOLD_BYTES = 1_000_000  # 1 MB fallback if a file doesn't match any pattern


def run(cmd: list[str], check: bool = True) -> None:
    subprocess.run(cmd, check=check)


def track_lfs_patterns(patterns: Iterable[str]) -> None:
    """
    Track a set of patterns in Git LFS. This is idempotent; it just
    appends to .gitattributes as needed.
    """
    for patt in patterns:
        try:
            run(["git", "lfs", "track", patt], check=False)
        except Exception:
            # Non-fatal (e.g. git missing): we'll still fall back to the
            # per-file size rule below.
            pass


def list_files_under(root: Path) -> list[Path]:
    return [p for p in root.rglob("*") if p.is_file()]


def ensure_repo_root() -> None:
    # Best effort: warn (but don't die) if not in a git repo.
    try:
        subprocess.run(
            ["git", "rev-parse", "--is-inside-work-tree"],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        print("⚠️ Not inside a Git repository? Git/LFS steps may fail.", file=sys.stderr)


def main() -> None:
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <path-to-model.yaml>", file=sys.stderr)
        sys.exit(1)

    model_yaml_path = Path(sys.argv[1])
    if not model_yaml_path.exists():
        print(f"Model YAML not found: {model_yaml_path}", file=sys.stderr)
        sys.exit(1)

    ensure_repo_root()

    # Load YAML
    with open(model_yaml_path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f) or {}

    model_dir = model_yaml_path.parent

    # Proactively set up LFS tracking by patterns (idempotent)
    track_lfs_patterns(LFS_PATTERNS)

    # Iterate formats & variants
    formats = (data.get("model") or {}).get("formats") or []
    for fmt in formats:
        variants = fmt.get("variants") or []
        for variant in variants:
            variant_id = variant.get("id")
            hf_repo = variant.get("hf_repo")

            if not hf_repo or not variant_id:
                continue

            dest_path = model_dir / variant_id
            dest_path.mkdir(parents=True, exist_ok=True)

            repo_id = hf_repo.replace("https://huggingface.co/", "")
            print(f"\n[DL] Downloading/resuming variant '{variant_id}' from '{repo_id}' into '{dest_path}'")

            # Always call snapshot_download with resume enabled. This will:
            # - no-op for already-complete files
            # - resume partials
            # - fetch any missing files
            try:
                snapshot_download(
                    repo_id=repo_id,
                    local_dir=str(dest_path),
                    local_dir_use_symlinks=False,
                    resume_download=True,  # explicit; recent hub versions resume by default
                    # You can add allow_patterns / ignore_patterns if you want to filter
                    # allow_patterns=None,
                    # ignore_patterns=None,
                )
            except Exception as e:
                print(f"❌ snapshot_download failed for {variant_id}: {e}", file=sys.stderr)
                raise

            # Scan files, compute size, and ensure big files are tracked by LFS
            files_list: list[str] = []
            total_size_bytes = 0

            for p in list_files_under(dest_path):
                try:
                    size = p.stat().st_size
                except FileNotFoundError:
                    # If a file was removed mid-scan, skip it entirely
                    # (don't record it in the file list either).
                    continue
                files_list.append(str(p.relative_to(model_dir)))
                total_size_bytes += size

                # Fallback: ensure big files get tracked even if patterns miss them
                if size > SIZE_THRESHOLD_BYTES:
                    # Idempotent; harmless if already tracked.
                    run(["git", "lfs", "track", str(p)], check=False)

            files_list.sort()
            variant["files"] = files_list
            variant["size_bytes"] = int(total_size_bytes)

            # Save updated YAML progressively after each variant
            with open(model_yaml_path, "w", encoding="utf-8") as f:
                yaml.dump(data, f, sort_keys=False, allow_unicode=True)

            print(f"✅ Updated {model_yaml_path} for variant '{variant_id}'")

            # Run cleanup script to commit, push, and prune
            commit_message = f"Add/update model files for {model_dir.name}/{variant_id}"
            print(f"🧹 Running cleanup for {variant_id}...")
            try:
                run(["./tools/cleanup.sh", commit_message], check=True)
            except subprocess.CalledProcessError as e:
                print(f"❌ cleanup.sh failed (continuing to next variant): {e}", file=sys.stderr)
                # Decide whether to continue or abort; continuing is usually fine.
                # raise  # uncomment to abort on failure

    print(f"\n✅ Download and YAML update complete for {model_yaml_path}.")


if __name__ == "__main__":
    main()
87
tools/download.sh
Executable file
@@ -0,0 +1,87 @@
#!/usr/bin/env bash
set -euo pipefail

# download.sh - Download model files and update model.yaml metadata.
#
# This script reads a model.yaml file, downloads the complete model data from
# the specified Hugging Face repository, and then updates the 'files' array
# in the YAML with the paths of the downloaded files.
#
# This approach is more robust than specifying files manually, as it ensures
# the YAML reflects the actual downloaded content.
#
# Usage: ./tools/download.sh models/llama-2-7b/model.yaml

if [ "$#" -ne 1 ]; then
  echo "Usage: $0 <path-to-model.yaml>" >&2
  exit 1
fi

MODEL_YAML="$1"
MODEL_DIR=$(dirname "$MODEL_YAML")

if [ ! -f "$MODEL_YAML" ]; then
  echo "Model YAML not found: $MODEL_YAML" >&2
  exit 1
fi

# Ensure yq is installed. The jq-style expressions below require the Python
# yq wrapper (pip install yq); the Go implementation of yq uses a different
# expression syntax.
if ! command -v yq &> /dev/null; then
  echo "Error: yq is not installed. Install it with: pip install yq" >&2
  exit 1
fi

# Ensure huggingface-cli is installed
if ! command -v huggingface-cli &> /dev/null; then
  echo "Error: huggingface-cli is not installed. Install it with: pip install huggingface_hub" >&2
  exit 1
fi

echo "Reading metadata from $MODEL_YAML..."

# Create a temporary file to store the updated YAML content
TMP_YAML=$(mktemp)
trap 'rm -f "$TMP_YAML"' EXIT

cp "$MODEL_YAML" "$TMP_YAML"

# Loop over each format and variant to download files
yq -r '.model.formats[] | . as $format | .variants[] | . as $variant | "\($format.type)|\($variant.id)|\($variant.hf_repo)"' "$MODEL_YAML" | while IFS='|' read -r format_type variant_id hf_repo; do
  echo
  echo "Processing variant: $variant_id (format: $format_type) from $hf_repo"

  DEST_PATH="$MODEL_DIR/$variant_id"
  mkdir -p "$DEST_PATH"

  # Check if files are already downloaded by checking for a non-empty directory
  if [ -n "$(ls -A "$DEST_PATH" 2>/dev/null)" ]; then
    echo "[OK] Files for $variant_id already exist in $DEST_PATH. Skipping download."
  else
    repo_id=${hf_repo#https://huggingface.co/}
    echo "[DL] Downloading files for $variant_id from $repo_id..."
    huggingface-cli download "$repo_id" --local-dir "$DEST_PATH" --local-dir-use-symlinks False
  fi

  # After downloading, list the downloaded files relative to the model directory
  downloaded_files=()
  while IFS= read -r file; do
    downloaded_files+=("$(realpath --relative-to="$MODEL_DIR" "$file")")
  done < <(find "$DEST_PATH" -type f)

  # Update the YAML file with the list of downloaded files for the current variant
  echo "Updating $MODEL_YAML with downloaded file paths for $variant_id..."
  # Reset the variant's files list, then append each downloaded file
  # (jq-style path expressions, applied in place via the Python yq wrapper).
  yq_exp="(.model.formats[] | select(.type == \"$format_type\") | .variants[] | select(.id == \"$variant_id\") | .files) = []"
  yq -y -i "$yq_exp" "$TMP_YAML"

  for file in "${downloaded_files[@]}"; do
    yq_exp="(.model.formats[] | select(.type == \"$format_type\") | .variants[] | select(.id == \"$variant_id\") | .files) += [\"$file\"]"
    yq -y -i "$yq_exp" "$TMP_YAML"
  done
done

# Replace the original YAML with the updated one
mv "$TMP_YAML" "$MODEL_YAML"

echo
echo "✅ Download and YAML update complete for $MODEL_YAML."
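The first yq call flattens each (format, variant) pair into one pipe-delimited line, which is exactly what the `IFS='|' read` consumes. Against a model.yaml like the illustrative one sketched in the README section above, it would emit a line per variant of roughly this shape (values hypothetical):

```
gguf|phi-3-mini-4k-instruct-q4|https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf
```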
34
tools/generate-registry.py
Normal file
@@ -0,0 +1,34 @@
#!/usr/bin/env python3
import os
import sys
import yaml
import json

def collect_models(models_root):
    registry = []
    for root, dirs, files in os.walk(models_root):
        if "model.yaml" in files:
            model_path = os.path.join(root, "model.yaml")
            try:
                with open(model_path, 'r', encoding='utf-8') as f:
                    model_data = yaml.safe_load(f)
                registry.append(model_data)
            except Exception as e:
                print(f"❌ Failed to parse {model_path}: {e}", file=sys.stderr)
    return registry

if __name__ == "__main__":
    repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    models_root = os.path.join(repo_root, "models")
    output_path = os.path.join(repo_root, "registry.json")

    if not os.path.isdir(models_root):
        print(f"❌ Models directory not found: {models_root}")
        sys.exit(1)

    registry = collect_models(models_root)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(registry, f, indent=2, ensure_ascii=False)

    print(f"✅ Registry written to {output_path} with {len(registry)} models.")
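A note on output shape: registry.json is simply the array of parsed model.yaml documents, in discovery order. A single-entry registry would look roughly like this (values illustrative, variants elided):

```
[
  {
    "model": {
      "name": "Phi-3-mini-4k-instruct",
      "formats": [{ "type": "gguf", "variants": [] }]
    }
  }
]
```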
134
tools/generate_model_yaml.py
Normal file
@@ -0,0 +1,134 @@
from huggingface_hub import HfApi, HfFileSystem
from pathlib import Path
import yaml
import requests
from datetime import datetime
from collections import defaultdict
import re
import sys


def generate_model_bundle(repo_id: str, output_dir: str):
    api = HfApi()
    fs = HfFileSystem()
    model_info = api.model_info(repo_id)

    # Create output path
    out_path = Path(output_dir)
    out_path.mkdir(parents=True, exist_ok=True)

    # ----- 1. Fetch metadata -----
    model_card = model_info.cardData or {}
    tags = model_info.tags or []
    files = api.list_repo_files(repo_id)

    # ----- 2. Filter files -----
    model_files = [f for f in files if f.endswith(".gguf") or f.endswith(".safetensors")]
    tokenizer_files = [f for f in files if "tokenizer" in f.lower()]
    license_file = next((f for f in files if "license" in f.lower()), None)

    # ----- 3. Fetch README -----
    readme_url = f"https://huggingface.co/{repo_id}/raw/main/README.md"
    readme_path = out_path / "README.md"
    try:
        r = requests.get(readme_url, timeout=30)
        r.raise_for_status()
        readme_path.write_text(r.text)
    except Exception:
        readme_path.write_text(f"# README for {repo_id}\n(Not found on HuggingFace)")

    # ----- 4. Fetch LICENSE -----
    if license_file:
        # hf_hub_download returns the local cache path; copy its contents out.
        license_text = api.hf_hub_download(repo_id, license_file)
        license_dst = out_path / Path(license_file).name
        license_dst.write_text(Path(license_text).read_text())

    # ----- 5. Build variant groups -----
    variants = []
    shard_groups = defaultdict(list)
    unsharded_files = []

    for f in model_files:
        match = re.match(r"(.+)-\d+-of-\d+\.safetensors$", f)
        if match:
            prefix = match.group(1)
            shard_groups[prefix].append(f)
        else:
            unsharded_files.append(f)

    for prefix, files_group in shard_groups.items():
        total_size = sum(fs.info(f"hf://{repo_id}/{f}").get("size", 0) for f in files_group)
        context_length = 128000 if "128k" in prefix.lower() else 4096
        bits = 16  # Assume safetensors shards are FP16

        variants.append({
            "id": prefix,
            "label": prefix,
            "bits": bits,
            "context_length": context_length,
            "size_bytes": total_size,
            "hf_repo": f"https://huggingface.co/{repo_id}",
            "files": sorted(files_group)
        })

    for f in unsharded_files:
        ext = Path(f).suffix
        size_bytes = fs.info(f"hf://{repo_id}/{f}").get("size", 0)
        # Rough precision heuristic from the filename: fp16/safetensors -> 16,
        # q4 quantizations -> 4, anything else assumed 8-bit.
        bits = 16 if "fp16" in f.lower() or ext == ".safetensors" else 4 if "q4" in f.lower() else 8
        context_length = 128000 if "128k" in f.lower() else 4096

        variants.append({
            "id": Path(f).stem,
            "label": f,
            "bits": bits,
            "context_length": context_length,
            "size_bytes": size_bytes,
            "hf_repo": f"https://huggingface.co/{repo_id}",
            "files": [f]
        })

    # ----- 6. Handle date -----
    last_modified = model_info.lastModified
    if isinstance(last_modified, str):
        last_modified = datetime.fromisoformat(last_modified.replace("Z", "+00:00"))

    # ----- 7. YAML data -----
    yaml_data = {
        "model": {
            "name": repo_id.split("/")[-1],
            "display_name": model_card.get("title", repo_id),
            "description": model_card.get("summary", "No description available."),
            "publisher_original": model_card.get("license", "other"),
            "publisher_quantized": "Community",
            "license": model_card.get("license", "other"),
            "license_url": f"https://huggingface.co/{repo_id}/blob/main/{license_file}" if license_file else "N/A",
            "publish_date": last_modified.date().isoformat(),
            "modality": "text",
            "thinking_model": True,
            "tokenizer": {"files": tokenizer_files},
            "architecture": model_card.get("model_architecture", "transformer"),
            "formats": [{
                "type": "gguf" if any(f.endswith(".gguf") for f in model_files) else "safetensors",
                "variants": variants
            }]
        }
    }

    with open(out_path / "model.yaml", "w") as f:
        yaml.dump(yaml_data, f, sort_keys=False)

    return str(out_path)


# -------- Entry point for CLI --------
if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: python generate_model_yaml.py <huggingface/repo-id> <output-folder>")
        sys.exit(1)

    repo_id = sys.argv[1]
    output_dir = sys.argv[2]

    output_path = generate_model_bundle(repo_id, output_dir)
    print(f"✅ Model bundle generated at: {output_path}")
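An illustrative run, in keeping with this commit's Phi 3 theme (the repo id and output folder are examples, not fixed by the tooling):

```
python tools/generate_model_yaml.py microsoft/Phi-3-mini-4k-instruct models/phi-3-mini-4k-instruct
# Produces models/phi-3-mini-4k-instruct/ containing model.yaml, README.md,
# and the LICENSE file if the repo has one.
```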
60
tools/verify-checksums.py
Normal file
@@ -0,0 +1,60 @@
#!/usr/bin/env python3
import sys
import os
import yaml
import hashlib

def sha256sum(filename, buf_size=65536):
    sha256 = hashlib.sha256()
    with open(filename, 'rb') as f:
        while True:
            data = f.read(buf_size)
            if not data:
                break
            sha256.update(data)
    return sha256.hexdigest()

def verify_model(model_yaml_path):
    if not os.path.isfile(model_yaml_path):
        print(f"❌ Model YAML not found: {model_yaml_path}")
        sys.exit(1)

    with open(model_yaml_path, 'r', encoding='utf-8') as f:
        model_data = yaml.safe_load(f)

    base_dir = os.path.dirname(model_yaml_path)
    all_ok = True

    # Formats may live under a top-level 'model:' key (as written by
    # generate_model_yaml.py) or at the document root.
    formats = (model_data.get("model") or model_data).get("formats", [])

    for fmt in formats:
        for variant in fmt.get("variants", []):
            for file_path in variant.get("files", []):
                checksum_expected = variant.get("checksums", {}).get(file_path)
                abs_path = os.path.join(base_dir, file_path)

                if not os.path.isfile(abs_path):
                    print(f"❌ Missing file: {abs_path}")
                    all_ok = False
                    continue

                if not checksum_expected:
                    print(f"⚠️ No checksum for {file_path}, skipping verification.")
                    continue

                checksum_actual = sha256sum(abs_path)
                if checksum_actual.lower() == checksum_expected.lower():
                    print(f"✅ {file_path} OK")
                else:
                    print(f"❌ {file_path} checksum mismatch! Expected {checksum_expected}, got {checksum_actual}")
                    all_ok = False

    if all_ok:
        print("✅ All files verified successfully.")
    else:
        print("❌ Verification failed.")
        sys.exit(1)

if __name__ == "__main__":
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <path-to-model.yaml>")
        sys.exit(1)

    verify_model(sys.argv[1])
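Expected checksums are read from a per-variant `checksums` map keyed by the same relative paths as `files`. None of the other tools in this commit write that map yet, so it would be filled in by hand or by a future step. A hypothetical variant entry:

```
variants:
  - id: phi-3-mini-4k-instruct-q4        # illustrative
    files:
      - phi-3-mini-4k-instruct-q4/model.gguf
    checksums:
      phi-3-mini-4k-instruct-q4/model.gguf: <sha256 hex digest>
```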
38
tools/watcher.sh
Executable file
@@ -0,0 +1,38 @@
#!/usr/bin/env bash
set -euo pipefail

# watcher.sh - Watches for new models, downloads their files, and tracks large files with Git LFS.
#
# This script continuously scans the 'models' directory for 'model.yaml' files.
# For each model, it runs 'tools/download.py' to fetch model files from
# Hugging Face; that script also ensures files larger than 1 MB are tracked
# by Git LFS and runs the commit/push/prune cleanup for each variant.
#
# Usage: ./tools/watcher.sh
# Run from the root of the repository.

if [ ! -d ".git" ]; then
  echo "Error: This script must be run from the root of the repository." >&2
  exit 1
fi

while true; do
  echo "🔍 Starting model discovery cycle..."

  # Find all model.yaml files in the models directory
  find models -name model.yaml | while read -r MODEL_YAML; do
    MODEL_DIR=$(dirname "$MODEL_YAML")

    echo "--------------------------------------------------"
    echo "Processing model in $MODEL_DIR"

    # The download script handles LFS tracking and cleanup for each variant.
    python3 ./tools/download.py "$MODEL_YAML"
  done

  echo "--------------------------------------------------"
  echo "✅ Watcher finished a cycle. Sleeping for 60 seconds before next scan."
  echo "Press [CTRL+C] to stop."
  sleep 60
done