feat(skills): add media/transcription helpers

2026-02-09 05:19:32 +08:00 · 2025-12-20 12:53:09 +00:00
parent e0cd5650c5
commit e1a3bab7e5
10 changed files with 579 additions and 31 deletions
--- a/skills/brave-search/SKILL.md
+++ b/skills/brave-search/SKILL.md
@@ -1,35 +1,29 @@
 ---
 name: brave-search
 description: Web search and content extraction via Brave Search API.
-metadata: {"clawdis":{"requires":{"bins":["node","npm"],"env":["BRAVE_API_KEY"]},"primaryEnv":"BRAVE_API_KEY","install":[{"id":"node-brew","kind":"brew","formula":"node","bins":["node","npm"],"label":"Install Node.js (brew)"}]}}
+metadata: {"clawdis":{"requires":{"bins":["node"],"env":["BRAVE_API_KEY"]},"primaryEnv":"BRAVE_API_KEY"}}
 ---

 # Brave Search

-Headless web search and content extraction using Brave Search. No browser required.
-
-## Setup (run once)
-
-```bash
-cd ~/Projects/agent-scripts/skills/brave-search
-npm ci
-```
-
-Needs env: `BRAVE_API_KEY`.
+Headless web search (and lightweight content extraction) using Brave Search API. No browser required.

 ## Search

 ```bash
-./search.js "query"                    # Basic search (5 results)
-./search.js "query" -n 10              # More results
-./search.js "query" --content          # Include page content as markdown
-./search.js "query" -n 3 --content     # Combined
+node {baseDir}/scripts/search.mjs "query"
+node {baseDir}/scripts/search.mjs "query" -n 10
+node {baseDir}/scripts/search.mjs "query" --content
+node {baseDir}/scripts/search.mjs "query" -n 3 --content
 ```

-## Extract Page Content
+## Extract a page

 ```bash
-./content.js https://example.com/article
+node {baseDir}/scripts/content.mjs "https://example.com/article"
 ```

-Fetches a URL and extracts readable content as markdown.
+Notes:
+- Needs `BRAVE_API_KEY`.
+- Content extraction is best-effort (good for articles; not for app-like sites).
+- If a site is blocked or too JS-heavy, prefer the `summarize` skill (it can use a Firecrawl fallback).
--- a/skills/brave-search/scripts/content.mjs
+++ b/skills/brave-search/scripts/content.mjs
@@ -0,0 +1,53 @@
+#!/usr/bin/env node
+
+function usage() {
+  console.error(`Usage: content.mjs <url>`);
+  process.exit(2);
+}
+
+export async function fetchAsMarkdown(url) {
+  const resp = await fetch(url, {
+    headers: { "User-Agent": "clawdis-brave-search/1.0" },
+  });
+  const html = await resp.text();
+
+  // Very lightweight “readability-ish” extraction without dependencies:
+  // - drop script/style/nav/footer
+  // - strip tags
+  // - keep paragraphs
+  const cleaned = html
+    .replace(/<script[\s\S]*?<\/script>/gi, " ")
+    .replace(/<style[\s\S]*?<\/style>/gi, " ")
+    .replace(/<(nav|footer|header)[\s\S]*?<\/\1>/gi, " ")
+    .replace(/<br\s*\/?>/gi, "\n")
+    .replace(/<\/p>/gi, "\n\n")
+    .replace(/<\/div>/gi, "\n")
+    .replace(/<[^>]+>/g, " ")
+    .replace(/&nbsp;/g, " ")
+    .replace(/&amp;/g, "&")
+    .replace(/&lt;/g, "<")
+    .replace(/&gt;/g, ">")
+    .replace(/&quot;/g, '"')
+    .replace(/&#39;/g, "'")
+    .replace(/\s+\n/g, "\n")
+    .replace(/\n{3,}/g, "\n\n")
+    .replace(/[ \t]{2,}/g, " ")
+    .trim();
+
+  if (!resp.ok) {
+    return `> Fetch failed (${resp.status}).\n\n${cleaned.slice(0, 2000)}\n`;
+  }
+
+  const paras = cleaned
+    .split("\n\n")
+    .map((p) => p.trim())
+    .filter(Boolean)
+    .slice(0, 30);
+
+  return paras.map((p) => `- ${p}`).join("\n") + "\n";
+}
+
+const args = process.argv.slice(2);
+if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();
+const url = args[0];
+process.stdout.write(await fetchAsMarkdown(url));
--- a/skills/brave-search/scripts/search.mjs
+++ b/skills/brave-search/scripts/search.mjs
@@ -0,0 +1,80 @@
+#!/usr/bin/env node
+import { readFileSync } from "node:fs";
+
+function usage() {
+  console.error(`Usage: search.mjs "query" [-n 5] [--content]`);
+  process.exit(2);
+}
+
+const args = process.argv.slice(2);
+if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();
+
+const query = args[0];
+let n = 5;
+let withContent = false;
+
+for (let i = 1; i < args.length; i++) {
+  const a = args[i];
+  if (a === "-n") {
+    n = Number.parseInt(args[i + 1] ?? "5", 10);
+    i++;
+    continue;
+  }
+  if (a === "--content") {
+    withContent = true;
+    continue;
+  }
+  console.error(`Unknown arg: ${a}`);
+  usage();
+}
+
+const apiKey = (process.env.BRAVE_API_KEY ?? "").trim();
+if (!apiKey) {
+  console.error("Missing BRAVE_API_KEY");
+  process.exit(1);
+}
+
+const endpoint = new URL("https://api.search.brave.com/res/v1/web/search");
+endpoint.searchParams.set("q", query);
+endpoint.searchParams.set("count", String(Math.max(1, Math.min(n, 20))));
+endpoint.searchParams.set("text_decorations", "false");
+endpoint.searchParams.set("safesearch", "moderate");
+
+const resp = await fetch(endpoint, {
+  headers: {
+    Accept: "application/json",
+    "X-Subscription-Token": apiKey,
+  },
+});
+
+if (!resp.ok) {
+  const text = await resp.text().catch(() => "");
+  throw new Error(`Brave Search failed (${resp.status}): ${text}`);
+}
+
+const data = await resp.json();
+const results = (data?.web?.results ?? []).slice(0, n);
+
+const lines = [];
+for (const r of results) {
+  const title = String(r?.title ?? "").trim();
+  const url = String(r?.url ?? "").trim();
+  const desc = String(r?.description ?? "").trim();
+  if (!title || !url) continue;
+  lines.push(`- ${title}\n  ${url}${desc ? `\n  ${desc}` : ""}`);
+}
+
+process.stdout.write(lines.join("\n\n") + "\n");
+
+if (!withContent) process.exit(0);
+
+process.stdout.write("\n---\n\n");
+for (const r of results) {
+  const title = String(r?.title ?? "").trim();
+  const url = String(r?.url ?? "").trim();
+  if (!url) continue;
+  process.stdout.write(`# ${title || url}\n${url}\n\n`);
+  const child = await import("./content.mjs");
+  const text = await child.fetchAsMarkdown(url);
+  process.stdout.write(text.trimEnd() + "\n\n");
+}
--- a/skills/nano-pdf/SKILL.md
+++ b/skills/nano-pdf/SKILL.md
@@ -0,0 +1,19 @@
+---
+name: nano-pdf
+description: Edit PDFs with natural-language instructions using the nano-pdf CLI.
+metadata: {"clawdis":{"requires":{"bins":["nano-pdf"]},"install":[{"id":"pipx","kind":"shell","command":"python3 -m pip install --user pipx && python3 -m pipx ensurepath && pipx install nano-pdf","bins":["nano-pdf"],"label":"Install nano-pdf (pipx)"},{"id":"pip","kind":"shell","command":"python3 -m pip install --user nano-pdf","bins":["nano-pdf"],"label":"Install nano-pdf (pip --user)"}]}}
+---
+
+# nano-pdf
+
+Use `nano-pdf` to apply edits to a specific page in a PDF using a natural-language instruction.
+
+## Quick start
+
+```bash
+nano-pdf edit deck.pdf 1 "Change the title to 'Q3 Results' and fix the typo in the subtitle"
+```
+
+Notes:
+- Page numbers are 0-based or 1-based depending on the tool’s version/config; if the result looks off by one, retry with the other.
+- Always sanity-check the output PDF before sending it out.
--- a/skills/openai-image-gen/SKILL.md
+++ b/skills/openai-image-gen/SKILL.md
@@ -6,31 +6,25 @@ metadata: {"clawdis":{"requires":{"bins":["python3"],"env":["OPENAI_API_KEY"]},"

 # OpenAI Image Gen

-Generate a handful of "random but structured" prompts and render them via OpenAI Images API.
-
-## Setup
-
- Needs env: `OPENAI_API_KEY`
+Generate a handful of “random but structured” prompts and render them via the OpenAI Images API.

 ## Run

-From any directory (outputs to `~/Projects/tmp/...` when present; else `./tmp/...`):
-
 ```bash
-python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py
-open ~/Projects/tmp/openai-image-gen-*/index.html
+python3 {baseDir}/scripts/gen.py
+open ./tmp/openai-image-gen-*/index.html
 ```

 Useful flags:

 ```bash
-python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py --count 16 --model gpt-image-1.5
-python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
-python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
+python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1
+python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
+python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
 ```

 ## Output

 - `*.png` images
- `prompts.json` (prompt to file mapping)
+- `prompts.json` (prompt → file mapping)
 - `index.html` (thumbnail gallery)
--- a/skills/openai-image-gen/scripts/gen.py
+++ b/skills/openai-image-gen/scripts/gen.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+import argparse
+import base64
+import datetime as dt
+import json
+import os
+import random
+import re
+import sys
+import urllib.error
+import urllib.request
+from pathlib import Path
+
+
+def slugify(text: str) -> str:
+    text = text.lower().strip()
+    text = re.sub(r"[^a-z0-9]+", "-", text)
+    text = re.sub(r"-{2,}", "-", text).strip("-")
+    return text or "image"
+
+
+def default_out_dir() -> Path:
+    now = dt.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+    base = Path("./tmp")
+    base.mkdir(parents=True, exist_ok=True)
+    return base / f"openai-image-gen-{now}"
+
+
+def pick_prompts(count: int) -> list[str]:
+    subjects = [
+        "a lobster astronaut",
+        "a brutalist lighthouse",
+        "a cozy reading nook",
+        "a cyberpunk noodle shop",
+        "a Vienna street at dusk",
+        "a minimalist product photo",
+        "a surreal underwater library",
+    ]
+    styles = [
+        "ultra-detailed studio photo",
+        "35mm film still",
+        "isometric illustration",
+        "editorial photography",
+        "soft watercolor",
+        "architectural render",
+        "high-contrast monochrome",
+    ]
+    lighting = [
+        "golden hour",
+        "overcast soft light",
+        "neon lighting",
+        "dramatic rim light",
+        "candlelight",
+        "foggy atmosphere",
+    ]
+    prompts: list[str] = []
+    for _ in range(count):
+        prompts.append(
+            f"{random.choice(styles)} of {random.choice(subjects)}, {random.choice(lighting)}"
+        )
+    return prompts
+
+
+def request_images(
+    api_key: str,
+    prompt: str,
+    model: str,
+    size: str,
+    quality: str,
+) -> dict:
+    url = "https://api.openai.com/v1/images/generations"
+    body = json.dumps(
+        {
+            "model": model,
+            "prompt": prompt,
+            "size": size,
+            "quality": quality,
+            "n": 1,
+            "response_format": "b64_json",
+        }
+    ).encode("utf-8")
+    req = urllib.request.Request(
+        url,
+        method="POST",
+        headers={
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json",
+        },
+        data=body,
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=300) as resp:
+            return json.loads(resp.read().decode("utf-8"))
+    except urllib.error.HTTPError as e:
+        payload = e.read().decode("utf-8", errors="replace")
+        raise RuntimeError(f"OpenAI Images API failed ({e.code}): {payload}") from e
+
+
+def write_gallery(out_dir: Path, items: list[dict]) -> None:
+    thumbs = "\n".join(
+        [
+            f"""
+<figure>
+  <a href="{it["file"]}"><img src="{it["file"]}" loading="lazy" /></a>
+  <figcaption>{it["prompt"]}</figcaption>
+</figure>
+""".strip()
+            for it in items
+        ]
+    )
+    html = f"""<!doctype html>
+<meta charset="utf-8" />
+<title>openai-image-gen</title>
+<style>
+  :root {{ color-scheme: dark; }}
+  body {{ margin: 24px; font: 14px/1.4 ui-sans-serif, system-ui; background: #0b0f14; color: #e8edf2; }}
+  h1 {{ font-size: 18px; margin: 0 0 16px; }}
+  .grid {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); gap: 16px; }}
+  figure {{ margin: 0; padding: 12px; border: 1px solid #1e2a36; border-radius: 14px; background: #0f1620; }}
+  img {{ width: 100%; height: auto; border-radius: 10px; display: block; }}
+  figcaption {{ margin-top: 10px; color: #b7c2cc; }}
+  code {{ color: #9cd1ff; }}
+</style>
+<h1>openai-image-gen</h1>
+<p>Output: <code>{out_dir.as_posix()}</code></p>
+<div class="grid">
+{thumbs}
+</div>
+"""
+    (out_dir / "index.html").write_text(html, encoding="utf-8")
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser(description="Generate images via OpenAI Images API.")
+    ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
+    ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
+    ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
+    ap.add_argument("--size", default="1024x1024", help="Image size (e.g. 1024x1024, 1536x1024).")
+    ap.add_argument("--quality", default="high", help="Image quality (varies by model).")
+    ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
+    args = ap.parse_args()
+
+    api_key = (os.environ.get("OPENAI_API_KEY") or "").strip()
+    if not api_key:
+        print("Missing OPENAI_API_KEY", file=sys.stderr)
+        return 2
+
+    out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
+    out_dir.mkdir(parents=True, exist_ok=True)
+
+    prompts = [args.prompt] * args.count if args.prompt else pick_prompts(args.count)
+
+    items: list[dict] = []
+    for idx, prompt in enumerate(prompts, start=1):
+        print(f"[{idx}/{len(prompts)}] {prompt}")
+        res = request_images(api_key, prompt, args.model, args.size, args.quality)
+        b64 = res.get("data", [{}])[0].get("b64_json")
+        if not b64:
+            raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")
+        png = base64.b64decode(b64)
+        filename = f"{idx:03d}-{slugify(prompt)[:40]}.png"
+        (out_dir / filename).write_bytes(png)
+        items.append({"prompt": prompt, "file": filename})
+
+    (out_dir / "prompts.json").write_text(json.dumps(items, indent=2), encoding="utf-8")
+    write_gallery(out_dir, items)
+    print(f"\nWrote: {(out_dir / 'index.html').as_posix()}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/skills/openai-whisper-api/SKILL.md
+++ b/skills/openai-whisper-api/SKILL.md
@@ -0,0 +1,42 @@
+---
+name: openai-whisper-api
+description: Transcribe audio via OpenAI Audio Transcriptions API (Whisper).
+metadata: {"clawdis":{"requires":{"bins":["curl"],"env":["OPENAI_API_KEY"]},"primaryEnv":"OPENAI_API_KEY"}}
+---
+
+# OpenAI Whisper API (curl)
+
+Transcribe an audio file via OpenAI’s `/v1/audio/transcriptions` endpoint.
+
+## Quick start
+
+```bash
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a
+```
+
+Defaults:
+- Model: `whisper-1`
+- Output: the input path with its extension replaced by `.txt` (or `.json` with `--json`), e.g. `audio.m4a` → `audio.txt`
+
+## Useful flags
+
+```bash
+{baseDir}/scripts/transcribe.sh /path/to/audio.ogg --model whisper-1 --out /tmp/transcript.txt
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --language en
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --prompt "Speaker names: Peter, Daniel"
+{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --json --out /tmp/transcript.json
+```
+
+## API key
+
+Set `OPENAI_API_KEY`, or configure it in `~/.clawdis/clawdis.json`:
+
+```json5
+{
+  skills: {
+    "openai-whisper-api": {
+      apiKey: "OPENAI_KEY_HERE"
+    }
+  }
+}
+```
--- a/skills/openai-whisper-api/scripts/transcribe.sh
+++ b/skills/openai-whisper-api/scripts/transcribe.sh
@@ -0,0 +1,85 @@
+#!/usr/bin/env bash
+# Transcribe an audio file with OpenAI's /v1/audio/transcriptions endpoint.
+# Writes the transcript to --out (default: the input path with its extension
+# replaced by .txt, or .json with --json) and prints that path on stdout.
+set -euo pipefail
+
+usage() {
+  cat >&2 <<'EOF'
+Usage:
+  transcribe.sh <audio-file> [--model whisper-1] [--out /path/to/out.txt] [--language en] [--prompt "hint"] [--json]
+EOF
+  exit 2
+}
+
+# need_value <option> <remaining-arg-count>: fail with the usage text when the
+# option is the last argument. Without this, `shift 2` fails under `set -e` and
+# the script exits without printing anything.
+need_value() {
+  if [[ "$2" -lt 2 ]]; then
+    echo "Missing value for $1" >&2
+    usage
+  fi
+}
+
+if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+  usage
+fi
+
+in="${1:-}"
+shift || true
+
+model="whisper-1"
+out=""
+language=""
+prompt=""
+response_format="text"
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --model)
+      need_value "$1" "$#"
+      model="$2"
+      shift 2
+      ;;
+    --out)
+      need_value "$1" "$#"
+      out="$2"
+      shift 2
+      ;;
+    --language)
+      need_value "$1" "$#"
+      language="$2"
+      shift 2
+      ;;
+    --prompt)
+      need_value "$1" "$#"
+      prompt="$2"
+      shift 2
+      ;;
+    --json)
+      response_format="json"
+      shift 1
+      ;;
+    *)
+      echo "Unknown arg: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+if [[ ! -f "$in" ]]; then
+  echo "File not found: $in" >&2
+  exit 1
+fi
+
+if [[ "${OPENAI_API_KEY:-}" == "" ]]; then
+  echo "Missing OPENAI_API_KEY" >&2
+  exit 1
+fi
+
+if [[ "$out" == "" ]]; then
+  base="${in%.*}"
+  if [[ "$response_format" == "json" ]]; then
+    out="${base}.json"
+  else
+    out="${base}.txt"
+  fi
+fi
+
+mkdir -p "$(dirname "$out")"
+
+# Download into a temp file first so that an API error body never ends up in
+# (or overwrites) the transcript file.
+tmp="$(mktemp)"
+trap 'rm -f "$tmp"' EXIT
+
+# --form-string sends values literally; plain -F would treat a value that
+# starts with "@" or "<" (e.g. in a prompt) as a file to read.
+curl_args=(
+  -sS
+  -o "$tmp"
+  -w '%{http_code}'
+  -H "Authorization: Bearer $OPENAI_API_KEY"
+  -H "Accept: application/json"
+  -F "file=@${in}"
+  --form-string "model=${model}"
+  --form-string "response_format=${response_format}"
+)
+if [[ -n "$language" ]]; then
+  curl_args+=(--form-string "language=${language}")
+fi
+if [[ -n "$prompt" ]]; then
+  curl_args+=(--form-string "prompt=${prompt}")
+fi
+
+# With -o the response body goes to $tmp; stdout carries only the status code.
+status="$(curl "${curl_args[@]}" https://api.openai.com/v1/audio/transcriptions)"
+
+if [[ "$status" != 2?? ]]; then
+  echo "Transcription failed (HTTP ${status}):" >&2
+  cat "$tmp" >&2
+  echo >&2
+  exit 1
+fi
+
+# Copy rather than move so the output file is created with the usual umask
+# permissions instead of mktemp's restrictive ones.
+cat "$tmp" >"$out"
+
+echo "$out"
--- a/skills/video-frames/SKILL.md
+++ b/skills/video-frames/SKILL.md
@@ -0,0 +1,28 @@
+---
+name: video-frames
+description: Extract frames or short clips from videos using ffmpeg.
+metadata: {"clawdis":{"requires":{"bins":["ffmpeg"]},"install":[{"id":"brew","kind":"brew","formula":"ffmpeg","bins":["ffmpeg"],"label":"Install ffmpeg (brew)"}]}}
+---
+
+# Video Frames (ffmpeg)
+
+Extract a single frame from a video, or create quick thumbnails for inspection.
+
+## Quick start
+
+First frame:
+
+```bash
+{baseDir}/scripts/frame.sh /path/to/video.mp4 --out /tmp/frame.jpg
+```
+
+At a timestamp:
+
+```bash
+{baseDir}/scripts/frame.sh /path/to/video.mp4 --time 00:00:10 --out /tmp/frame-10s.jpg
+```
+
+## Notes
+
+- Prefer `--time` for “what is happening around here?”.
+- Use a `.jpg` for quick share; use `.png` for crisp UI frames.
--- a/skills/video-frames/scripts/frame.sh
+++ b/skills/video-frames/scripts/frame.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+usage() {
+  cat >&2 <<'EOF'
+Usage:
+  frame.sh <video-file> [--time HH:MM:SS] [--index N] --out /path/to/frame.jpg
+
+Examples:
+  frame.sh video.mp4 --out /tmp/frame.jpg
+  frame.sh video.mp4 --time 00:00:10 --out /tmp/frame-10s.jpg
+  frame.sh video.mp4 --index 0 --out /tmp/frame0.png
+EOF
+  exit 2
+}
+
+if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
+  usage
+fi
+
+in="${1:-}"
+shift || true
+
+time=""
+index=""
+out=""
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --time)
+      time="${2:-}"
+      shift 2
+      ;;
+    --index)
+      index="${2:-}"
+      shift 2
+      ;;
+    --out)
+      out="${2:-}"
+      shift 2
+      ;;
+    *)
+      echo "Unknown arg: $1" >&2
+      usage
+      ;;
+  esac
+done
+
+if [[ ! -f "$in" ]]; then
+  echo "File not found: $in" >&2
+  exit 1
+fi
+
+if [[ "$out" == "" ]]; then
+  echo "Missing --out" >&2
+  usage
+fi
+
+mkdir -p "$(dirname "$out")"
+
+if [[ "$index" != "" ]]; then
+  ffmpeg -hide_banner -loglevel error -y \
+    -i "$in" \
+    -vf "select=eq(n\\,${index})" \
+    -vframes 1 \
+    "$out"
+elif [[ "$time" != "" ]]; then
+  ffmpeg -hide_banner -loglevel error -y \
+    -ss "$time" \
+    -i "$in" \
+    -frames:v 1 \
+    "$out"
+else
+  ffmpeg -hide_banner -loglevel error -y \
+    -i "$in" \
+    -vf "select=eq(n\\,0)" \
+    -vframes 1 \
+    "$out"
+fi
+
+echo "$out"