feat(skills): add media/transcription helpers

This commit is contained in:
Peter Steinberger
2025-12-20 12:53:09 +00:00
parent e0cd5650c5
commit e1a3bab7e5
10 changed files with 579 additions and 31 deletions

View File

@@ -1,35 +1,29 @@
---
name: brave-search
description: Web search and content extraction via Brave Search API.
metadata: {"clawdis":{"requires":{"bins":["node","npm"],"env":["BRAVE_API_KEY"]},"primaryEnv":"BRAVE_API_KEY","install":[{"id":"node-brew","kind":"brew","formula":"node","bins":["node","npm"],"label":"Install Node.js (brew)"}]}}
metadata: {"clawdis":{"requires":{"bins":["node"],"env":["BRAVE_API_KEY"]},"primaryEnv":"BRAVE_API_KEY"}}
---
# Brave Search
Headless web search and content extraction using Brave Search. No browser required.
## Setup (run once)
```bash
cd ~/Projects/agent-scripts/skills/brave-search
npm ci
```
Needs env: `BRAVE_API_KEY`.
Headless web search (and lightweight content extraction) using Brave Search API. No browser required.
## Search
```bash
./search.js "query" # Basic search (5 results)
./search.js "query" -n 10 # More results
./search.js "query" --content # Include page content as markdown
./search.js "query" -n 3 --content # Combined
node {baseDir}/scripts/search.mjs "query"
node {baseDir}/scripts/search.mjs "query" -n 10
node {baseDir}/scripts/search.mjs "query" --content
node {baseDir}/scripts/search.mjs "query" -n 3 --content
```
## Extract Page Content
## Extract a page
```bash
./content.js https://example.com/article
node {baseDir}/scripts/content.mjs "https://example.com/article"
```
Fetches a URL and extracts readable content as markdown.
Notes:
- Needs `BRAVE_API_KEY`.
- Content extraction is best-effort (good for articles; not for app-like sites).
- If a site is blocked or too JS-heavy, prefer the `summarize` skill (it can use a Firecrawl fallback).

View File

@@ -0,0 +1,53 @@
#!/usr/bin/env node
/**
 * Print the CLI synopsis for content.mjs to stderr and terminate the
 * process with exit status 2.
 */
function usage() {
  const synopsis = "Usage: content.mjs <url>";
  console.error(synopsis);
  process.exit(2);
}
// Named character references decoded by decodeEntities(); anything else is left as-is.
const HTML_ENTITIES = new Map([
  ["nbsp", " "],
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
]);

/**
 * Decode a small set of named entities plus numeric (decimal/hex) character
 * references in a single pass, so already-decoded output is never decoded
 * again (e.g. `&amp;lt;` becomes `&lt;`, not `<`).
 *
 * @param {string} text - Text with HTML tags already stripped.
 * @returns {string} Text with recognised entities replaced.
 */
function decodeEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (match, body) => {
    if (body[0] !== "#") return HTML_ENTITIES.get(body) ?? match;
    const isHex = body[1] === "x" || body[1] === "X";
    const code = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // Leave out-of-range references untouched instead of throwing a RangeError.
    return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : match;
  });
}

/**
 * Fetch a URL and reduce its HTML to a plain-text bullet list.
 *
 * Very lightweight "readability-ish" extraction without dependencies:
 *   - drop script/style/nav/footer/header elements
 *   - turn <br>, </p> and </div> into line breaks, strip all other tags
 *   - decode common entities and normalise whitespace
 *   - keep the first 30 paragraphs, one `- ` bullet each
 *
 * @param {string} url - Page to fetch.
 * @returns {Promise<string>} Bullet list ending in a newline. For a non-2xx
 *   response, a `> Fetch failed (<status>).` note followed by up to 2000
 *   characters of the cleaned body.
 */
export async function fetchAsMarkdown(url) {
  const resp = await fetch(url, {
    headers: { "User-Agent": "clawdis-brave-search/1.0" },
  });
  const html = await resp.text();

  const withoutTags = html
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<(nav|footer|header)[\s\S]*?<\/\1>/gi, " ")
    .replace(/<br\s*\/?>/gi, "\n")
    .replace(/<\/p>/gi, "\n\n")
    .replace(/<\/div>/gi, "\n")
    .replace(/<[^>]+>/g, " ");

  // Entities are decoded only after tag stripping so a decoded "<" is never
  // mistaken for markup.
  const cleaned = decodeEntities(withoutTags)
    // Trim trailing *horizontal* whitespace only. Using \s here would also
    // swallow the blank lines that separate paragraphs.
    .replace(/[ \t\r]+\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .replace(/[ \t]{2,}/g, " ")
    .trim();

  if (!resp.ok) {
    return `> Fetch failed (${resp.status}).\n\n${cleaned.slice(0, 2000)}\n`;
  }

  const paras = cleaned
    .split("\n\n")
    .map((p) => p.trim())
    .filter(Boolean)
    .slice(0, 30);
  return paras.map((p) => `- ${p}`).join("\n") + "\n";
}
/**
 * Report whether this module is the script Node was started with.
 *
 * The CLI below must not run when another module imports this file for
 * `fetchAsMarkdown`: it would parse the importer's argv and try to fetch its
 * first argument as a URL.
 *
 * @returns {Promise<boolean>} True when `process.argv[1]` resolves to this file.
 */
async function isEntryPoint() {
  const entry = process.argv[1];
  if (!entry) return false;
  const [{ realpathSync }, { pathToFileURL }] = await Promise.all([
    import("node:fs"),
    import("node:url"),
  ]);
  try {
    // realpath: argv[1] may be a symlink, while import.meta.url normally
    // refers to the resolved file.
    return import.meta.url === pathToFileURL(realpathSync(entry)).href;
  } catch {
    // argv[1] does not name an existing file, so this module is not the entry.
    return false;
  }
}

if (await isEntryPoint()) {
  const args = process.argv.slice(2);
  if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();
  const url = args[0];
  process.stdout.write(await fetchAsMarkdown(url));
}

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env node
import { readFileSync } from "node:fs";
/**
 * Print the CLI synopsis for search.mjs to stderr and terminate the
 * process with exit status 2.
 */
function usage() {
  const synopsis = 'Usage: search.mjs "query" [-n 5] [--content]';
  console.error(synopsis);
  process.exit(2);
}
// ---- Argument parsing -------------------------------------------------------
const args = process.argv.slice(2);
if (args.length === 0 || args[0] === "-h" || args[0] === "--help") usage();

const query = args[0];
let n = 5;
let withContent = false;
for (let i = 1; i < args.length; i++) {
  const a = args[i];
  if (a === "-n") {
    // A trailing "-n" with no value keeps the default of 5.
    const raw = args[i + 1] ?? "5";
    n = Number.parseInt(raw, 10);
    if (Number.isNaN(n)) {
      // Otherwise "count=NaN" is sent and every result is sliced away.
      console.error(`Invalid value for -n: ${raw}`);
      usage();
    }
    i++;
    continue;
  }
  if (a === "--content") {
    withContent = true;
    continue;
  }
  console.error(`Unknown arg: ${a}`);
  usage();
}

const apiKey = (process.env.BRAVE_API_KEY ?? "").trim();
if (!apiKey) {
  console.error("Missing BRAVE_API_KEY");
  process.exit(1);
}

// One clamped value (1..20) is used both for the request and for trimming the
// response, so "-n 0" or a negative value cannot make the two disagree.
const count = Math.max(1, Math.min(n, 20));

// ---- Search request ---------------------------------------------------------
const endpoint = new URL("https://api.search.brave.com/res/v1/web/search");
endpoint.searchParams.set("q", query);
endpoint.searchParams.set("count", String(count));
endpoint.searchParams.set("text_decorations", "false");
endpoint.searchParams.set("safesearch", "moderate");

const resp = await fetch(endpoint, {
  headers: {
    Accept: "application/json",
    "X-Subscription-Token": apiKey,
  },
});
if (!resp.ok) {
  // The body is best-effort context for the error message only.
  const text = await resp.text().catch(() => "");
  throw new Error(`Brave Search failed (${resp.status}): ${text}`);
}

const data = await resp.json();
const results = (data?.web?.results ?? []).slice(0, count);

// ---- Result list ------------------------------------------------------------
const lines = [];
for (const r of results) {
  const title = String(r?.title ?? "").trim();
  const url = String(r?.url ?? "").trim();
  const desc = String(r?.description ?? "").trim();
  if (!title || !url) continue;
  lines.push(`- ${title}\n ${url}${desc ? `\n ${desc}` : ""}`);
}
process.stdout.write(lines.join("\n\n") + "\n");

// ---- Optional page content --------------------------------------------------
// No process.exit() here: letting the script end naturally gives stdout time
// to flush when it is piped.
if (withContent) {
  // Load the extractor once instead of once per result.
  const { fetchAsMarkdown } = await import("./content.mjs");
  process.stdout.write("\n---\n\n");
  for (const r of results) {
    const title = String(r?.title ?? "").trim();
    const url = String(r?.url ?? "").trim();
    if (!url) continue;
    process.stdout.write(`# ${title || url}\n${url}\n\n`);
    let text;
    try {
      text = await fetchAsMarkdown(url);
    } catch (err) {
      // One unreachable page should not discard the remaining results.
      text = `> Fetch failed (${err?.message ?? err}).\n`;
    }
    process.stdout.write(text.trimEnd() + "\n\n");
  }
}

19
skills/nano-pdf/SKILL.md Normal file
View File

@@ -0,0 +1,19 @@
---
name: nano-pdf
description: Edit PDFs with natural-language instructions using the nano-pdf CLI.
metadata: {"clawdis":{"requires":{"bins":["nano-pdf"]},"install":[{"id":"pipx","kind":"shell","command":"python3 -m pip install --user pipx && python3 -m pipx ensurepath && pipx install nano-pdf","bins":["nano-pdf"],"label":"Install nano-pdf (pipx)"},{"id":"pip","kind":"shell","command":"python3 -m pip install --user nano-pdf","bins":["nano-pdf"],"label":"Install nano-pdf (pip --user)"}]}}
---
# nano-pdf
Use `nano-pdf` to apply edits to a specific page in a PDF using a natural-language instruction.
## Quick start
```bash
nano-pdf edit deck.pdf 1 "Change the title to 'Q3 Results' and fix the typo in the subtitle"
```
Notes:
- Page numbers are 0-based or 1-based depending on the tool's version/config; if the result looks off by one, retry with the other.
- Always sanity-check the output PDF before sending it out.

View File

@@ -6,31 +6,25 @@ metadata: {"clawdis":{"requires":{"bins":["python3"],"env":["OPENAI_API_KEY"]},"
# OpenAI Image Gen
Generate a handful of "random but structured" prompts and render them via OpenAI Images API.
## Setup
- Needs env: `OPENAI_API_KEY`
Generate a handful of random but structured prompts and render them via the OpenAI Images API.
## Run
From any directory (outputs to `./tmp/openai-image-gen-<timestamp>/` under the current directory unless `--out-dir` is given):
```bash
python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py
open ~/Projects/tmp/openai-image-gen-*/index.html
python3 {baseDir}/scripts/gen.py
open ./tmp/openai-image-gen-*/index.html
```
Useful flags:
```bash
python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py --count 16 --model gpt-image-1.5
python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
python3 ~/Projects/agent-scripts/skills/openai-image-gen/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
python3 {baseDir}/scripts/gen.py --count 16 --model gpt-image-1
python3 {baseDir}/scripts/gen.py --prompt "ultra-detailed studio photo of a lobster astronaut" --count 4
python3 {baseDir}/scripts/gen.py --size 1536x1024 --quality high --out-dir ./out/images
```
## Output
- `*.png` images
- `prompts.json` (prompt to file mapping)
- `prompts.json` (prompt → file mapping)
- `index.html` (thumbnail gallery)

View File

@@ -0,0 +1,172 @@
#!/usr/bin/env python3
import argparse
import base64
import datetime as dt
import json
import os
import random
import re
import sys
import urllib.error
import urllib.request
from pathlib import Path
def slugify(text: str) -> str:
    """Turn arbitrary text into a lowercase, hyphen-separated slug.

    Each run of characters outside ``a-z0-9`` (after lowercasing) acts as a
    separator; the remaining chunks are joined with single hyphens.
    Returns ``"image"`` when no chunk is left.
    """
    chunks = [chunk for chunk in re.split(r"[^a-z0-9]+", text.lower()) if chunk]
    if not chunks:
        return "image"
    return "-".join(chunks)
def default_out_dir() -> Path:
    """Return a timestamped output path under ``./tmp``.

    ``./tmp`` is created if needed; the returned
    ``openai-image-gen-<YYYY-MM-DD-HH-MM-SS>`` directory itself is not.
    """
    stamp = f"{dt.datetime.now():%Y-%m-%d-%H-%M-%S}"
    tmp_root = Path("./tmp")
    tmp_root.mkdir(parents=True, exist_ok=True)
    return tmp_root / ("openai-image-gen-" + stamp)
def pick_prompts(count: int) -> list[str]:
    """Build ``count`` random prompts of the form ``"<style> of <subject>, <lighting>"``.

    Each part is drawn independently with ``random.choice`` from a fixed pool,
    in the order style, subject, lighting.
    """
    subject_pool = (
        "a lobster astronaut",
        "a brutalist lighthouse",
        "a cozy reading nook",
        "a cyberpunk noodle shop",
        "a Vienna street at dusk",
        "a minimalist product photo",
        "a surreal underwater library",
    )
    style_pool = (
        "ultra-detailed studio photo",
        "35mm film still",
        "isometric illustration",
        "editorial photography",
        "soft watercolor",
        "architectural render",
        "high-contrast monochrome",
    )
    lighting_pool = (
        "golden hour",
        "overcast soft light",
        "neon lighting",
        "dramatic rim light",
        "candlelight",
        "foggy atmosphere",
    )

    def one_prompt() -> str:
        # Draw order (style, subject, lighting) matters for seeded runs.
        style = random.choice(style_pool)
        subject = random.choice(subject_pool)
        light = random.choice(lighting_pool)
        return f"{style} of {subject}, {light}"

    return [one_prompt() for _ in range(count)]
def request_images(
    api_key: str,
    prompt: str,
    model: str,
    size: str,
    quality: str,
) -> dict:
    """Request one generated image from the OpenAI Images API.

    Args:
        api_key: Bearer token sent in the ``Authorization`` header.
        prompt: Text prompt for the image.
        model: Image model id (e.g. ``gpt-image-1``, ``dall-e-3``).
        size: Image size string such as ``1024x1024``.
        quality: Quality setting, passed through unchanged.

    Returns:
        The decoded JSON response body.

    Raises:
        RuntimeError: On an HTTP error status; the message carries the status
            code and the response body.
    """
    url = "https://api.openai.com/v1/images/generations"
    payload = {
        "model": model,
        "prompt": prompt,
        "size": size,
        "quality": quality,
        "n": 1,
    }
    # The API documents `response_format` as unsupported for gpt-image models
    # (they always return base64), so it is only sent for other models, where
    # it is needed to get `b64_json` back instead of a URL.
    if not model.startswith("gpt-image"):
        payload["response_format"] = "b64_json"

    req = urllib.request.Request(
        url,
        method="POST",
        headers={
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        },
        data=json.dumps(payload).encode("utf-8"),
    )
    try:
        # Generous timeout: a single request may run for a long time.
        with urllib.request.urlopen(req, timeout=300) as resp:
            return json.loads(resp.read().decode("utf-8"))
    except urllib.error.HTTPError as e:
        body = e.read().decode("utf-8", errors="replace")
        raise RuntimeError(f"OpenAI Images API failed ({e.code}): {body}") from e
def write_gallery(out_dir: Path, items: list[dict]) -> None:
    """Write ``index.html``, a thumbnail gallery, into ``out_dir``.

    Args:
        out_dir: Existing directory that receives ``index.html``.
        items: One dict per image with ``"file"`` (image path relative to
            ``out_dir``) and ``"prompt"`` (caption text).

    Prompts, file names and the output path are HTML-escaped, so a prompt
    containing ``<``, ``&`` or quotes is shown literally instead of being
    parsed as markup.
    """
    # Imported locally under another name: `html` would otherwise be an easy
    # name to shadow with the page text.
    from html import escape

    def figure(item: dict) -> str:
        src = escape(str(item["file"]))
        caption = escape(str(item["prompt"]))
        return f"""
<figure>
<a href="{src}"><img src="{src}" loading="lazy" /></a>
<figcaption>{caption}</figcaption>
</figure>
""".strip()

    thumbs = "\n".join(figure(it) for it in items)
    page = f"""<!doctype html>
<meta charset="utf-8" />
<title>openai-image-gen</title>
<style>
:root {{ color-scheme: dark; }}
body {{ margin: 24px; font: 14px/1.4 ui-sans-serif, system-ui; background: #0b0f14; color: #e8edf2; }}
h1 {{ font-size: 18px; margin: 0 0 16px; }}
.grid {{ display: grid; grid-template-columns: repeat(auto-fill, minmax(240px, 1fr)); gap: 16px; }}
figure {{ margin: 0; padding: 12px; border: 1px solid #1e2a36; border-radius: 14px; background: #0f1620; }}
img {{ width: 100%; height: auto; border-radius: 10px; display: block; }}
figcaption {{ margin-top: 10px; color: #b7c2cc; }}
code {{ color: #9cd1ff; }}
</style>
<h1>openai-image-gen</h1>
<p>Output: <code>{escape(out_dir.as_posix())}</code></p>
<div class="grid">
{thumbs}
</div>
"""
    (out_dir / "index.html").write_text(page, encoding="utf-8")
def main() -> int:
    """CLI entry point: generate images and write them plus a gallery to disk.

    Parses the command line, requests one image per prompt, and writes into
    the output directory:
      - ``NNN-<slug>.png`` for each image,
      - ``prompts.json`` mapping each prompt to its file,
      - ``index.html`` via ``write_gallery``.

    Returns:
        0 on success, 2 when ``OPENAI_API_KEY`` is not set.

    Raises:
        RuntimeError: When a response carries no ``b64_json`` image data
            (including an empty or missing ``data`` list).
    """
    ap = argparse.ArgumentParser(description="Generate images via OpenAI Images API.")
    ap.add_argument("--prompt", help="Single prompt. If omitted, random prompts are generated.")
    ap.add_argument("--count", type=int, default=8, help="How many images to generate.")
    ap.add_argument("--model", default="gpt-image-1", help="Image model id.")
    ap.add_argument("--size", default="1024x1024", help="Image size (e.g. 1024x1024, 1536x1024).")
    ap.add_argument("--quality", default="high", help="Image quality (varies by model).")
    ap.add_argument("--out-dir", default="", help="Output directory (default: ./tmp/openai-image-gen-<ts>).")
    args = ap.parse_args()

    api_key = (os.environ.get("OPENAI_API_KEY") or "").strip()
    if not api_key:
        print("Missing OPENAI_API_KEY", file=sys.stderr)
        return 2

    out_dir = Path(args.out_dir).expanduser() if args.out_dir else default_out_dir()
    out_dir.mkdir(parents=True, exist_ok=True)

    # With --prompt the same prompt is rendered `count` times.
    prompts = [args.prompt] * args.count if args.prompt else pick_prompts(args.count)

    items: list[dict] = []
    for idx, prompt in enumerate(prompts, start=1):
        print(f"[{idx}/{len(prompts)}] {prompt}")
        res = request_images(api_key, prompt, args.model, args.size, args.quality)

        # Walk the response defensively: `"data": []` or `"data": null` must
        # produce the descriptive error below, not IndexError/TypeError.
        data = res.get("data") if isinstance(res, dict) else None
        first = data[0] if isinstance(data, list) and data else None
        b64 = first.get("b64_json") if isinstance(first, dict) else None
        if not b64:
            raise RuntimeError(f"Unexpected response: {json.dumps(res)[:400]}")

        png = base64.b64decode(b64)
        # Slug is capped at 40 characters to keep file names short.
        filename = f"{idx:03d}-{slugify(prompt)[:40]}.png"
        (out_dir / filename).write_bytes(png)
        items.append({"prompt": prompt, "file": filename})

    (out_dir / "prompts.json").write_text(json.dumps(items, indent=2), encoding="utf-8")
    write_gallery(out_dir, items)
    print(f"\nWrote: {(out_dir / 'index.html').as_posix()}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View File

@@ -0,0 +1,42 @@
---
name: openai-whisper-api
description: Transcribe audio via OpenAI Audio Transcriptions API (Whisper).
metadata: {"clawdis":{"requires":{"bins":["curl"],"env":["OPENAI_API_KEY"]},"primaryEnv":"OPENAI_API_KEY"}}
---
# OpenAI Whisper API (curl)
Transcribe an audio file via OpenAI's `/v1/audio/transcriptions` endpoint.
## Quick start
```bash
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a
```
Defaults:
- Model: `whisper-1`
- Output: `<input>.txt`
## Useful flags
```bash
{baseDir}/scripts/transcribe.sh /path/to/audio.ogg --model whisper-1 --out /tmp/transcript.txt
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --language en
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --prompt "Speaker names: Peter, Daniel"
{baseDir}/scripts/transcribe.sh /path/to/audio.m4a --json --out /tmp/transcript.json
```
## API key
Set `OPENAI_API_KEY`, or configure it in `~/.clawdis/clawdis.json`:
```json5
{
skills: {
"openai-whisper-api": {
apiKey: "OPENAI_KEY_HERE"
}
}
}
```

View File

@@ -0,0 +1,85 @@
#!/usr/bin/env bash
set -euo pipefail
# Print the command synopsis to stderr and exit with status 2.
usage() {
  printf '%s\n' \
    'Usage:' \
    'transcribe.sh <audio-file> [--model whisper-1] [--out /path/to/out.txt] [--language en] [--prompt "hint"] [--json]' \
    >&2
  exit 2
}
# Show usage when no input file is given or help is requested.
if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
fi

in="${1:-}"
shift || true

model="whisper-1"
out=""
language=""
prompt=""
response_format="text"

# Abort with a clear message when a value-taking flag is the last argument.
# Without this, `shift 2` fails under `set -e` and the script dies silently.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Missing value for $1" >&2
    usage
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --model)
      require_value "$@"
      model="$2"
      shift 2
      ;;
    --out)
      require_value "$@"
      out="$2"
      shift 2
      ;;
    --language)
      require_value "$@"
      language="$2"
      shift 2
      ;;
    --prompt)
      require_value "$@"
      prompt="$2"
      shift 2
      ;;
    --json)
      response_format="json"
      shift 1
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      ;;
  esac
done

if [[ ! -f "$in" ]]; then
  echo "File not found: $in" >&2
  exit 1
fi

if [[ "${OPENAI_API_KEY:-}" == "" ]]; then
  echo "Missing OPENAI_API_KEY" >&2
  exit 1
fi

# Default output: the input path with its extension replaced by .txt/.json.
if [[ "$out" == "" ]]; then
  name="${in##*/}"
  # Strip an extension only if the file name itself has one; a dot in a
  # directory name (./my.dir/audio) must not truncate the path.
  if [[ "$name" == ?*.* ]]; then
    base="${in%.*}"
  else
    base="$in"
  fi
  if [[ "$response_format" == "json" ]]; then
    out="${base}.json"
  else
    out="${base}.txt"
  fi
fi

mkdir -p "$(dirname "$out")"

# --form-string sends text fields literally; with -F a value starting with
# '@' or '<' is read from a file and ';' introduces field attributes.
form_args=(
  -F "file=@${in}"
  --form-string "model=${model}"
  --form-string "response_format=${response_format}"
)
if [[ -n "$language" ]]; then
  form_args+=(--form-string "language=${language}")
fi
if [[ -n "$prompt" ]]; then
  form_args+=(--form-string "prompt=${prompt}")
fi

# Capture the HTTP status so an API error is not saved as a "transcript"
# with exit status 0.
http_code="$(
  curl -sS https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Accept: application/json" \
    "${form_args[@]}" \
    -o "$out" \
    -w '%{http_code}'
)"

if [[ "$http_code" != 2?? ]]; then
  echo "Transcription failed (HTTP ${http_code}):" >&2
  cat "$out" >&2 || true
  echo >&2
  rm -f "$out"
  exit 1
fi

echo "$out"

View File

@@ -0,0 +1,28 @@
---
name: video-frames
description: Extract frames or short clips from videos using ffmpeg.
metadata: {"clawdis":{"requires":{"bins":["ffmpeg"]},"install":[{"id":"brew","kind":"brew","formula":"ffmpeg","bins":["ffmpeg"],"label":"Install ffmpeg (brew)"}]}}
---
# Video Frames (ffmpeg)
Extract a single frame from a video, or create quick thumbnails for inspection.
## Quick start
First frame:
```bash
{baseDir}/scripts/frame.sh /path/to/video.mp4 --out /tmp/frame.jpg
```
At a timestamp:
```bash
{baseDir}/scripts/frame.sh /path/to/video.mp4 --time 00:00:10 --out /tmp/frame-10s.jpg
```
## Notes
- Prefer `--time` for “what is happening around here?”.
- Use a `.jpg` for quick share; use `.png` for crisp UI frames.

View File

@@ -0,0 +1,81 @@
#!/usr/bin/env bash
set -euo pipefail
# Print the command synopsis and examples to stderr, then exit with status 2.
usage() {
  printf '%s\n' \
    'Usage:' \
    'frame.sh <video-file> [--time HH:MM:SS] [--index N] --out /path/to/frame.jpg' \
    'Examples:' \
    'frame.sh video.mp4 --out /tmp/frame.jpg' \
    'frame.sh video.mp4 --time 00:00:10 --out /tmp/frame-10s.jpg' \
    'frame.sh video.mp4 --index 0 --out /tmp/frame0.png' \
    >&2
  exit 2
}
# Show usage when no input file is given or help is requested.
if [[ "${1:-}" == "" || "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then
  usage
fi

in="${1:-}"
shift || true

time=""
index=""
out=""

# Abort with a clear message when a value-taking flag is the last argument.
# Without this, `shift 2` fails under `set -e` and the script dies silently.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Missing value for $1" >&2
    usage
  fi
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --time)
      require_value "$@"
      time="$2"
      shift 2
      ;;
    --index)
      require_value "$@"
      index="$2"
      shift 2
      ;;
    --out)
      require_value "$@"
      out="$2"
      shift 2
      ;;
    *)
      echo "Unknown arg: $1" >&2
      usage
      ;;
  esac
done

if [[ ! -f "$in" ]]; then
  echo "File not found: $in" >&2
  exit 1
fi

if [[ "$out" == "" ]]; then
  echo "Missing --out" >&2
  usage
fi

# The index is spliced into an ffmpeg filter expression, so accept digits only.
if [[ "$index" != "" && ! "$index" =~ ^[0-9]+$ ]]; then
  echo "Invalid --index (expected a non-negative integer): $index" >&2
  usage
fi

mkdir -p "$(dirname "$out")"

# With neither --index nor --time, extract the first frame (index 0).
if [[ "$index" == "" && "$time" == "" ]]; then
  index="0"
fi

if [[ "$index" != "" ]]; then
  # --index takes precedence over --time when both are given.
  ffmpeg -hide_banner -loglevel error -y \
    -i "$in" \
    -vf "select=eq(n\\,${index})" \
    -frames:v 1 \
    "$out"
else
  # -ss before -i seeks in the input before decoding.
  ffmpeg -hide_banner -loglevel error -y \
    -ss "$time" \
    -i "$in" \
    -frames:v 1 \
    "$out"
fi

echo "$out"