From f3a63d5d54d949829bfdd4fa88cabb1b3071ee7b Mon Sep 17 00:00:00 2001 From: nickviljoen Date: Tue, 10 Mar 2026 11:56:36 +0200 Subject: [PATCH] added system prompt syncing for audit and project documentation - Removed instructions exclusion from export pipeline so system prompts flow through - Added system_prompt field to registration payload for compliance audits - Added tool_resources and actions to metadata - Created README.md and CLAUDE.md for project documentation Co-Authored-By: Claude Opus 4.6 --- CLAUDE.md | 47 ++++++++++++++++++ PLAN-prompt-audit.md | 106 ++++++++++++++++++++++++++++++++++++++++ README.md | 96 ++++++++++++++++++++++++++++++++++++ export_shared_agents.js | 1 - register_agents.py | 4 ++ 5 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 CLAUDE.md create mode 100644 PLAN-prompt-audit.md create mode 100644 README.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..c55b8f5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,47 @@ +# Agent Sync - Project Context + +## Overview + +This is the **agent-sync** tool — a lightweight pipeline that exports agent metadata from LibreChat's MongoDB and registers it with the Agent Tracker API. It runs on the `optical-librechat` GCP server as a daily cron job. + +## Architecture + +- **No running service** — these are standalone scripts triggered by cron +- Pipeline: `export_shared_agents.js` (MongoDB export) -> `shared_agents.json` -> `register_agents.py` (API POST) +- Orchestrated by `weekly_agent_sync.sh` + +## Key Files + +- `export_shared_agents.js` — MongoDB aggregation pipeline run via `mongosh` inside the `chat-mongodb` Docker container. Queries `aclentries`, `agents`, `users`, `conversations`, `messages`, and `transactions` collections. +- `register_agents.py` — Python script that maps exported fields to the Agent Tracker `AgentCollectorCreate` schema and POSTs them. Has retry logic, dry-run mode, and result logging. 
+- `weekly_agent_sync.sh` — Bash wrapper for the full pipeline. Deployed to `/opt/agent-sync/` on the server. + +## Server Details + +- **Server:** `optical-librechat` (GCP) +- **Deploy path:** `/opt/agent-sync/` +- **LibreChat source:** `/home/michael_clervi/LibreChat/` (Docker Compose, owned by michael_clervi) +- **Cron:** `0 0 * * *` (daily midnight, root crontab) +- **MongoDB container:** `chat-mongodb`, database `LibreChat` + +## API Target + +- **Endpoint:** `https://ai-sandbox.oliver.solutions/agent_collector/agents` +- **Auth:** Static API key via `X-API-Key` header +- SSL verification is disabled (internal/dev API) + +## Conventions + +- The JS export script uses MongoDB Extended JSON (relaxed mode) +- The Python script prunes `None`/empty fields before POSTing +- `shared_agents.json` and `registration_results.json` are generated output — not committed +- Python dependencies are minimal: `requests` and `urllib3` only + +## Dependencies + +- Agent Tracker API must accept any new fields added to the payload (Pydantic model: `AgentCollectorCreate`). Unknown fields are silently stripped. +- The `system_prompt` field requires Agent Tracker to be updated to accept it. + +## Deployment + +Files are copied manually to `/opt/agent-sync/` on the server. No restart needed — scripts execute on-demand via cron. diff --git a/PLAN-prompt-audit.md b/PLAN-prompt-audit.md new file mode 100644 index 0000000..16f19d4 --- /dev/null +++ b/PLAN-prompt-audit.md @@ -0,0 +1,106 @@ +# Agent-Sync: Prompt Audit — Implementation Plan + +## Context + +The agent-sync tool currently syncs agent metadata from LibreChat to Agent Tracker, but **explicitly excludes** the `instructions` field (system prompt) from the export. The Agent Tracker team needs these system prompts to perform automated compliance audits using AI analysis. + +This plan adds system prompt syncing through the existing pipeline — a minimal change to two files. 
+ +## What Changes + +The `instructions` field from LibreChat's `agents` collection needs to flow through the existing pipeline: + +``` +LibreChat MongoDB → export_shared_agents.js → shared_agents.json → register_agents.py → POST /agents API + (currently excludes instructions) (doesn't send system_prompt) +``` + +After changes: +``` +LibreChat MongoDB → export_shared_agents.js → shared_agents.json → register_agents.py → POST /agents API + (now INCLUDES instructions) (sends system_prompt field) +``` + +## Files to Modify + +### 1. `export_shared_agents.js` — Remove instructions exclusion (line 201) + +**Current code** (lines 199-206): +```js +{ $project: { + "agentDetails.versions": 0, + "agentDetails.instructions": 0, // ← DELETE THIS LINE + "authorDetails": 0, + "usageTimeline": 0, + "usageSummary": 0, + "tokenUsage": 0 + } +} +``` + +**After change** — just remove line 201: +```js +{ $project: { + "agentDetails.versions": 0, + "authorDetails": 0, + "usageTimeline": 0, + "usageSummary": 0, + "tokenUsage": 0 + } +} +``` + +This means the `instructions` field will now be included in the exported `shared_agents.json`. + +### 2. `register_agents.py` — Add system_prompt to payload (in `build_payload()`) + +**Add to the payload dict** (around line 170, alongside the other optional fields): +```python +# System prompt for audit analysis: +"system_prompt": agent.get("instructions") or None, +``` + +**Also add tool detail to metadata** (in the `metadata` dict, around line 129-140): +```python +"tool_resources": agent.get("tool_resources"), +"actions": agent.get("actions"), +``` + +The final prune at line 184 will automatically remove these if they're None/empty. + +## Verification + +1. Run the export on the LibreChat server: + ```bash + docker exec chat-mongodb mongosh LibreChat --quiet --file /opt/agent-sync/export_shared_agents.js > /opt/agent-sync/shared_agents.json + ``` + +2. 
Check that instructions appear in the JSON (at the top level of each record, or nested under `agentDetails`): + ```bash + python3 -c "import json; data=json.load(open('shared_agents.json')); print(any(d.get('instructions') or d.get('agentDetails',{}).get('instructions') for d in data))" + # Should print: True + ``` + +3. Dry-run the registration to confirm `system_prompt` is in the payload: + ```bash + python register_agents.py --input shared_agents.json --dry-run 2>&1 | grep system_prompt + ``` + +4. Once Agent Tracker has been updated to accept the `system_prompt` field in the collector API, run a full sync: + ```bash + ./weekly_agent_sync.sh + ``` + +## Dependencies + +- **Agent Tracker must be updated first** (or simultaneously) to accept the new `system_prompt` field in the `AgentCollectorCreate` model. Without this, the field will be silently ignored by the API (Pydantic strips unknown fields). +- No new Python packages needed. +- No environment variable changes needed. + +## Rollback + +If needed, simply re-add the exclusion line to `export_shared_agents.js`: +```js +"agentDetails.instructions": 0, +``` +The system prompt data already stored in Agent Tracker will remain but won't be updated on future syncs. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f9a3adf --- /dev/null +++ b/README.md @@ -0,0 +1,96 @@ +# Agent Sync + +Syncs agent metadata from LibreChat (MongoDB) to the Agent Tracker API. Runs as a daily cron job on the LibreChat web server (`optical-librechat`). 
+ +## Pipeline + +``` +LibreChat MongoDB --> export_shared_agents.js --> shared_agents.json --> register_agents.py --> Agent Tracker API + (aclentries, (mongosh script) (intermediate) (Python script) (POST /agents) + agents, users, + conversations, + messages, + transactions) +``` + +## Files + +| File | Description | +|------|-------------| +| `export_shared_agents.js` | MongoDB aggregation pipeline that exports shared agents with usage stats, token usage, and system prompts | +| `register_agents.py` | Python script that maps exported agent data to the Agent Tracker API payload and POSTs it | +| `weekly_agent_sync.sh` | Shell wrapper that runs the full export-then-register pipeline | +| `shared_agents.json` | Intermediate export output (generated, not committed) | +| `registration_results.json` | Log of API registration results from the last run | +| `requirements.txt` | Python dependencies (`requests`, `urllib3`) | + +## Server Deployment + +- **Server:** `optical-librechat` (GCP) +- **Install path:** `/opt/agent-sync/` +- **LibreChat install:** `/home/michael_clervi/LibreChat/` (Docker Compose) +- **LibreChat Docker containers:** `LibreChat-API`, `LibreChat-NGINX`, `chat-mongodb` +- **Cron schedule:** Daily at midnight (`0 0 * * *`), runs under root's crontab +- **Logs:** `/opt/agent-sync/log/agent_sync.log` +- **Python venv:** `/opt/agent-sync/venv/` + +## Setup (on the server) + +```bash +cd /opt/agent-sync +python3 -m venv venv +venv/bin/pip install -r requirements.txt +``` + +## Usage + +### Manual run + +```bash +sudo /opt/agent-sync/weekly_agent_sync.sh +``` + +### Dry run (preview payloads without sending) + +```bash +# Export first +docker exec -i chat-mongodb env MONGOSH_NO_RC=1 mongosh --norc --quiet LibreChat --file /dev/stdin < /opt/agent-sync/export_shared_agents.js > /opt/agent-sync/shared_agents.json + +# Preview +/opt/agent-sync/venv/bin/python /opt/agent-sync/register_agents.py --input /opt/agent-sync/shared_agents.json --dry-run +``` + +### 
Environment variables (optional) + +| Variable | Default | Description | +|----------|---------|-------------| +| `AGENT_REG_URL` | `https://ai-sandbox.oliver.solutions/agent_collector/agents` | Agent Tracker API endpoint | +| `AGENT_REG_KEY` | (built-in static key) | API key for authentication | + +## What gets synced + +For each shared agent (agents used by more than one user): + +- **Core fields:** name, description, purpose, model/provider, author email +- **System prompt:** the agent's `instructions` field (for compliance audit) +- **Usage timeline:** daily message counts per agent +- **Usage summary:** conversation count, unique users, total messages, first/last used dates +- **Token usage:** prompt tokens, completion tokens, total tokens (from LibreChat transactions) +- **Metadata:** tool resources, actions, avatar, category, project IDs +- **Agent URL:** direct link to the agent in LibreChat + +## Updating + +To deploy changes: + +1. Edit files locally and commit +2. Copy the changed files to `/opt/agent-sync/` on the server +3. 
No restart needed — there is no long-running service; cron launches the scripts fresh on each scheduled run + +## Rollback + +To stop syncing system prompts, re-add this line to the `$project` stage in `export_shared_agents.js`: + +```js +"agentDetails.instructions": 0, +``` diff --git a/export_shared_agents.js b/export_shared_agents.js index 6b6fc0c..4e1ff99 100755 --- a/export_shared_agents.js +++ b/export_shared_agents.js @@ -198,7 +198,6 @@ const pipeline = [ { $project: { "agentDetails.versions": 0, - "agentDetails.instructions": 0, "authorDetails": 0, "usageTimeline": 0, "usageSummary": 0, diff --git a/register_agents.py b/register_agents.py index 5e741bb..d4ab93d 100755 --- a/register_agents.py +++ b/register_agents.py @@ -137,6 +137,8 @@ def build_payload(agent: Dict[str, Any]) -> Dict[str, Any]: "avatar": avatar_path, "author_email": agent.get("author"), "raw_category": agent.get("category"), + "tool_resources": agent.get("tool_resources"), + "actions": agent.get("actions"), } # Prune empty metadata metadata = {k: v for k, v in metadata.items() if v not in (None, "", [], {})} @@ -166,6 +168,8 @@ def build_payload(agent: Dict[str, Any]) -> Dict[str, Any]: "contact_person": contact_person, "tags": tags or None, "metadata": metadata or None, + # System prompt for audit analysis: + "system_prompt": agent.get("instructions") or None, # Agent URL: "url": agent_url, # Usage data: