diff --git a/src/routers/ingest.py b/src/routers/ingest.py index 77711ad..27d357a 100644 --- a/src/routers/ingest.py +++ b/src/routers/ingest.py @@ -42,9 +42,14 @@ async def ingest( slug=s.project_slug, display_name=_slug_to_name(s.project_slug), root_path=body.root_path, + repo_url=s.repo_url, ) db.add(project) await db.flush() + else: + # Update repo_url if we now have one and didn't before + if s.repo_url and not project.repo_url: + project.repo_url = s.repo_url # Upsert session (dedup by user_id + session_id + date) stmt = insert(Session).values( diff --git a/src/schemas.py b/src/schemas.py index d1ff511..d7ae52f 100644 --- a/src/schemas.py +++ b/src/schemas.py @@ -89,6 +89,7 @@ class SessionPayload(BaseModel): commits: list[str] = [] tools_used: dict[str, int] = {} files_changed: list[str] = [] + repo_url: str = "" raw_stats: dict[str, Any] = {} diff --git a/src/static/collector/cc-collector.py b/src/static/collector/cc-collector.py index 5bed872..3d0a5a4 100644 --- a/src/static/collector/cc-collector.py +++ b/src/static/collector/cc-collector.py @@ -234,6 +234,8 @@ def collect() -> list: # Derive repo path from folder name by replacing dashes with slashes after common prefix repo_path = _infer_repo_path(folder_key) + repo_url = _get_repo_url(repo_path) + for sid, data in raw_sessions.items(): if not data["timestamps"]: continue @@ -268,6 +270,7 @@ def collect() -> list: "commits": commits, "tools_used": tools, "files_changed": work["files_changed"], + "repo_url": repo_url, "raw_stats": {}, }) @@ -276,14 +279,18 @@ def collect() -> list: def _infer_slug(folder_name: str) -> str: - """Convert folder name like '-Volumes-SSD-Projects-Oliver-semblance' → 'semblance'.""" - # Remove leading dashes + """Strip root prefix, return full project slug. + '-Volumes-SSD-Projects-Oliver-ford-gechub-sftp' → 'ford-gechub-sftp' + '-Volumes-SSD-Projects-Oliver' (root itself) → 'general' + """ + prefix = _root_prefix(ROOT_PATH).lstrip("-") # e.g. "Volumes-SSD-Projects-Oliver" name = folder_name.lstrip("-") - # Split on dashes, take last meaningful segment - parts = name.split("-") - if len(parts) >= 1: - return parts[-1] or name - return name + if name == prefix: + return "general" + if name.startswith(prefix + "-"): + return name[len(prefix) + 1:] + # fallback: last segment + return name.split("-")[-1] or name def _infer_repo_path(folder_name: str) -> Path | None: @@ -301,6 +308,31 @@ def _infer_repo_path(folder_name: str) -> Path | None: return None +def _get_repo_url(repo_path: Path | None) -> str: + """Return the HTTPS remote URL for the repo at repo_path, or ''.""" + if not repo_path or not (repo_path / ".git").exists(): + return "" + try: + result = subprocess.run( + ["git", "-C", str(repo_path), "remote", "get-url", "origin"], + capture_output=True, text=True, timeout=3, + ) + raw = result.stdout.strip() + if not raw: + return "" + # git@bitbucket.org:zlalani/repo.git → https://bitbucket.org/zlalani/repo + # git@github.com:org/repo.git → https://github.com/org/repo + if raw.startswith("git@"): + raw = re.sub(r"^git@([^:]+):", r"https://\1/", raw) + # Strip basic-auth userinfo: https://user@host/path → https://host/path + raw = re.sub(r"https://[^@/]+@", "https://", raw) + # Strip trailing .git + raw = raw.removesuffix(".git") + return raw + except Exception: + return "" + + # ── Send ────────────────────────────────────────────────────────────────────── def send(sessions: list): if not sessions: