271 lines
9.5 KiB
Python
271 lines
9.5 KiB
Python
# This module contains Git utilities, used by our [`load_git`][griffe.load_git] function,
|
|
# which in turn is used to load the API for different snapshots of a Git repository
|
|
# and find breaking changes between them.
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import unicodedata
|
|
from contextlib import contextmanager
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
from typing import TYPE_CHECKING, Literal
|
|
from urllib.parse import urlsplit, urlunsplit
|
|
|
|
from griffe._internal.exceptions import BuiltinModuleError, GitError
|
|
|
|
if TYPE_CHECKING:
|
|
from collections.abc import Iterator
|
|
|
|
from griffe._internal.models import Module
|
|
|
|
|
|
# Prefix of the temporary directories created to hold throwaway Git worktrees.
_WORKTREE_PREFIX = "griffe-worktree-"
|
|
|
|
|
|
def _normalize(value: str) -> str:
    """Turn an arbitrary string (e.g. a Git reference) into a dash-separated slug.

    The value is NFKC-normalized, every run of non-word characters is replaced
    by a single dash, and leading/trailing dashes are removed.

    Parameters:
        value: The string to normalize.

    Returns:
        The normalized string.
    """
    canonical = unicodedata.normalize("NFKC", value)
    dashed = re.sub(r"[^\w]+", "-", canonical)
    collapsed = re.sub(r"[-\s]+", "-", dashed)
    return collapsed.strip("-")
|
|
|
|
|
|
def _git(*args: str, check: bool = True) -> str:
    """Run a Git command and return its output.

    Standard error is redirected into standard output, so both the returned
    text and the error message contain the two streams combined.

    Parameters:
        *args: Arguments passed to the `git` executable.
        check: Whether to raise an error when Git exits with a non-zero status.

    Returns:
        The combined output of the command, stripped of surrounding whitespace.

    Raises:
        GitError: When `check` is true and Git exits with a non-zero status.
    """
    command = ["git", *args]
    completed = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf8",
        text=True,
        check=False,  # The exit status is inspected manually below.
    )
    output = completed.stdout.strip()
    if check and completed.returncode != 0:
        raise GitError(output)
    return output
|
|
|
|
|
|
def _assert_git_repo(path: str | Path) -> None:
    """Deprecated. Assert that a directory is a Git repository.

    Parameters:
        path: Path to a directory.

    Raises:
        RuntimeError: When the `git` executable cannot be found.
        OSError: When the directory is not a Git repository.
    """
    git_executable = shutil.which("git")
    if not git_executable:
        raise RuntimeError("Could not find git executable. Please install git.")

    try:
        _git("-C", str(path), "rev-parse", "--is-inside-work-tree")
    except GitError as error:
        # Any Git failure here is reported as "not a repository".
        raise OSError(f"Not a git repository: {path}") from error
|
|
|
|
|
|
def _get_latest_tag(repo: str | Path) -> str:
    """Deprecated. Get latest tag of a Git repository.

    Parameters:
        repo: The path to Git repository. When it is not a directory
            (e.g. a file inside the repository), its parent is used instead.

    Returns:
        The latest tag (tags are sorted by descending creation date).

    Raises:
        GitError: When tags cannot be listed, or when the repository has no tags.
    """
    repo = Path(repo)
    if not repo.is_dir():
        repo = repo.parent
    try:
        # Run Git *in the requested repository*: without `-C`, tags of whatever
        # repository contains the current working directory would be listed.
        output = _git("-C", str(repo), "tag", "-l", "--sort=-creatordate")
    except GitError as error:
        # An exception instance is always truthy, so format its message explicitly.
        reason = str(error) or "unknown error"
        raise GitError(f"Cannot list Git tags in {repo}: {reason}") from error
    if not output:
        # An empty string is not a tag: fail loudly instead of returning it.
        raise GitError(f"Cannot list Git tags in {repo}: no tags")
    return output.split("\n", 1)[0]
|
|
|
|
|
|
def _get_repo_root(repo: str | Path) -> Path:
    """Deprecated. Get the root of a Git repository.

    Parameters:
        repo: The path to a Git repository. When it is not a directory
            (e.g. a file inside the repository), its parent is used instead.

    Returns:
        The root of the repository.
    """
    start = Path(repo) if isinstance(repo, str) else repo
    directory = start if start.is_dir() else start.parent
    toplevel = _git("-C", str(directory), "rev-parse", "--show-toplevel")
    return Path(toplevel)
|
|
|
|
|
|
@contextmanager
def _tmp_worktree(repo: str | Path = ".", ref: str = "HEAD") -> Iterator[Path]:
    """Deprecated. Check out a reference of a repository into a temporary worktree.

    The worktree is created on a temporary branch inside a temporary directory.
    On exit, the worktree is removed and pruned, and the temporary branch is deleted
    (cleanup failures are ignored).

    Parameters:
        repo: Path to the repository (i.e. the directory *containing* the `.git` directory)
        ref: A Git reference such as a commit, tag or branch.

    Yields:
        The path to the temporary worktree.

    Raises:
        OSError: If `repo` is not a valid `.git` repository
        RuntimeError: If the `git` executable is unavailable, or if it cannot create a worktree
    """
    _assert_git_repo(repo)
    repo_arg = str(repo)
    repo_name = Path(repo).resolve().name
    # Branch names can contain slashes, which must not end up in directory names.
    safe_ref = _normalize(ref)
    tmp_prefix = f"{_WORKTREE_PREFIX}{repo_name}-{safe_ref}-"

    with TemporaryDirectory(prefix=tmp_prefix) as tmp_dir:
        location = os.path.join(tmp_dir, safe_ref)  # noqa: PTH118
        # Temporary branch name must not already exist.
        tmp_branch = f"griffe-{safe_ref}"

        try:
            _git("-C", repo_arg, "worktree", "add", "-b", tmp_branch, location, ref)
        except GitError as error:
            raise RuntimeError(f"Could not create git worktree: {error}") from error

        try:
            yield Path(location)
        finally:
            # Best-effort cleanup, in this exact order; failures are deliberately ignored.
            cleanup_commands = (
                ("worktree", "remove", location),
                ("worktree", "prune"),
                ("branch", "-D", tmp_branch),
            )
            for command in cleanup_commands:
                _git("-C", repo_arg, *command, check=False)
|
|
|
|
|
|
def _get_git_remote_url(repo: str | Path = ".") -> str:
    """Get the remote URL of a Git repository, as a browsable URL without credentials.

    The `GRIFFE_GIT_REMOTE_URL` environment variable, when set and non-empty,
    is returned as is. Otherwise the URL of the remote named by
    `GRIFFE_GIT_REMOTE` (default: `origin`) is read from the Git configuration.
    `git@host:path` URLs are rewritten to `https://host/path`, a trailing `.git`
    is removed, and credentials are stripped from HTTP(S) URLs.

    Parameters:
        repo: The path to the Git repository.

    Returns:
        The remote URL (the value Git prints for the remote when read with `--default ""`).

    Raises:
        GitError: When the Git command fails.
    """
    if git_url := os.getenv("GRIFFE_GIT_REMOTE_URL"):
        return git_url

    remote = "remote." + os.getenv("GRIFFE_GIT_REMOTE", "origin") + ".url"
    git_url = _git("-C", str(repo), "config", "--default", "", "--get", remote)
    if git_url.startswith("git@"):
        # Only the first colon separates the host from the path.
        git_url = git_url.replace(":", "/", 1).replace("git@", "https://", 1)
    git_url = git_url.removesuffix(".git")

    # Remove credentials from the URL.
    if git_url.startswith(("http://", "https://")):
        # (addressing scheme, network location, path, query, fragment identifier)
        urlparts = list(urlsplit(git_url))
        # The userinfo part ends at the *last* "@" of the network location:
        # splitting on the first one would leak the tail of a password containing "@".
        urlparts[1] = urlparts[1].rsplit("@", 1)[-1]
        git_url = urlunsplit(urlparts)

    return git_url
|
|
|
|
|
|
KnownGitService = Literal[
    "github",
    "gitlab",
    "sourcehut",
    "gitea",
    "gogs",
    "forgejo",
    "codeberg",
    "radicle",
]
"""Known Git hosting services."""

# A single capturing group alternating over every known service name,
# built from the `Literal` members so that both always stay in sync.
_service_re = re.compile("(" + "|".join(KnownGitService.__args__) + ")")  # type: ignore[attr-defined]
|
|
|
|
|
|
def _get_git_known_service(git_remote_url: str) -> KnownGitService | None:
    """Guess the Git hosting service from a remote URL.

    The `GRIFFE_GIT_SERVICE` environment variable, when set and non-empty,
    takes precedence: it is returned if it names a known service, and `None`
    is returned otherwise (the URL is not inspected in that case).
    Without it, the first known service name found in the network location
    of the URL is returned.

    Parameters:
        git_remote_url: The remote URL of the repository.

    Returns:
        The name of the service, or None if unknown.
    """
    forced = os.getenv("GRIFFE_GIT_SERVICE")
    if forced:
        known_services = KnownGitService.__args__  # type: ignore[attr-defined]
        return forced if forced in known_services else None  # type: ignore[return-value]

    network_location = urlsplit(git_remote_url).netloc
    found = _service_re.search(network_location)
    if found is None:
        return None
    return found.group(1)  # type: ignore[return-value]
|
|
|
|
|
|
# Radicle links go through https://app.radicle.at/nodes/seed.radicle.at,
# a seed node believed (not verified) to seed every repository.
# Line ranges do not seem to be supported.
# The rad remote is declared as such in .git/config:
#
# ```ini
# [remote "rad"]
#     url = rad://z4M5XTPDD4Wh1sm8iPCenF85J3z8Z
# ```
_RADICLE_URL = "https://app.radicle.at/nodes/seed.radicle.at"


def _get_radicle_url(url_or_rid: str, commit_hash: str, filepath: str, lineno: int, endlineno: int) -> str:  # noqa: ARG001
    """Build a Radicle source link.

    Parameters:
        url_or_rid: Either a `rad://...` repository identifier or a full base URL.
        commit_hash: The commit to link to.
        filepath: The path of the file, relative to the repository root.
        lineno: The line to link to.
        endlineno: Unused (only the start line is put in the link).

    Returns:
        The source link.
    """
    if url_or_rid.startswith("rad://"):
        # `rad://<rid>` becomes `<explorer>/rad:<rid>`.
        base_url = f"{_RADICLE_URL}/{url_or_rid.replace('//', '')}"
    else:
        # This lets users override the full URL with `GRIFFE_GIT_REMOTE_URL=https://.../rad:...`.
        base_url = url_or_rid
    return f"{base_url}/tree/{commit_hash}/{filepath}#L{lineno}"
|
|
|
|
|
|
# Maps each known service to a callable that builds a source link from
# (remote URL, commit hash, file path, first line, last line), passed positionally.
_service_to_url = {
    "github": lambda remote, commit, path, start, end: f"{remote}/blob/{commit}/{path}#L{start}-L{end}",
    "gitlab": lambda remote, commit, path, start, end: f"{remote}/-/blob/{commit}/{path}#L{start}-L{end}",
    # SourceHut does not seem to support line ranges.
    "sourcehut": lambda remote, commit, path, start, end: f"{remote}/tree/{commit}/{path}#L{start}",
    # Cannot find a demo Gogs instance so not sure about this URL template.
    "gogs": lambda remote, commit, path, start, end: f"{remote}/blob/{commit}/{path}#L{start}-L{end}",
    # Gitea, Codeberg and Forgejo share the same URL template.
    "gitea": lambda remote, commit, path, start, end: f"{remote}/src/commit/{commit}/{path}#L{start}-L{end}",
    "codeberg": lambda remote, commit, path, start, end: f"{remote}/src/commit/{commit}/{path}#L{start}-L{end}",
    "forgejo": lambda remote, commit, path, start, end: f"{remote}/src/commit/{commit}/{path}#L{start}-L{end}",
    "radicle": _get_radicle_url,
}
|
|
|
|
|
|
def _get_source_link(
    service: KnownGitService,
    remote_url: str,
    commit_hash: str,
    filepath: str | Path,
    lineno: int,
    endlineno: int,
) -> str | None:
    """Build the source link of a file region for the given hosting service.

    Parameters:
        service: The Git hosting service, selecting the URL template.
        remote_url: The remote URL of the repository.
        commit_hash: The commit to link to.
        filepath: The path of the file; `Path` objects are converted to POSIX form.
        lineno: The first line of the region.
        endlineno: The last line of the region.

    Returns:
        The source link.
    """
    posix_path = filepath.as_posix() if isinstance(filepath, Path) else filepath
    build_link = _service_to_url[service]
    return build_link(remote_url, commit_hash, posix_path, lineno, endlineno)
|
|
|
|
|
|
def _get_git_commit_hash(repo: str | Path = ".") -> str:
    """Get the hash of the commit currently checked out in a repository.

    The `GRIFFE_GIT_COMMIT_HASH` environment variable, when set and non-empty,
    is returned as is and Git is not called.

    Parameters:
        repo: The path to the Git repository.

    Returns:
        The commit hash.

    Raises:
        GitError: When the Git command fails.
    """
    override = os.getenv("GRIFFE_GIT_COMMIT_HASH")
    if override:
        return override
    return _git("-C", str(repo), "rev-parse", "HEAD")
|
|
|
|
|
|
def _is_tracked(filepath: str | Path, repo: str | Path = ".") -> bool:
    """Tell whether a file is considered tracked, i.e. `git check-ignore` prints nothing for it.

    The exit status of Git is not checked: any output at all
    (including error messages) makes this function return false.

    Parameters:
        filepath: The path of the file to check.
        repo: The path to the Git repository.

    Returns:
        True when `git check-ignore` produced no output, false otherwise.
    """
    check_ignore_output = _git("-C", str(repo), "check-ignore", str(filepath), check=False)
    return check_ignore_output == ""
|
|
|
|
|
|
@dataclass
class GitInfo:
    """Information about a Git repository."""

    repository: Path
    """The path to the Git repository."""
    service: KnownGitService
    """The Git hosting service (used to build the right URLs)."""
    remote_url: str
    """The remote URL of the Git repository."""
    commit_hash: str
    """A commit hash (usually the current checked-out one)."""

    @classmethod
    def from_package(cls, package: Module) -> GitInfo | None:
        """Create a GitInfo instance from a Griffe package.

        Parameters:
            package: The package to get Git information for
                (the first of its file paths is used when it has several).

        Returns:
            The GitInfo instance, or None if unknown.
        """
        try:
            filepath = package.filepath
        except BuiltinModuleError:
            # No file path to inspect.
            return None
        path = filepath[0] if isinstance(filepath, list) else filepath

        try:
            repo = _get_repo_root(path)
            relative_path = path.relative_to(repo)
            if not _is_tracked(relative_path, repo):
                return None
            remote_url = _get_git_remote_url(repo)
            service = _get_git_known_service(remote_url)
            if not service:
                return None
            commit_hash = _get_git_commit_hash(repo)
        except (GitError, ValueError, OSError):
            # `ValueError` can happen if `path` is not relative to `repo`.
            # `OSError` is caught just to be safe.
            return None

        return cls(
            repository=repo,
            service=service,
            remote_url=remote_url,
            commit_hash=commit_hash,
        )

    def get_source_link(self, filepath: str | Path, lineno: int, endlineno: int) -> str | None:
        """Get the source link for the file at the given line numbers.

        Parameters:
            filepath: The path of the file to link to.
            lineno: The first line of the region.
            endlineno: The last line of the region.

        Returns:
            The source link, or None if unknown.
        """
        return _get_source_link(
            service=self.service,
            remote_url=self.remote_url,
            commit_hash=self.commit_hash,
            filepath=filepath,
            lineno=lineno,
            endlineno=endlineno,
        )
|