148 lines
6.3 KiB
Python
148 lines
6.3 KiB
Python
|
import shutil
|
||
|
from pathlib import Path
|
||
|
from typing import Tuple
|
||
|
|
||
|
from wasabi import msg
|
||
|
|
||
|
from .commands import run_command
|
||
|
from .filesystem import is_subpath_of, make_tempdir
|
||
|
|
||
|
|
||
|
def git_checkout(
    repo: str, subpath: str, dest: Path, *, branch: str = "master", sparse: bool = False
):
    """Copy a subdirectory of a remote git repository into a new local directory.

    When `sparse` is requested and the installed Git is v2.22 or newer, the
    work is delegated to `git_sparse_checkout`. Otherwise the whole branch is
    cloned into a temporary directory and `subpath` is copied out of it.

    repo (str): URL (or other git-clonable location) of the repository.
    subpath (str): Path inside the repository to copy.
    dest (Path): Target directory. Must not exist yet; its parent must exist.
    branch (str): Branch to clone from.
    sparse (bool): Whether to try a sparse checkout first.
    """
    git_version = get_git_version()
    # Failures are reported through msg.fail(..., exits=1); presumably this
    # terminates the process with exit code 1 (wasabi) - confirm if relied on.
    if dest.exists():
        msg.fail("Destination of checkout must not exist", exits=1)
    if not dest.parent.exists():
        msg.fail("Parent of destination of checkout must exist", exits=1)

    if sparse:
        if git_version >= (2, 22):
            return git_sparse_checkout(repo, subpath, dest, branch)
        # Only warn when the user explicitly asked for a sparse checkout that
        # the installed Git cannot provide; (0, 0) means "version unknown".
        if git_version == (0, 0):
            reason = "You're running an unknown version of Git, so sparse checkout has been disabled."
        else:
            reason = (
                f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
                f"that doesn't fully support sparse checkout yet."
            )
        msg.warn(
            f"{reason} "
            f"This means that more files than necessary may be downloaded "
            f"temporarily. To only download the files needed, make sure "
            f"you're using Git v2.22 or above."
        )

    # Fallback: full clone of the branch into a scratch directory.
    with make_tempdir() as tmp_dir:
        run_command(f"git -C {tmp_dir} clone {repo} . -b {branch}", capture=True)
        try:
            # Wrapping in Path() lets `subpath` point at nested directories.
            source_path = tmp_dir / Path(subpath)
            inside_clone = is_subpath_of(tmp_dir, source_path)
            if not inside_clone:
                msg.fail(
                    f"'{subpath}' is a path outside of the cloned repository.",
                    repo,
                    exits=1,
                )
            shutil.copytree(str(source_path), str(dest))
        except FileNotFoundError:
            msg.fail(
                f"Can't clone {subpath}. Make sure the directory exists in the repo (branch '{branch}')",
                repo,
                exits=1,
            )
|
||
|
|
||
|
|
||
|
def git_sparse_checkout(repo, subpath, dest, branch):
    """Fetch only the objects needed for `subpath` and move it to `dest`.

    Uses a blob-less, checkout-less shallow clone, asks git which objects
    under `subpath` are missing, fetches exactly those in one request and
    then checks out `subpath` before moving it to `dest`.

    repo: Repository URL passed to 'git clone'.
    subpath: Path inside the repository to retrieve.
    dest: Location the retrieved path is moved to.
    branch: Branch to clone and check out from.
    """
    # Rationale: a plain "sparse checkout" still clones everything, and a
    # checkout on top of a partial clone fetches missing objects one at a
    # time, which is very slow. So we list the missing objects for the path
    # ourselves and fetch them in bulk.
    with make_tempdir() as tmp_dir:
        # Step 1: clone the branch without downloading blobs or checking out.
        clone_cmd = (
            f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
            f"-b {branch} --filter=blob:none"
        )
        run_command(clone_cmd)

        # Step 2: list objects reachable for `subpath`; objects that are not
        # available locally are printed with a leading '?'.
        listing = run_command(
            f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}",
            capture=True,
        )
        git_repo = _http_to_git(repo)
        missing_ids = (
            token[1:] for token in listing.stdout.split() if token.startswith("?")
        )
        missings = " ".join(missing_ids)
        if not missings:
            msg.fail(
                f"Could not find any relevant files for '{subpath}'. "
                f"Did you specify a correct and complete path within repo '{repo}' "
                f"and branch {branch}?",
                exits=1,
            )

        # Step 3: fetch all missing objects in a single request.
        run_command(f"git -C {tmp_dir} fetch-pack {git_repo} {missings}", capture=True)

        # Step 4: with the objects present, check out just the subpath.
        run_command(f"git -C {tmp_dir} checkout {branch} {subpath}", capture=True)

        # Step 5: refuse paths that escape the clone, then hand over the result.
        source_path = tmp_dir / Path(subpath)
        if not is_subpath_of(tmp_dir, source_path):
            msg.fail(
                f"'{subpath}' is a path outside of the cloned repository.",
                repo,
                exits=1,
            )

        shutil.move(str(source_path), str(dest))
|
||
|
|
||
|
|
||
|
def git_repo_branch_exists(repo: str, branch: str) -> bool:
    """Check via 'git ls-remote' whether a branch exists on a repository.

    repo (str): URL to get repo.
    branch (str): Branch on repo to check.
    RETURNS (bool): True if 'git ls-remote' printed anything for repo:branch.
    """
    # Called only for its check: it raises if 'git --version' cannot be run.
    get_git_version()
    # `--exit-code` is deliberately not used: `run_command` deals with the
    # return code itself, so an empty stdout is the signal that the branch
    # was not found.
    listing = run_command(f"git ls-remote {repo} {branch}", capture=True)
    return listing.stdout != ""
|
||
|
|
||
|
|
||
|
def get_git_version(
    error: str = "Could not run 'git'. Make sure it's installed and the executable is available.",
) -> Tuple[int, int]:
    """Get the version of git and raise an error if calling 'git --version' fails.

    error (str): The error message to show.
    RETURNS (Tuple[int, int]): The version as a (major, minor) tuple. Returns
        (0, 0) if the version couldn't be determined, including when the
        output has no parseable "<major>.<minor>" part.
    RAISES (RuntimeError): If running 'git --version' raises an exception.
    """
    try:
        ret = run_command("git --version", capture=True)
    except Exception as err:
        # Keep the underlying failure attached for debugging.
        raise RuntimeError(error) from err
    prefix = "git version"
    stdout = ret.stdout.strip()
    if not stdout.startswith(prefix):
        return 0, 0
    parts = stdout[len(prefix) :].strip().split(".")
    try:
        return int(parts[0]), int(parts[1])
    except (IndexError, ValueError):
        # E.g. "git version 2" or "git version 2.30-rc1": honour the
        # documented contract instead of crashing on an odd version string.
        return 0, 0
|
||
|
|
||
|
|
||
|
def _http_to_git(repo: str) -> str:
    """Convert an HTTP(S) repository URL into SSH-style 'git@host:path.git' form.

    Strings that do not start with 'http://' or 'https://' are only
    normalized: trailing slashes are removed and '.git' is appended if it
    is not already there.

    repo (str): Repository URL or remote string.
    RETURNS (str): The remote string, always ending in a single '.git'.
    """
    http_prefix, https_prefix = "http://", "https://"
    # Only rewrite the scheme at the start of the string, never later
    # occurrences inside the path.
    if repo.startswith(http_prefix):
        repo = https_prefix + repo[len(http_prefix) :]
    if repo.startswith(https_prefix):
        # 'host/owner/name' -> 'git@host:owner/name'
        repo = "git@" + repo[len(https_prefix) :].replace("/", ":", 1)
    repo = repo.rstrip("/")
    # Avoid producing 'name.git.git' when the suffix is already present.
    if not repo.endswith(".git"):
        repo = f"{repo}.git"
    return repo
|