#! /usr/bin/env bash
#
# Prefetch a git repository: clone the requested revision, hash the checkout,
# add it to the Nix store and print a JSON summary on stdout.
#
# Environment read below:
#   NIX_HASH_ALGO                    initial value of hashType
#   NIX_PREFETCH_GIT_DEEP_CLONE      any non-empty value enables deep clones
#   NIX_PREFETCH_GIT_LEAVE_DOT_GIT   exactly "1" keeps the .git directories
#   NIX_PREFETCH_GIT_BRANCH_NAME     initial value of branchName
#   out, http_proxy                  output path / proxy handed to git

set -e -o pipefail

url=
rev=
expHash=
hashType=$NIX_HASH_ALGO
deepClone=$NIX_PREFETCH_GIT_DEEP_CLONE
leaveDotGit=$NIX_PREFETCH_GIT_LEAVE_DOT_GIT
fetchSubmodules=
builder=
branchName=$NIX_PREFETCH_GIT_BRANCH_NAME

# ENV params
out=${out:-}
http_proxy=${http_proxy:-}

# populated by clone_user_rev()
fullRev=
humanReadableRev=
commitDate=
commitDateStrict8601=

# Normalise the two environment toggles to "true" / empty so that later code
# can simply test for non-emptiness.
if test -n "$deepClone"; then
    deepClone=true
else
    deepClone=
fi

if test "$leaveDotGit" != 1; then
    leaveDotGit=
else
    leaveDotGit=true
fi

# Print the command-line synopsis on stderr and terminate with status 1.
# This function never returns.
# NOTE(review): the help text calls --hash the "Expected hash", but the option
# parser below stores its value in hashType (the hash algorithm); the expected
# hash itself is only taken from the third positional argument. Confirm which
# of the two is intended before changing either.
usage(){
    echo  >&2 "syntax: nix-prefetch-git [options] [URL [REVISION [EXPECTED-HASH]]]

Options:
      --out path      Path where the output would be stored.
      --url url       Any url understood by 'git clone'.
      --rev ref       Any sha1 or references (such as refs/heads/master)
      --hash h        Expected hash.
      --branch-name   Branch name to check out into
      --deepClone     Clone the entire repository.
      --no-deepClone  Make a shallow clone of just the required ref.
      --leave-dotGit  Keep the .git directories.
      --fetch-submodules Fetch submodules.
      --builder       Clone as fetchgit does, but url, rev, and out option are mandatory.
      --quiet         Only print the final json summary.
"
    exit 1
}

# some git commands print to stdout, which would contaminate our JSON output
clean_git(){
    git "$@" >&2
}

# Command-line parsing.
# An option that takes a value sets argfun to "set_<variable>"; the following
# argument is then stored in <variable>. Anything else is a flag or one of up
# to three positional arguments (URL, REVISION, EXPECTED-HASH).
argi=0
argfun=""
for arg; do
    if test -z "$argfun"; then
        case $arg in
            --out) argfun=set_out;;
            --url) argfun=set_url;;
            --rev) argfun=set_rev;;
            --hash) argfun=set_hashType;;
            --branch-name) argfun=set_branchName;;
            --deepClone) deepClone=true;;
            --quiet) QUIET=true;;
            --no-deepClone) deepClone=;;
            --leave-dotGit) leaveDotGit=true;;
            --fetch-submodules) fetchSubmodules=true;;
            --builder) builder=true;;
            -h|--help) usage;;
            *)
                argi=$((argi + 1))
                case $argi in
                    1) url=$arg;;
                    2) rev=$arg;;
                    3) expHash=$arg;;
                    # Too many positional arguments: explain instead of
                    # exiting silently (usage exits with the same status 1).
                    *) usage;;
                esac
                ;;
        esac
    else
        case $argfun in
            set_*)
                var=${argfun#set_}
                # Assign without eval so the value is never word-split,
                # globbed or executed as shell code.
                printf -v "$var" '%s' "$arg"
                ;;
        esac
        argfun=""
    fi
done

if test -z "$url"; then
    usage
fi

# Initialise an empty repository in the current directory with "origin"
# pointing at $1. When http_proxy is non-empty it is written to the
# repository's http.proxy setting; a failure to do so is deliberately ignored.
init_remote(){
    local url=$1
    clean_git init
    clean_git remote add origin "$url"
    if [ -n "$http_proxy" ]; then
        clean_git config http.proxy "$http_proxy" || true
    fi
}

# Return the reference of an hash if it exists on the remote repository.
# Prints the ref name of the first "git ls-remote origin" line whose hash
# column matches $1, or nothing when there is no such line.
ref_from_hash(){
    local hash=$1
    git ls-remote origin | sed -n "\,$hash\t, { s,\(.*\)\t\(.*\),\2,; p; q}"
}

# Return the hash of a reference if it exists on the remote repository.
# Prints the hash of the first "git ls-remote origin" line whose ref column
# starts with $1, or nothing when there is no such line.
hash_from_ref(){
    local ref=$1
    git ls-remote origin | sed -n "\,\t$ref, { s,\(.*\)\t\(.*\),\1,; p; q}"
}

# Returns a name based on the url and reference
#
# This function needs to be in sync with nix's fetchgit implementation
# of urlToName() to re-use the same nix store paths.
url_to_name(){
    # $1 - repository url, $2 - revision (may be empty).
    # Prints the last path component of the url without a trailing ".git"
    # (and without anything up to a ':'), followed by "-<first 7 characters
    # of the revision>" when the revision consists only of [a-z0-9].
    local url=$1
    local ref=$2
    local base
    base=$(basename "$url" .git | cut -d: -f2)

    if [[ $ref =~ ^[a-z0-9]+$ ]]; then
        echo "$base-${ref:0:7}"
    else
        echo "$base"
    fi
}

# Fetch everything and checkout the right sha1
#   $1 - object hash (looked up from $2 on the remote when empty)
#   $2 - reference name
# Returns 1 when the fetch or checkout fails or the object is neither a
# commit nor a tree.
checkout_hash(){
    local hash="$1"
    local ref="$2"
    local object_type
    local commit_id

    if test -z "$hash"; then
        hash=$(hash_from_ref "$ref")
    fi

    clean_git fetch -t ${builder:+--progress} origin || return 1

    # An unknown object leaves object_type empty and is reported by the
    # final branch below.
    object_type=$(git cat-file -t "$hash") || object_type=
    if [[ "$object_type" == "commit" ]]; then
        clean_git checkout -b "$branchName" "$hash" || return 1
    elif [[ "$object_type" == "tree" ]]; then
        # A bare tree cannot be checked out; wrap it in a synthetic commit.
        clean_git config user.email "nix-prefetch-git@localhost"
        clean_git config user.name "nix-prefetch-git"
        commit_id=$(git commit-tree "$hash" -m "Commit created from tree hash $hash")
        clean_git checkout -b "$branchName" "$commit_id" || return 1
    else
        # Diagnostics belong on stderr; stdout is reserved for the JSON.
        echo "Unrecognized git object type: $object_type" >&2
        return 1
    fi
}

# Fetch only a branch/tag and checkout it.
#   $1 - object hash (used to look up the ref when $2 is empty)
#   $2 - reference name
# Returns 1 when a deep clone was requested, when no ref could be determined,
# or when the shallow fetch / checkout fails.
checkout_ref(){
    local hash="$1"
    local ref="$2"

    if [[ -n "$deepClone" ]]; then
        # The caller explicitly asked for a deep clone.  Deep clones
        # allow "git describe" and similar tools to work.  See
        # https://marc.info/?l=nix-dev&m=139641582514772
        # for a discussion.
        return 1
    fi

    if test -z "$ref"; then
        ref=$(ref_from_hash "$hash")
    fi

    if test -n "$ref"; then
        # --depth option is ignored on http repository.
        clean_git fetch ${builder:+--progress} --depth 1 origin +"$ref" || return 1
        clean_git checkout -b "$branchName" FETCH_HEAD || return 1
    else
        return 1
    fi
}

# Update submodules
# Registers the submodules of the repository in the current directory and
# clones each one at the commit reported by "git submodule status".
init_submodules(){
    local l
    local hash
    local dir
    local name
    local url

    # Add urls into .git/config file
    clean_git submodule init

    # list submodule directories and their hashes
    git submodule status |
    while read -r l; do
        # checkout each submodule
        hash=$(echo "$l" | awk '{print $1}' | tr -d '-')
        dir=$(echo "$l" | sed -n 's/^.[0-9a-f]\+ \(.*[^)]*\)\( (.*)\)\?$/\1/p')
        # The regexp is quoted so that the shell neither strips its
        # backslashes nor glob-expands it against the working directory.
        name=$(
            git config -f .gitmodules --get-regexp 'submodule\..*\.path' |
            sed -n "s,^\(.*\)\.path $dir\$,\\1,p")
        url=$(git config --get "${name}.url")

        clone "$dir" "$url" "$hash" ""
    done
}

# Clone one repository into an existing directory.
#   $1 - target directory   $2 - url   $3 - hash (may be empty)
#   $4 - ref (may be empty)
# Tries a shallow ref checkout first and falls back to a full fetch; exits
# with status 1 when both fail. Restores the previous working directory.
clone(){
    local top=$PWD
    local dir="$1"
    local url="$2"
    local hash="$3"
    local ref="$4"

    cd "$dir"

    # Initialize the repository.
    init_remote "$url"

    # Download data from the repository.
    checkout_ref "$hash" "$ref" ||
    checkout_hash "$hash" "$ref" || {
        echo 1>&2 "Unable to checkout $hash$ref from $url."
        exit 1
    }

    # Checkout linked sources.
    if test -n "$fetchSubmodules"; then
        init_submodules
    fi

    if [ -z "$builder" ] && [ -f .topdeps ]; then
        if tg help &>/dev/null; then
            echo "populating TopGit branches..."
            tg remote --populate origin
        else
            echo "WARNING: would populate TopGit branches but TopGit is not available" >&2
            echo "WARNING: install TopGit to fix the problem" >&2
        fi
    fi

    cd "$top"
}

# Remove all remote branches, remove tags not reachable from HEAD, do a full
# repack and then garbage collect unreferenced objects.
#   $1 - working tree whose .git directory should be normalised
make_deterministic_repo(){
    local repo="$1"

    # run in sub-shell to not touch current working directory
    (
    local maybe_tag
    local branch
    local tag

    cd "$repo"
    # Remove files that contain timestamps or otherwise have non-deterministic
    # properties.
    rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
        .git/refs/remotes/origin/HEAD .git/config

    # Remove all remote branches.
    git branch -r | while read -r branch; do
        clean_git branch -rD "$branch"
    done

    # Remove tags not reachable from HEAD. If we're exactly on a tag, don't
    # delete it.
    # NOTE(review): when several tags point at HEAD, maybe_tag holds all of
    # them separated by newlines, so no single tag compares equal and every
    # one is deleted. Left unchanged on purpose: altering which tags survive
    # would change the hash of existing leaveDotGit checkouts.
    maybe_tag=$(git tag --points-at HEAD)
    git tag --contains HEAD | while read -r tag; do
        if [ "$tag" != "$maybe_tag" ]; then
            clean_git tag -d "$tag"
        fi
    done

    # Do a full repack. Must run single-threaded, or else we lose determinism.
    clean_git config pack.threads 1
    clean_git repack -A -d -f
    rm -f .git/config

    # Garbage collect unreferenced objects.
    # Note: --keep-largest-pack prevents non-deterministic ordering of packs
    #   listed in .git/objects/info/packs by only using a single pack
    clean_git gc --prune=all --keep-largest-pack
    )
}

# Check out the revision the user asked for and record its metadata.
#   $1 - target directory   $2 - url   $3 - revision (defaults to HEAD)
# Sets the globals fullRev, humanReadableRev, commitDate and
# commitDateStrict8601, evaluates NIX_PREFETCH_GIT_CHECKOUT_HOOK, and then
# either removes every .git directory or makes each one deterministic,
# depending on leaveDotGit.
clone_user_rev() {
    local dir="$1"
    local url="$2"
    local rev="${3:-HEAD}"
    # Matches any character that is not a lowercase hexadecimal digit.
    local non_hex='[^0123456789abcdef]'
    local gitdir

    # Perform the checkout.
    case "$rev" in
        HEAD|refs/*)
            clone "$dir" "$url" "" "$rev" 1>&2;;
        *)
            if [[ ! $rev =~ $non_hex ]]; then
                clone "$dir" "$url" "$rev" "" 1>&2
            else
                # if revision is not hexadecimal it might be a tag
                clone "$dir" "$url" "" "refs/tags/$rev" 1>&2
            fi;;
    esac

    pushd "$dir" >/dev/null
    fullRev=$( (git rev-parse "$rev" 2>/dev/null || git rev-parse "refs/heads/$branchName") | tail -n1)
    humanReadableRev=$(git describe "$fullRev" 2> /dev/null || git describe --tags "$fullRev" 2> /dev/null || echo -- none --)
    commitDate=$(git show -1 --no-patch --pretty=%ci "$fullRev")
    commitDateStrict8601=$(git show -1 --no-patch --pretty=%cI "$fullRev")
    popd >/dev/null

    # Allow doing additional processing before .git removal
    # (the hook is arbitrary shell code supplied through the environment and
    # is evaluated in this shell on purpose).
    eval "$NIX_PREFETCH_GIT_CHECKOUT_HOOK"
    if test -z "$leaveDotGit"; then
        echo "removing \`.git'..." >&2
        find "$dir" -name .git -print0 | xargs -0 rm -rf
    else
        find "$dir" -name .git | while read -r gitdir; do
            make_deterministic_repo "$(readlink -f "$gitdir/..")"
        done
    fi
}

# Commands to run when the script exits; each entry is evaluated with the
# script's exit status appended as its last argument.
exit_handlers=()

# EXIT trap: run every registered handler in registration order.
run_exit_handlers() {
    exit_status=$?
    for handler in "${exit_handlers[@]}"; do
        eval "$handler $exit_status"
    done
}

trap run_exit_handlers EXIT

# Exit handler for quiet mode ($1 - exit status): restore the real stderr
# and replay the captured diagnostics only when the script failed.
quiet_exit_handler() {
    exec 2>&3 3>&-
    if [ "$1" -ne 0 ]; then
        cat "$errfile" >&2
    fi
    rm -f "$errfile"
}

# Redirect stderr into a temporary file (the original stderr is kept on
# file descriptor 3) and register quiet_exit_handler to undo this at exit.
quiet_mode() {
    errfile="$(mktemp "${TMPDIR:-/tmp}/git-checkout-err-XXXXXXXX")"
    exit_handlers+=(quiet_exit_handler)
    exec 3>&2 2>"$errfile"
}

# Print $1 with backslash, double quote, backspace, form feed, newline,
# carriage return and tab replaced by their JSON escape sequences.
json_escape() {
    local s="$1"
    # The control characters are spelled with $'...' escapes rather than
    # embedded literally, so the function survives editors and tools that
    # rewrite whitespace or control bytes.
    local bs=$'\b' ff=$'\f' nl=$'\n' cr=$'\r' tab=$'\t'

    s="${s//\\/\\\\}"   # \
    s="${s//\"/\\\"}"   # "
    s="${s//$bs/\\b}"   # \b (backspace)
    s="${s//$ff/\\f}"   # \f (form feed)
    s="${s//$nl/\\n}"   # \n (newline)
    s="${s//$cr/\\r}"   # \r (carriage return)
    s="${s//$tab/\\t}"  # \t (tab)
    # printf instead of echo: a value such as "-n" must be printed verbatim.
    printf '%s\n' "$s"
}

# Report the outcome ($1 - computed hash, may be empty): a human-readable
# summary on stderr unless QUIET is set, and the JSON summary on stdout when
# a hash is known.
print_results() {
    hash="$1"
    if ! test -n "$QUIET"; then
        echo "" >&2
        echo "git revision is $fullRev" >&2
        if test -n "$finalPath"; then
            echo "path is $finalPath" >&2
        fi
        echo "git human-readable version is $humanReadableRev" >&2
        echo "Commit date is $commitDate" >&2
        if test -n "$hash"; then
            echo "hash is $hash" >&2
        fi
    fi
    # NOTE(review): the reviewed text was damaged from the here-document
    # below up to the "2> /dev/null" of the store validity check further
    # down. That span has been reconstructed from what survives around it
    # (json_escape, the otherwise uncalled quiet_mode, the reference to
    # remove_tmpPath, the --builder help text and the unbalanced trailing
    # "fi"). Verify it - in particular the exact JSON keys - against a
    # pristine copy of this script before relying on it.
    if test -n "$hash"; then
        cat <<EOF
{
  "url": "$(json_escape "$url")",
  "rev": "$(json_escape "$fullRev")",
  "date": "$(json_escape "$commitDateStrict8601")",
  "path": "$(json_escape "$finalPath")",
  "$(json_escape "$hashType")": "$(json_escape "$hash")",
  "fetchSubmodules": $([[ -n "$fetchSubmodules" ]] && echo true || echo false),
  "deepClone": $([[ -n "$deepClone" ]] && echo true || echo false),
  "leaveDotGit": $([[ -n "$leaveDotGit" ]] && echo true || echo false)
}
EOF
    fi
}

# Exit handler: delete the temporary checkout directory.
remove_tmpPath() {
    rm -rf "$tmpPath"
}

if test -n "$QUIET"; then
    quiet_mode
fi

if test -z "$branchName"; then
    branchName=fetchgit
fi

if test -n "$builder"; then
    # Builder mode: url, rev and out are all mandatory.
    if [[ -z "$out" || -z "$url" || -z "$rev" ]]; then
        usage
    fi
    mkdir -p "$out"
    clone_user_rev "$out" "$url" "$rev"
else
    if test -z "$hashType"; then
        hashType=sha256
    fi

    # If the hash was given, a file with that hash may already be in the
    # store.
    if test -n "$expHash"; then
        finalPath=$(nix-store --print-fixed-path --recursive "$hashType" "$expHash" "$(url_to_name "$url" "$rev")")
        if ! nix-store --check-validity "$finalPath" 2> /dev/null; then
            finalPath=
        fi
        hash=$expHash
    fi

    # If we don't know the hash or a path with that hash doesn't exist,
    # download the file and add it to the store.
    if test -z "$finalPath"; then

        tmpPath="$(mktemp -d "${TMPDIR:-/tmp}/git-checkout-tmp-XXXXXXXX")"
        exit_handlers+=(remove_tmpPath)

        tmpFile="$tmpPath/$(url_to_name "$url" "$rev")"
        mkdir -p "$tmpFile"

        # Perform the checkout.
        clone_user_rev "$tmpFile" "$url" "$rev"

        # Compute the hash.
        hash=$(nix-hash --type "$hashType" --base32 "$tmpFile")

        # Add the downloaded file to the Nix store.
        finalPath=$(nix-store --add-fixed --recursive "$hashType" "$tmpFile")

        if [[ -n "$expHash" && "$expHash" != "$hash" ]]; then
            echo "hash mismatch for URL \`$url'. Got \`$hash'; expected \`$expHash'." >&2
            exit 1
        fi
    fi

    print_results "$hash"

    if test -n "$PRINT_PATH"; then
        echo "$finalPath"
    fi
fi