From 369cfa02da5b3e2b3c99ab33e09553c63d00ef71 Mon Sep 17 00:00:00 2001 From: Silvan Mosberger Date: Fri, 4 Oct 2024 01:49:44 +0200 Subject: [PATCH] ci: Add review request scripts Also post a comment in case base branch is wrong This guides newcomers in how to smoothly handle the potentially scary situation of having thousands of commits listed in a PR. While CI shows the same, people might not even look at CI if the PR looks botched. --- ci/default.nix | 1 + ci/request-reviews/default.nix | 43 ++++++++++ ci/request-reviews/dev-branches.txt | 7 ++ ci/request-reviews/get-reviewers.sh | 87 +++++++++++++++++++ ci/request-reviews/request-reviews.sh | 97 +++++++++++++++++++++ ci/request-reviews/verify-base-branch.sh | 103 +++++++++++++++++++++++ 6 files changed, 338 insertions(+) create mode 100644 ci/request-reviews/default.nix create mode 100644 ci/request-reviews/dev-branches.txt create mode 100755 ci/request-reviews/get-reviewers.sh create mode 100755 ci/request-reviews/request-reviews.sh create mode 100755 ci/request-reviews/verify-base-branch.sh diff --git a/ci/default.nix b/ci/default.nix index fb5ca04fe728..8a4341048f67 100644 --- a/ci/default.nix +++ b/ci/default.nix @@ -24,4 +24,5 @@ let in { inherit pkgs; + requestReviews = pkgs.callPackage ./request-reviews { }; } diff --git a/ci/request-reviews/default.nix b/ci/request-reviews/default.nix new file mode 100644 index 000000000000..b51d896539d8 --- /dev/null +++ b/ci/request-reviews/default.nix @@ -0,0 +1,43 @@ +{ + lib, + stdenvNoCC, + makeWrapper, + coreutils, + codeowners, + jq, + curl, + github-cli, + gitMinimal, +}: +stdenvNoCC.mkDerivation { + name = "request-reviews"; + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.unions [ + ./get-reviewers.sh + ./request-reviews.sh + ./verify-base-branch.sh + ./dev-branches.txt + ]; + }; + nativeBuildInputs = [ makeWrapper ]; + dontBuild = true; + installPhase = '' + mkdir -p $out/bin + mv dev-branches.txt $out/bin + for bin in *.sh; do + mv "$bin" "$out/bin" + wrapProgram "$out/bin/$bin" \ + --set PATH ${ + lib.makeBinPath [ + coreutils + codeowners + jq + curl + github-cli + gitMinimal + ] + } + done + ''; +} diff --git a/ci/request-reviews/dev-branches.txt b/ci/request-reviews/dev-branches.txt new file mode 100644 index 000000000000..2282529881ba --- /dev/null +++ b/ci/request-reviews/dev-branches.txt @@ -0,0 +1,7 @@ +# Trusted development branches: +# These generally require PRs to update and are built by Hydra. +master +staging +release-* +staging-* +haskell-updates diff --git a/ci/request-reviews/get-reviewers.sh b/ci/request-reviews/get-reviewers.sh new file mode 100755 index 000000000000..d5cd9e0a4fd4 --- /dev/null +++ b/ci/request-reviews/get-reviewers.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash + +# Get the code owners of the files changed by a PR, +# suitable to be consumed by the API endpoint to request reviews: +# https://docs.github.com/en/rest/pulls/review-requests?apiVersion=2022-11-28#request-reviewers-for-a-pull-request + +set -euo pipefail + +log() { + echo "$@" >&2 +} + +if (( "$#" < 5 )); then + log "Usage: $0 GIT_REPO BASE_REF HEAD_REF OWNERS_FILE PR_AUTHOR" + exit 1 +fi + +gitRepo=$1 +baseRef=$2 +headRef=$3 +ownersFile=$4 +prAuthor=$5 + +tmp=$(mktemp -d) +trap 'rm -rf "$tmp"' exit + +git -C "$gitRepo" diff --name-only --merge-base "$baseRef" "$headRef" > "$tmp/touched-files" +readarray -t touchedFiles < "$tmp/touched-files" +log "This PR touches ${#touchedFiles[@]} files" + +# Get the owners file from the base, because we don't want to allow PRs to +# remove code owners to avoid pinging them +git -C "$gitRepo" show "$baseRef":"$ownersFile" > "$tmp"/codeowners + +# Associative arrays with the team/user as the key for easy deduplication +declare -A teams users + +for file in "${touchedFiles[@]}"; do + result=$(codeowners --file "$tmp"/codeowners "$file") + + read -r file owners <<< "$result" + if [[ "$owners" == "(unowned)" ]]; then + log "File $file is unowned" + continue + fi + log "File $file is owned by $owners" + + # Split up multiple owners, separated by arbitrary amounts of spaces + IFS=" " read -r -a entries <<< "$owners" + + for entry in "${entries[@]}"; do + # GitHub technically also supports Emails as code owners, + # but we can't easily support that, so let's not + if [[ ! "$entry" =~ @(.*) ]]; then + warn -e "\e[33mCodeowner \"$entry\" for file $file is not valid: Must start with \"@\"\e[0m" >&2 + # Don't fail, because the PR for which this script runs can't fix it, + # it has to be fixed in the base branch + continue + fi + # The first regex match is everything after the @ + entry=${BASH_REMATCH[1]} + if [[ "$entry" =~ .*/(.*) ]]; then + # Teams look like $org/$team, where we only need $team for the API + # call to request reviews from teams + teams[${BASH_REMATCH[1]}]= + else + # Everything else is a user + users[$entry]= + fi + done + +done + +# Cannot request a review from the author +if [[ -v users[$prAuthor] ]]; then + log "One or more files are owned by the PR author, ignoring" + unset 'users[$prAuthor]' +fi + +# Turn it into a JSON for the GitHub API call to request PR reviewers +jq -n \ + --arg users "${!users[*]}" \ + --arg teams "${!teams[*]}" \ + '{ + reviewers: $users | split(" "), + team_reviewers: $teams | split(" ") + }' diff --git a/ci/request-reviews/request-reviews.sh b/ci/request-reviews/request-reviews.sh new file mode 100755 index 000000000000..d62ab309bcc8 --- /dev/null +++ b/ci/request-reviews/request-reviews.sh @@ -0,0 +1,97 @@ +#!/usr/bin/env bash + +# Requests reviews for a PR after verifying that the base branch is correct + +set -euo pipefail +tmp=$(mktemp -d) +trap 'rm -rf "$tmp"' exit +SCRIPT_DIR=$(dirname "$0") + +log() { + echo "$@" >&2 +} + +effect() { + if [[ -n "${DRY_MODE:-}" ]]; then + log "Skipping in dry mode:" "${@@Q}" + else + "$@" + fi +} + +if (( $# < 3 )); then + log "Usage: $0 GITHUB_REPO PR_NUMBER OWNERS_FILE" + exit 1 +fi +baseRepo=$1 +prNumber=$2 +ownersFile=$3 + +log "Fetching PR info" +prInfo=$(gh api \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/$baseRepo/pulls/$prNumber") + +baseBranch=$(jq -r .base.ref <<< "$prInfo") +log "Base branch: $baseBranch" +prRepo=$(jq -r .head.repo.full_name <<< "$prInfo") +log "PR repo: $prRepo" +prBranch=$(jq -r .head.ref <<< "$prInfo") +log "PR branch: $prBranch" +prAuthor=$(jq -r .user.login <<< "$prInfo") +log "PR author: $prAuthor" + +extraArgs=() +if pwdRepo=$(git rev-parse --show-toplevel 2>/dev/null); then + # Speedup for local runs + extraArgs+=(--reference-if-able "$pwdRepo") +fi + +log "Fetching Nixpkgs commit history" +# We only need the commit history, not the contents, so we can do a tree-less clone using tree:0 +# https://github.blog/open-source/git/get-up-to-speed-with-partial-clone-and-shallow-clone/#user-content-quick-summary +git clone --bare --filter=tree:0 --no-tags --origin upstream "${extraArgs[@]}" https://github.com/"$baseRepo".git "$tmp"/nixpkgs.git + +log "Fetching the PR commit history" +# Fetch the PR +git -C "$tmp/nixpkgs.git" remote add fork https://github.com/"$prRepo".git +# This remote config is the same as --filter=tree:0 when cloning +git -C "$tmp/nixpkgs.git" config remote.fork.partialclonefilter tree:0 +git -C "$tmp/nixpkgs.git" config remote.fork.promisor true + +# This should not conflict with any refs in Nixpkgs +headRef=refs/remotes/fork/pr +# Only fetch into a remote ref, because the local ref namespace is used by Nixpkgs, don't want any conflicts +git -C "$tmp/nixpkgs.git" fetch --no-tags fork "$prBranch":"$headRef" + +log "Checking correctness of the base branch" +if ! "$SCRIPT_DIR"/verify-base-branch.sh "$tmp/nixpkgs.git" "$headRef" "$baseRepo" "$baseBranch" "$prRepo" "$prBranch" | tee "$tmp/invalid-base-error" >&2; then + log "Posting error as comment" + if ! response=$(effect gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/$baseRepo/issues/$prNumber/comments" \ + -F "body=@$tmp/invalid-base-error"); then + log "Failed to post the comment: $response" + fi + exit 1 +fi + +log "Getting code owners to request reviews from" +"$SCRIPT_DIR"/get-reviewers.sh "$tmp/nixpkgs.git" "$baseBranch" "$headRef" "$ownersFile" "$prAuthor" > "$tmp/reviewers.json" + +log "Requesting reviews from: $(<"$tmp/reviewers.json")" + +if ! response=$(effect gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/$baseRepo/pulls/$prNumber/requested_reviewers" \ + --input "$tmp/reviewers.json"); then + log "Failed to request reviews: $response" + exit 1 +fi + +log "Successfully requested reviews" diff --git a/ci/request-reviews/verify-base-branch.sh b/ci/request-reviews/verify-base-branch.sh new file mode 100755 index 000000000000..17d721c5ddd1 --- /dev/null +++ b/ci/request-reviews/verify-base-branch.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash + +# Check that a PR doesn't include commits from other development branches. +# Fails with next steps if it does + +set -euo pipefail +tmp=$(mktemp -d) +trap 'rm -rf "$tmp"' exit +SCRIPT_DIR=$(dirname "$0") + +log() { + echo "$@" >&2 +} + +# Small helper to check whether an element is in a list +# Usage: `elementIn foo "${list[@]}"` +elementIn() { + local e match=$1 + shift + for e; do + if [[ "$e" == "$match" ]]; then + return 0 + fi + done + return 1 +} + +if (( $# < 6 )); then + log "Usage: $0 LOCAL_REPO HEAD_REF BASE_REPO BASE_BRANCH PR_REPO PR_BRANCH" + exit 1 +fi +localRepo=$1 +headRef=$2 +baseRepo=$3 +baseBranch=$4 +prRepo=$5 +prBranch=$6 + +# All development branches +devBranchPatterns=() +while read -r pattern; do + if [[ "$pattern" != '#'* ]]; then + devBranchPatterns+=("$pattern") + fi +done < "$SCRIPT_DIR/dev-branches.txt" + +git -C "$localRepo" branch --list --format "%(refname:short)" "${devBranchPatterns[@]}" > "$tmp/dev-branches" +readarray -t devBranches < "$tmp/dev-branches" + +if [[ "$baseRepo" == "$prRepo" ]] && elementIn "$prBranch" "${devBranches[@]}"; then + log "This PR merges $prBranch into $baseBranch, no commit check necessary" + exit 0 +fi + +# The current merge base of the PR +prMergeBase=$(git -C "$localRepo" merge-base "$baseBranch" "$headRef") +log "The PR's merge base with the base branch $baseBranch is $prMergeBase" + +# This is purely for debugging +git -C "$localRepo" rev-list --reverse "$baseBranch".."$headRef" > "$tmp/pr-commits" +log "The PR includes these $(wc -l < "$tmp/pr-commits") commits:" +cat <"$tmp/pr-commits" >&2 + +for testBranch in "${devBranches[@]}"; do + + if [[ -z "$(git -C "$localRepo" rev-list -1 --since="1 month ago" "$testBranch")" ]]; then + log "Not checking $testBranch, was inactive for the last month" + continue + fi + log "Checking if commits from $testBranch are included in the PR" + + # We need to check for any commits that are in the PR which are also in the test branch. + # We could check each commit from the PR individually, but that's unnecessarily slow. + # + # This does _almost_ what we want: `git rev-list --count headRef testBranch ^baseBranch`, + # except that it includes commits that are reachable from _either_ headRef or testBranch, + # instead of restricting it to ones reachable by both + + # Easily fixable though, because we can use `git merge-base testBranch headRef` + # to get the least common ancestor (aka merge base) commit reachable by both. + # If the branch being tested is indeed the right base branch, + # this is then also the commit from that branch that the PR is based on top of. + testMergeBase=$(git -C "$localRepo" merge-base "$testBranch" "$headRef") + + # And then use the `git rev-list --count`, but replacing the non-working + # `headRef testBranch` with the merge base of the two. + extraCommits=$(git -C "$localRepo" rev-list --count "$testMergeBase" ^"$baseBranch") + + if (( extraCommits != 0 )); then + log -e "\e[33m" + echo "The PR's base branch is set to $baseBranch, but $extraCommits commits from the $testBranch branch are included. Make sure you know the [right base branch for your changes](https://github.com/NixOS/nixpkgs/blob/master/CONTRIBUTING.md#branch-conventions), then:" + echo "- If the changes should go to the $testBranch branch, [change the base branch](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/changing-the-base-branch-of-a-pull-request) to $testBranch" + echo "- If the changes should go to the $baseBranch branch, rebase your PR onto the merge base with the $testBranch branch:" + echo " \`\`\`" + echo " git rebase --onto $prMergeBase $testMergeBase" + echo " git push --force-with-lease" + echo " \`\`\`" + log -e "\e[m" + exit 1 + fi +done + +log "Base branch is correct, no commits from development branches are included"