From 140aead96d36fe548bc16317ec71845929477b0b Mon Sep 17 00:00:00 2001 From: Azure SDK Bot <53356347+azure-sdk@users.noreply.github.com> Date: Mon, 12 May 2025 12:33:18 -0700 Subject: [PATCH] Sync eng/common directory with azure-sdk-tools for PR 10593 (#6565) * Improve link checking for github links Use locally cloned repos to check github links to avoid rate-limiting. * Add default clone path to the link checker template --------- Co-authored-by: Wes Haggard --- .../pipelines/templates/steps/verify-links.yml | 1 + eng/common/scripts/Verify-Links.ps1 | 12 +++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/eng/common/pipelines/templates/steps/verify-links.yml b/eng/common/pipelines/templates/steps/verify-links.yml index 4d84e124a..83394040c 100644 --- a/eng/common/pipelines/templates/steps/verify-links.yml +++ b/eng/common/pipelines/templates/steps/verify-links.yml @@ -30,4 +30,5 @@ steps: -branchReplacementName ${{ parameters.BranchReplacementName }} -devOpsLogging: $true -checkLinkGuidance: ${{ parameters.CheckLinkGuidance }} + -localGithubClonedRoot "$(Pipeline.Workspace)" -inputCacheFile "https://azuresdkartifacts.blob.core.windows.net/verify-links-cache/verify-links-cache.txt" diff --git a/eng/common/scripts/Verify-Links.ps1 b/eng/common/scripts/Verify-Links.ps1 index 003e83f0e..4a25d6bdc 100644 --- a/eng/common/scripts/Verify-Links.ps1 +++ b/eng/common/scripts/Verify-Links.ps1 @@ -72,6 +72,7 @@ param ( [string] $userAgent, [string] $inputCacheFile, [string] $outputCacheFile, + [string] $localGithubClonedRoot = "", [string] $requestTimeoutSec = 15 ) @@ -80,6 +81,14 @@ Set-StrictMode -Version 3.0 $ProgressPreference = "SilentlyContinue"; # Disable invoke-webrequest progress dialog function ProcessLink([System.Uri]$linkUri) { + # To help improve performance and rate limiting issues with github links we try to resolve them based on a local clone if one exists. 
+ if ($localGithubClonedRoot -and $linkUri -match '^https://github.com/Azure/(?<repo>[^/]+)/(?:blob|tree)/(main|.*_[^/]+|.*/v[^/]+)/(?<path>.*)$') { + $localPath = Join-Path $localGithubClonedRoot $matches['repo'] $matches['path'] + if (Test-Path $localPath) { + return $true + } + return ProcessStandardLink $linkUri + } if ($linkUri -match '^https?://?github\.com/(?)[^/]+/(?)[^/]+/wiki/.+') { # in an unauthenticated session, urls for missing pages will redirect to the wiki root return ProcessRedirectLink $linkUri -invalidStatusCodes 302 @@ -507,6 +516,7 @@ if ($inputCacheFile) $goodLinks = $cacheContent.Split("`n").Where({ $_.Trim() -ne "" -and !$_.StartsWith("#") }) foreach ($goodLink in $goodLinks) { + $goodLink = $goodLink.Trim() $checkedLinks[$goodLink] = $true } } @@ -587,7 +597,7 @@ try { if ($outputCacheFile) { - $goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object + $goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString()}) | Sort-Object -Unique Write-Host "Writing the list of validated links to $outputCacheFile" $goodLinks | Set-Content $outputCacheFile