From 129a36dbc939021038ccce46d22704114eae734e Mon Sep 17 00:00:00 2001 From: Azure SDK Bot <53356347+azure-sdk@users.noreply.github.com> Date: Thu, 7 Sep 2023 14:12:30 -0700 Subject: [PATCH] Sync eng/common directory with azure-sdk-tools for PR 6903 (#4926) * fixes * ongoing * ongoing * add top-level throw/catches showing exception info * fix handling of cases when there is 1 link and when there is no RetryAfter.Delta * handle lack of Exception.Headers property * handle gracefully obtaining status code from $_.Exception.InnerException.ErrorCode --------- Co-authored-by: Konrad Jamrozik --- eng/common/scripts/Verify-Links.ps1 | 162 ++++++++++++++++++---------- 1 file changed, 107 insertions(+), 55 deletions(-) diff --git a/eng/common/scripts/Verify-Links.ps1 b/eng/common/scripts/Verify-Links.ps1 index 4bfc21243..a77f57133 100644 --- a/eng/common/scripts/Verify-Links.ps1 +++ b/eng/common/scripts/Verify-Links.ps1 @@ -12,13 +12,14 @@ Specifies the file that contains a set of links to ignore when verifying. .PARAMETER devOpsLogging - Switch that will enable devops specific logging for warnings + Switch that will enable devops specific logging for warnings. .PARAMETER recursive - Check the links recurisvely based on recursivePattern. + Check the links recurisvely. Applies to links starting with 'baseUrl' parameter. Defaults to true. .PARAMETER baseUrl Recursively check links for all links verified that begin with this baseUrl, defaults to the folder the url is contained in. + If 'recursive' parameter is set to false, this parameter has no effect. .PARAMETER rootUrl Path to the root of the site for resolving rooted relative links, defaults to host root for http and file directory for local files. @@ -74,6 +75,8 @@ param ( [string] $requestTimeoutSec = 15 ) +Set-StrictMode -Version 3.0 + $ProgressPreference = "SilentlyContinue"; # Disable invoke-webrequest progress dialog # Regex of the locale keywords. $locale = "/en-us/" @@ -184,11 +187,15 @@ function ParseLinks([string]$baseUri, [string]$htmlContent) #$hrefs | Foreach-Object { Write-Host $_ } Write-Verbose "Found $($hrefs.Count) raw href's in page $baseUri"; - $links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value } + [string[]] $links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value } #$links | Foreach-Object { Write-Host $_ } - return $links + if ($null -eq $links) { + $links = @() + } + + return ,$links } function CheckLink ([System.Uri]$linkUri, $allowRetry=$true) @@ -239,11 +246,27 @@ function CheckLink ([System.Uri]$linkUri, $allowRetry=$true) } } catch { - $statusCode = $_.Exception.Response.StatusCode.value__ + + $responsePresent = $_.Exception.psobject.Properties.name -contains "Response" + if ($responsePresent) { + $statusCode = $_.Exception.Response.StatusCode.value__ + } else { + $statusCode = $null + } - if(!$statusCode) { + if (!$statusCode) { # Try to pull the error code from any inner SocketException we might hit - $statusCode = $_.Exception.InnerException.ErrorCode + + $innerExceptionPresent = $_.Exception.psobject.Properties.name -contains "InnerException" + + $errorCodePresent = $false + if ($innerExceptionPresent) { + $errorCodePresent = $_.Exception.InnerException.psobject.Properties.name -contains "ErrorCode" + } + + if ($errorCodePresent) { + $statusCode = $_.Exception.InnerException.ErrorCode + } } if ($statusCode -in $errorStatusCodes) { @@ -257,13 +280,30 @@ function CheckLink ([System.Uri]$linkUri, $allowRetry=$true) $linkValid = $false } else { + if ($null -ne $statusCode) { + # For 429 rate-limiting try to pause if possible - if ($allowRetry -and $_.Exception.Response -and $statusCode -eq 429) { - $retryAfter = $_.Exception.Response.Headers.RetryAfter.Delta.TotalSeconds + if ($allowRetry -and $responsePresent -and $statusCode -eq 429) { + + $headersPresent = $_.Exception.psobject.Properties.name -contains "Headers" + + $retryAfterPresent = $false + if ($headersPresent) { + $retryAfterPresent = $_.Exception.Headers.psobject.Properties.name -contains "RetryAfter" + } + + $retryAfterDeltaPresent = $false + if ($retryAfterPresent) { + $retryAfterDeltaPresent = $_.Exception.Headers.RetryAfter.psobject.Properties.name -contains "Delta" + } + + if ($retryAfterDeltaPresent) { + $retryAfter = $_.Exception.Response.Headers.RetryAfter.Delta.TotalSeconds + } # Default retry after 60 (arbitrary) seconds if no header given - if (!$retryAfter -or $retryAfter -gt 60) { $retryAfter = 60 } + if (!$retryAfterDeltaPresent -or $retryAfter -gt 60) { $retryAfter = 60 } Write-Host "Rate-Limited for $retryAfter seconds while requesting $linkUri" Start-Sleep -Seconds $retryAfter @@ -366,9 +406,9 @@ function GetLinks([System.Uri]$pageUri) LogError "Don't know how to process uri $pageUri" } - $links = ParseLinks $pageUri $content + [string[]] $links = ParseLinks $pageUri $content - return $links; + return ,$links; } if ($urls) { @@ -433,59 +473,71 @@ if ($devOpsLogging) { while ($pageUrisToCheck.Count -ne 0) { $pageUri = $pageUrisToCheck.Dequeue(); - if ($checkedPages.ContainsKey($pageUri)) { continue } - $checkedPages[$pageUri] = $true; + Write-Verbose "Processing pageUri $pageUri" + try { + if ($checkedPages.ContainsKey($pageUri)) { continue } + $checkedPages[$pageUri] = $true; - $linkUris = GetLinks $pageUri - Write-Host "Checking $($linkUris.Count) links found on page $pageUri"; - $badLinksPerPage = @(); - foreach ($linkUri in $linkUris) { - $isLinkValid = CheckLink $linkUri - if (!$isLinkValid -and !$badLinksPerPage.Contains($linkUri)) { - if (!$linkUri.ToString().Trim()) { - $linkUri = $emptyLinkMessage + [string[]] $linkUris = GetLinks $pageUri + Write-Host "Checking $($linkUris.Count) links found on page $pageUri"; + $badLinksPerPage = @(); + foreach ($linkUri in $linkUris) { + $isLinkValid = CheckLink $linkUri + if (!$isLinkValid -and !$badLinksPerPage.Contains($linkUri)) { + if (!$linkUri.ToString().Trim()) { + $linkUri = $emptyLinkMessage + } + $badLinksPerPage += $linkUri } - $badLinksPerPage += $linkUri - } - if ($recursive -and $isLinkValid) { - if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) { - $pageUrisToCheck.Enqueue($linkUri); + if ($recursive -and $isLinkValid) { + if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) { + $pageUrisToCheck.Enqueue($linkUri); + } } } - } - if ($badLinksPerPage.Count -gt 0) { - $badLinks[$pageUri] = $badLinksPerPage - } -} -if ($devOpsLogging) { - Write-Host "##[endgroup]" -} - -if ($badLinks.Count -gt 0) { - Write-Host "Summary of broken links:" -} -foreach ($pageLink in $badLinks.Keys) { - Write-Host "'$pageLink' has $($badLinks[$pageLink].Count) broken link(s):" - foreach ($brokenLink in $badLinks[$pageLink]) { - Write-Host " $brokenLink" + if ($badLinksPerPage.Count -gt 0) { + $badLinks[$pageUri] = $badLinksPerPage + } + } catch { + Write-Host "Exception encountered while processing pageUri $pageUri : $($_.Exception)" + throw } } -$linksChecked = $checkedLinks.Count - $cachedLinksCount +try { + if ($devOpsLogging) { + Write-Host "##[endgroup]" + } -if ($badLinks.Count -gt 0) { - Write-Host "Checked $linksChecked links with $($badLinks.Count) broken link(s) found." -} -else { - Write-Host "Checked $linksChecked links. No broken links found." -} + if ($badLinks.Count -gt 0) { + Write-Host "Summary of broken links:" + } + foreach ($pageLink in $badLinks.Keys) { + Write-Host "'$pageLink' has $($badLinks[$pageLink].Count) broken link(s):" + foreach ($brokenLink in $badLinks[$pageLink]) { + Write-Host " $brokenLink" + } + } -if ($outputCacheFile) -{ - $goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object + $linksChecked = $checkedLinks.Count - $cachedLinksCount - Write-Host "Writing the list of validated links to $outputCacheFile" - $goodLinks | Set-Content $outputCacheFile + if ($badLinks.Count -gt 0) { + Write-Host "Checked $linksChecked links with $($badLinks.Count) broken link(s) found." + } + else { + Write-Host "Checked $linksChecked links. No broken links found." + } + + if ($outputCacheFile) + { + $goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object + + Write-Host "Writing the list of validated links to $outputCacheFile" + $goodLinks | Set-Content $outputCacheFile + } +} catch { + Write-Host "Exception encountered after all pageUris have been processed : $($_.Exception)" + throw } exit $badLinks.Count