Sync eng/common directory with azure-sdk-tools for PR 6903 (#4926)

* fixes

* ongoing

* ongoing

* add top-level throw/catches showing exception info

* fix handling of cases when there is 1 link and when there is no RetryAfter.Delta

* handle lack of Exception.Headers property

* handle gracefully obtaining status code from $_.Exception.InnerException.ErrorCode

---------

Co-authored-by: Konrad Jamrozik <kojamroz@microsoft.com>
This commit is contained in:
Azure SDK Bot 2023-09-07 14:12:30 -07:00 committed by GitHub
parent 2f7728794d
commit 129a36dbc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -12,13 +12,14 @@
Specifies the file that contains a set of links to ignore when verifying.
.PARAMETER devOpsLogging
Switch that will enable devops specific logging for warnings
Switch that will enable devops specific logging for warnings.
.PARAMETER recursive
Check the links recursively based on recursivePattern.
Check the links recursively. Applies to links starting with 'baseUrl' parameter. Defaults to true.
.PARAMETER baseUrl
Recursively check all verified links that begin with this baseUrl. Defaults to the folder the url is contained in.
If 'recursive' parameter is set to false, this parameter has no effect.
.PARAMETER rootUrl
Path to the root of the site for resolving rooted relative links, defaults to host root for http and file directory for local files.
@ -74,6 +75,8 @@ param (
[string] $requestTimeoutSec = 15
)
Set-StrictMode -Version 3.0
$ProgressPreference = "SilentlyContinue"; # Disable invoke-webrequest progress dialog
# Regex of the locale keywords.
$locale = "/en-us/"
@ -184,11 +187,15 @@ function ParseLinks([string]$baseUri, [string]$htmlContent)
#$hrefs | Foreach-Object { Write-Host $_ }
Write-Verbose "Found $($hrefs.Count) raw href's in page $baseUri";
$links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value }
[string[]] $links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value }
#$links | Foreach-Object { Write-Host $_ }
return $links
if ($null -eq $links) {
$links = @()
}
return ,$links
}
function CheckLink ([System.Uri]$linkUri, $allowRetry=$true)
@ -239,11 +246,27 @@ function CheckLink ([System.Uri]$linkUri, $allowRetry=$true)
}
}
catch {
$statusCode = $_.Exception.Response.StatusCode.value__
$responsePresent = $_.Exception.psobject.Properties.name -contains "Response"
if ($responsePresent) {
$statusCode = $_.Exception.Response.StatusCode.value__
} else {
$statusCode = $null
}
if(!$statusCode) {
if (!$statusCode) {
# Try to pull the error code from any inner SocketException we might hit
$statusCode = $_.Exception.InnerException.ErrorCode
$innerExceptionPresent = $_.Exception.psobject.Properties.name -contains "InnerException"
$errorCodePresent = $false
if ($innerExceptionPresent) {
$errorCodePresent = $_.Exception.InnerException.psobject.Properties.name -contains "ErrorCode"
}
if ($errorCodePresent) {
$statusCode = $_.Exception.InnerException.ErrorCode
}
}
if ($statusCode -in $errorStatusCodes) {
@ -257,13 +280,30 @@ function CheckLink ([System.Uri]$linkUri, $allowRetry=$true)
$linkValid = $false
}
else {
if ($null -ne $statusCode) {
# For 429 rate-limiting try to pause if possible
if ($allowRetry -and $_.Exception.Response -and $statusCode -eq 429) {
$retryAfter = $_.Exception.Response.Headers.RetryAfter.Delta.TotalSeconds
if ($allowRetry -and $responsePresent -and $statusCode -eq 429) {
$headersPresent = $_.Exception.psobject.Properties.name -contains "Headers"
$retryAfterPresent = $false
if ($headersPresent) {
$retryAfterPresent = $_.Exception.Headers.psobject.Properties.name -contains "RetryAfter"
}
$retryAfterDeltaPresent = $false
if ($retryAfterPresent) {
$retryAfterDeltaPresent = $_.Exception.Headers.RetryAfter.psobject.Properties.name -contains "Delta"
}
if ($retryAfterDeltaPresent) {
$retryAfter = $_.Exception.Response.Headers.RetryAfter.Delta.TotalSeconds
}
# Default retry after 60 (arbitrary) seconds if no header given
if (!$retryAfter -or $retryAfter -gt 60) { $retryAfter = 60 }
if (!$retryAfterDeltaPresent -or $retryAfter -gt 60) { $retryAfter = 60 }
Write-Host "Rate-Limited for $retryAfter seconds while requesting $linkUri"
Start-Sleep -Seconds $retryAfter
@ -366,9 +406,9 @@ function GetLinks([System.Uri]$pageUri)
LogError "Don't know how to process uri $pageUri"
}
$links = ParseLinks $pageUri $content
[string[]] $links = ParseLinks $pageUri $content
return $links;
return ,$links;
}
if ($urls) {
@ -433,59 +473,71 @@ if ($devOpsLogging) {
while ($pageUrisToCheck.Count -ne 0)
{
$pageUri = $pageUrisToCheck.Dequeue();
if ($checkedPages.ContainsKey($pageUri)) { continue }
$checkedPages[$pageUri] = $true;
Write-Verbose "Processing pageUri $pageUri"
try {
if ($checkedPages.ContainsKey($pageUri)) { continue }
$checkedPages[$pageUri] = $true;
$linkUris = GetLinks $pageUri
Write-Host "Checking $($linkUris.Count) links found on page $pageUri";
$badLinksPerPage = @();
foreach ($linkUri in $linkUris) {
$isLinkValid = CheckLink $linkUri
if (!$isLinkValid -and !$badLinksPerPage.Contains($linkUri)) {
if (!$linkUri.ToString().Trim()) {
$linkUri = $emptyLinkMessage
[string[]] $linkUris = GetLinks $pageUri
Write-Host "Checking $($linkUris.Count) links found on page $pageUri";
$badLinksPerPage = @();
foreach ($linkUri in $linkUris) {
$isLinkValid = CheckLink $linkUri
if (!$isLinkValid -and !$badLinksPerPage.Contains($linkUri)) {
if (!$linkUri.ToString().Trim()) {
$linkUri = $emptyLinkMessage
}
$badLinksPerPage += $linkUri
}
$badLinksPerPage += $linkUri
}
if ($recursive -and $isLinkValid) {
if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) {
$pageUrisToCheck.Enqueue($linkUri);
if ($recursive -and $isLinkValid) {
if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) {
$pageUrisToCheck.Enqueue($linkUri);
}
}
}
}
if ($badLinksPerPage.Count -gt 0) {
$badLinks[$pageUri] = $badLinksPerPage
}
}
if ($devOpsLogging) {
Write-Host "##[endgroup]"
}
if ($badLinks.Count -gt 0) {
Write-Host "Summary of broken links:"
}
foreach ($pageLink in $badLinks.Keys) {
Write-Host "'$pageLink' has $($badLinks[$pageLink].Count) broken link(s):"
foreach ($brokenLink in $badLinks[$pageLink]) {
Write-Host " $brokenLink"
if ($badLinksPerPage.Count -gt 0) {
$badLinks[$pageUri] = $badLinksPerPage
}
} catch {
Write-Host "Exception encountered while processing pageUri $pageUri : $($_.Exception)"
throw
}
}
$linksChecked = $checkedLinks.Count - $cachedLinksCount
try {
if ($devOpsLogging) {
Write-Host "##[endgroup]"
}
if ($badLinks.Count -gt 0) {
Write-Host "Checked $linksChecked links with $($badLinks.Count) broken link(s) found."
}
else {
Write-Host "Checked $linksChecked links. No broken links found."
}
if ($badLinks.Count -gt 0) {
Write-Host "Summary of broken links:"
}
foreach ($pageLink in $badLinks.Keys) {
Write-Host "'$pageLink' has $($badLinks[$pageLink].Count) broken link(s):"
foreach ($brokenLink in $badLinks[$pageLink]) {
Write-Host " $brokenLink"
}
}
if ($outputCacheFile)
{
$goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object
$linksChecked = $checkedLinks.Count - $cachedLinksCount
Write-Host "Writing the list of validated links to $outputCacheFile"
$goodLinks | Set-Content $outputCacheFile
if ($badLinks.Count -gt 0) {
Write-Host "Checked $linksChecked links with $($badLinks.Count) broken link(s) found."
}
else {
Write-Host "Checked $linksChecked links. No broken links found."
}
if ($outputCacheFile)
{
$goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object
Write-Host "Writing the list of validated links to $outputCacheFile"
$goodLinks | Set-Content $outputCacheFile
}
} catch {
Write-Host "Exception encountered after all pageUris have been processed : $($_.Exception)"
throw
}
exit $badLinks.Count