Sync eng/common directory with azure-sdk-tools for PR 6903 (#4926)
* fixes * ongoing * ongoing * add top-level throw/catches showing exception info * fix handling of cases when there is 1 link and when there is no RetryAfter.Delta * handle lack of Exception.Headers property * handle gracefully obtaining status code from $_.Exception.InnerException.ErrorCode --------- Co-authored-by: Konrad Jamrozik <kojamroz@microsoft.com>
This commit is contained in:
parent
2f7728794d
commit
129a36dbc9
@ -12,13 +12,14 @@
|
||||
Specifies the file that contains a set of links to ignore when verifying.
|
||||
|
||||
.PARAMETER devOpsLogging
|
||||
Switch that will enable devops specific logging for warnings
|
||||
Switch that will enable devops specific logging for warnings.
|
||||
|
||||
.PARAMETER recursive
|
||||
Check the links recurisvely based on recursivePattern.
|
||||
Check the links recurisvely. Applies to links starting with 'baseUrl' parameter. Defaults to true.
|
||||
|
||||
.PARAMETER baseUrl
|
||||
Recursively check links for all links verified that begin with this baseUrl, defaults to the folder the url is contained in.
|
||||
If 'recursive' parameter is set to false, this parameter has no effect.
|
||||
|
||||
.PARAMETER rootUrl
|
||||
Path to the root of the site for resolving rooted relative links, defaults to host root for http and file directory for local files.
|
||||
@ -74,6 +75,8 @@ param (
|
||||
[string] $requestTimeoutSec = 15
|
||||
)
|
||||
|
||||
Set-StrictMode -Version 3.0
|
||||
|
||||
$ProgressPreference = "SilentlyContinue"; # Disable invoke-webrequest progress dialog
|
||||
# Regex of the locale keywords.
|
||||
$locale = "/en-us/"
|
||||
@ -184,11 +187,15 @@ function ParseLinks([string]$baseUri, [string]$htmlContent)
|
||||
#$hrefs | Foreach-Object { Write-Host $_ }
|
||||
|
||||
Write-Verbose "Found $($hrefs.Count) raw href's in page $baseUri";
|
||||
$links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value }
|
||||
[string[]] $links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value }
|
||||
|
||||
#$links | Foreach-Object { Write-Host $_ }
|
||||
|
||||
return $links
|
||||
if ($null -eq $links) {
|
||||
$links = @()
|
||||
}
|
||||
|
||||
return ,$links
|
||||
}
|
||||
|
||||
function CheckLink ([System.Uri]$linkUri, $allowRetry=$true)
|
||||
@ -239,11 +246,27 @@ function CheckLink ([System.Uri]$linkUri, $allowRetry=$true)
|
||||
}
|
||||
}
|
||||
catch {
|
||||
$statusCode = $_.Exception.Response.StatusCode.value__
|
||||
|
||||
$responsePresent = $_.Exception.psobject.Properties.name -contains "Response"
|
||||
if ($responsePresent) {
|
||||
$statusCode = $_.Exception.Response.StatusCode.value__
|
||||
} else {
|
||||
$statusCode = $null
|
||||
}
|
||||
|
||||
if(!$statusCode) {
|
||||
if (!$statusCode) {
|
||||
# Try to pull the error code from any inner SocketException we might hit
|
||||
$statusCode = $_.Exception.InnerException.ErrorCode
|
||||
|
||||
$innerExceptionPresent = $_.Exception.psobject.Properties.name -contains "InnerException"
|
||||
|
||||
$errorCodePresent = $false
|
||||
if ($innerExceptionPresent) {
|
||||
$errorCodePresent = $_.Exception.InnerException.psobject.Properties.name -contains "ErrorCode"
|
||||
}
|
||||
|
||||
if ($errorCodePresent) {
|
||||
$statusCode = $_.Exception.InnerException.ErrorCode
|
||||
}
|
||||
}
|
||||
|
||||
if ($statusCode -in $errorStatusCodes) {
|
||||
@ -257,13 +280,30 @@ function CheckLink ([System.Uri]$linkUri, $allowRetry=$true)
|
||||
$linkValid = $false
|
||||
}
|
||||
else {
|
||||
|
||||
if ($null -ne $statusCode) {
|
||||
|
||||
# For 429 rate-limiting try to pause if possible
|
||||
if ($allowRetry -and $_.Exception.Response -and $statusCode -eq 429) {
|
||||
$retryAfter = $_.Exception.Response.Headers.RetryAfter.Delta.TotalSeconds
|
||||
if ($allowRetry -and $responsePresent -and $statusCode -eq 429) {
|
||||
|
||||
$headersPresent = $_.Exception.psobject.Properties.name -contains "Headers"
|
||||
|
||||
$retryAfterPresent = $false
|
||||
if ($headersPresent) {
|
||||
$retryAfterPresent = $_.Exception.Headers.psobject.Properties.name -contains "RetryAfter"
|
||||
}
|
||||
|
||||
$retryAfterDeltaPresent = $false
|
||||
if ($retryAfterPresent) {
|
||||
$retryAfterDeltaPresent = $_.Exception.Headers.RetryAfter.psobject.Properties.name -contains "Delta"
|
||||
}
|
||||
|
||||
if ($retryAfterDeltaPresent) {
|
||||
$retryAfter = $_.Exception.Response.Headers.RetryAfter.Delta.TotalSeconds
|
||||
}
|
||||
|
||||
# Default retry after 60 (arbitrary) seconds if no header given
|
||||
if (!$retryAfter -or $retryAfter -gt 60) { $retryAfter = 60 }
|
||||
if (!$retryAfterDeltaPresent -or $retryAfter -gt 60) { $retryAfter = 60 }
|
||||
Write-Host "Rate-Limited for $retryAfter seconds while requesting $linkUri"
|
||||
|
||||
Start-Sleep -Seconds $retryAfter
|
||||
@ -366,9 +406,9 @@ function GetLinks([System.Uri]$pageUri)
|
||||
LogError "Don't know how to process uri $pageUri"
|
||||
}
|
||||
|
||||
$links = ParseLinks $pageUri $content
|
||||
[string[]] $links = ParseLinks $pageUri $content
|
||||
|
||||
return $links;
|
||||
return ,$links;
|
||||
}
|
||||
|
||||
if ($urls) {
|
||||
@ -433,59 +473,71 @@ if ($devOpsLogging) {
|
||||
while ($pageUrisToCheck.Count -ne 0)
|
||||
{
|
||||
$pageUri = $pageUrisToCheck.Dequeue();
|
||||
if ($checkedPages.ContainsKey($pageUri)) { continue }
|
||||
$checkedPages[$pageUri] = $true;
|
||||
Write-Verbose "Processing pageUri $pageUri"
|
||||
try {
|
||||
if ($checkedPages.ContainsKey($pageUri)) { continue }
|
||||
$checkedPages[$pageUri] = $true;
|
||||
|
||||
$linkUris = GetLinks $pageUri
|
||||
Write-Host "Checking $($linkUris.Count) links found on page $pageUri";
|
||||
$badLinksPerPage = @();
|
||||
foreach ($linkUri in $linkUris) {
|
||||
$isLinkValid = CheckLink $linkUri
|
||||
if (!$isLinkValid -and !$badLinksPerPage.Contains($linkUri)) {
|
||||
if (!$linkUri.ToString().Trim()) {
|
||||
$linkUri = $emptyLinkMessage
|
||||
[string[]] $linkUris = GetLinks $pageUri
|
||||
Write-Host "Checking $($linkUris.Count) links found on page $pageUri";
|
||||
$badLinksPerPage = @();
|
||||
foreach ($linkUri in $linkUris) {
|
||||
$isLinkValid = CheckLink $linkUri
|
||||
if (!$isLinkValid -and !$badLinksPerPage.Contains($linkUri)) {
|
||||
if (!$linkUri.ToString().Trim()) {
|
||||
$linkUri = $emptyLinkMessage
|
||||
}
|
||||
$badLinksPerPage += $linkUri
|
||||
}
|
||||
$badLinksPerPage += $linkUri
|
||||
}
|
||||
if ($recursive -and $isLinkValid) {
|
||||
if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) {
|
||||
$pageUrisToCheck.Enqueue($linkUri);
|
||||
if ($recursive -and $isLinkValid) {
|
||||
if ($linkUri.ToString().StartsWith($baseUrl) -and !$checkedPages.ContainsKey($linkUri)) {
|
||||
$pageUrisToCheck.Enqueue($linkUri);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($badLinksPerPage.Count -gt 0) {
|
||||
$badLinks[$pageUri] = $badLinksPerPage
|
||||
}
|
||||
}
|
||||
if ($devOpsLogging) {
|
||||
Write-Host "##[endgroup]"
|
||||
}
|
||||
|
||||
if ($badLinks.Count -gt 0) {
|
||||
Write-Host "Summary of broken links:"
|
||||
}
|
||||
foreach ($pageLink in $badLinks.Keys) {
|
||||
Write-Host "'$pageLink' has $($badLinks[$pageLink].Count) broken link(s):"
|
||||
foreach ($brokenLink in $badLinks[$pageLink]) {
|
||||
Write-Host " $brokenLink"
|
||||
if ($badLinksPerPage.Count -gt 0) {
|
||||
$badLinks[$pageUri] = $badLinksPerPage
|
||||
}
|
||||
} catch {
|
||||
Write-Host "Exception encountered while processing pageUri $pageUri : $($_.Exception)"
|
||||
throw
|
||||
}
|
||||
}
|
||||
|
||||
$linksChecked = $checkedLinks.Count - $cachedLinksCount
|
||||
try {
|
||||
if ($devOpsLogging) {
|
||||
Write-Host "##[endgroup]"
|
||||
}
|
||||
|
||||
if ($badLinks.Count -gt 0) {
|
||||
Write-Host "Checked $linksChecked links with $($badLinks.Count) broken link(s) found."
|
||||
}
|
||||
else {
|
||||
Write-Host "Checked $linksChecked links. No broken links found."
|
||||
}
|
||||
if ($badLinks.Count -gt 0) {
|
||||
Write-Host "Summary of broken links:"
|
||||
}
|
||||
foreach ($pageLink in $badLinks.Keys) {
|
||||
Write-Host "'$pageLink' has $($badLinks[$pageLink].Count) broken link(s):"
|
||||
foreach ($brokenLink in $badLinks[$pageLink]) {
|
||||
Write-Host " $brokenLink"
|
||||
}
|
||||
}
|
||||
|
||||
if ($outputCacheFile)
|
||||
{
|
||||
$goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object
|
||||
$linksChecked = $checkedLinks.Count - $cachedLinksCount
|
||||
|
||||
Write-Host "Writing the list of validated links to $outputCacheFile"
|
||||
$goodLinks | Set-Content $outputCacheFile
|
||||
if ($badLinks.Count -gt 0) {
|
||||
Write-Host "Checked $linksChecked links with $($badLinks.Count) broken link(s) found."
|
||||
}
|
||||
else {
|
||||
Write-Host "Checked $linksChecked links. No broken links found."
|
||||
}
|
||||
|
||||
if ($outputCacheFile)
|
||||
{
|
||||
$goodLinks = $checkedLinks.Keys.Where({ "True" -eq $checkedLinks[$_].ToString() }) | Sort-Object
|
||||
|
||||
Write-Host "Writing the list of validated links to $outputCacheFile"
|
||||
$goodLinks | Set-Content $outputCacheFile
|
||||
}
|
||||
} catch {
|
||||
Write-Host "Exception encountered after all pageUris have been processed : $($_.Exception)"
|
||||
throw
|
||||
}
|
||||
|
||||
exit $badLinks.Count
|
||||
|
||||
Loading…
Reference in New Issue
Block a user