From bff63e1fbd3735c9b0dcf3aae28076e83ae3d046 Mon Sep 17 00:00:00 2001 From: Azure SDK Bot <53356347+azure-sdk@users.noreply.github.com> Date: Wed, 14 May 2025 10:21:32 -0700 Subject: [PATCH] HtmlDecode hrefs (#6570) Co-authored-by: Patrick Hallisey --- eng/common/scripts/Verify-Links.ps1 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/eng/common/scripts/Verify-Links.ps1 b/eng/common/scripts/Verify-Links.ps1 index 8162270de..7bba07fe1 100644 --- a/eng/common/scripts/Verify-Links.ps1 +++ b/eng/common/scripts/Verify-Links.ps1 @@ -283,14 +283,14 @@ function ParseLinks([string]$baseUri, [string]$htmlContent) $hrefRegex = "<a[^>]+href\s*=\s*[""']?(?<href>[^""']*)[""']?" $regexOptions = [System.Text.RegularExpressions.RegexOptions]"Singleline, IgnoreCase"; - $hrefs = [RegEx]::Matches($htmlContent, $hrefRegex, $regexOptions); + $matches = [RegEx]::Matches($htmlContent, $hrefRegex, $regexOptions); - #$hrefs | Foreach-Object { Write-Host $_ } + Write-Verbose "Found $($matches.Count) raw href's in page $baseUri"; - Write-Verbose "Found $($hrefs.Count) raw href's in page $baseUri"; - [string[]] $links = $hrefs | ForEach-Object { ResolveUri $baseUri $_.Groups["href"].Value } + # Html encoded urls in anchor hrefs need to be decoded + $urls = $matches | ForEach-Object { [System.Web.HttpUtility]::HtmlDecode($_.Groups["href"].Value) } - #$links | Foreach-Object { Write-Host $_ } + [string[]] $links = $urls | ForEach-Object { ResolveUri $baseUri $_ } if ($null -eq $links) { $links = @()