Fix BOM issue in Generate-DocIndex.ps1 (#5318)

Co-authored-by: Daniel Jurek <djurek@microsoft.com>
This commit is contained in:
Azure SDK Bot 2024-02-05 15:34:41 -08:00 committed by GitHub
parent 025c06715c
commit 0b01dc6614
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 43 deletions

View File

@ -10,50 +10,33 @@ Param (
) )
. "${PSScriptRoot}\..\scripts\common.ps1" . "${PSScriptRoot}\..\scripts\common.ps1"
# Given the github io blob storage url and language regex, # Fetch a list of "artifacts" from blob storage corresponding to the given
# the helper function will return a list of artifact names. # language (-storagePrefix). Remove the prefix from the path names to arrive at
function Get-BlobStorage-Artifacts($blobStorageUrl, $blobDirectoryRegex, $blobArtifactsReplacement) { # an "artifact" name.
function Get-BlobStorage-Artifacts(
$blobDirectoryRegex,
$blobArtifactsReplacement,
$storageAccountName,
$storageContainerName,
$storagePrefix
) {
LogDebug "Reading artifact from storage blob ..." LogDebug "Reading artifact from storage blob ..."
$returnedArtifacts = @() # "--only-show-errors" suppresses warnings about the fact that the az CLI is not authenticated
$pageToken = "" # "--query '[].name'" returns a list of only blob names
Do { # "--num-results *" handles pagination so the caller does not have to
$resp = "" $artifacts = az storage blob list `
if (!$pageToken) { --account-name $storageAccountName `
# First page call. --container-name $storageContainerName `
$resp = Invoke-RestMethod -Method Get -Uri $blobStorageUrl --prefix $storagePrefix `
} --delimiter / `
else { --only-show-errors `
# Next page call --query '[].name' `
$blobStorageUrlPageToken = $blobStorageUrl + "&marker=$pageToken" --num-results * | ConvertFrom-Json
$resp = Invoke-RestMethod -Method Get -Uri $blobStorageUrlPageToken LogDebug "Number of artifacts found: $($artifacts.Length)"
}
# Convert to xml documents.
$xmlDoc = [xml](removeBomFromString $resp)
foreach ($elem in $xmlDoc.EnumerationResults.Blobs.BlobPrefix) {
# The service returns names like "dotnet/Azure.AI.Anomalydetector/"; we need to extract "Azure.AI.Anomalydetector"
$artifact = $elem.Name -replace $blobDirectoryRegex, $blobArtifactsReplacement
$returnedArtifacts += $artifact
}
# Fetch page token
$pageToken = $xmlDoc.EnumerationResults.NextMarker
} while ($pageToken)
return $returnedArtifacts
}
# The sequence of Bom bytes differs by different encoding. # example: "python/azure-storage-blob" -> "azure-storage-blob"
# The helper function here is only to strip the utf-8 encoding system as it is used by blob storage list api. $artifacts = $artifacts.ForEach({ $_ -replace $blobDirectoryRegex, $blobArtifactsReplacement })
# Return the original string if not in BOM utf-8 sequence. return $artifacts
function RemoveBomFromString([string]$bomAwareString) {
if ($bomAwareString.length -le 3) {
return $bomAwareString
}
$bomPatternByteArray = [byte[]] (0xef, 0xbb, 0xbf)
# The default encoding for PowerShell is ISO-8859-1, so convert the string to bytes using that encoding.
$bomAwareBytes = [Text.Encoding]::GetEncoding(28591).GetBytes($bomAwareString.Substring(0, 3))
if (@(Compare-Object $bomPatternByteArray $bomAwareBytes -SyncWindow 0).Length -eq 0) {
return $bomAwareString.Substring(3)
}
return $bomAwareString
} }
function Get-TocMapping { function Get-TocMapping {

View File

@ -1,7 +1,7 @@
jobs: jobs:
- job: CreateDocIndex - job: CreateDocIndex
pool: pool:
vmImage: windows-2022 name: azsdk-pool-mms-win-2022-general
steps: steps:
- task: UsePythonVersion@0 - task: UsePythonVersion@0
displayName: 'Use Python 3.9' displayName: 'Use Python 3.9'