204 lines
8.6 KiB
PowerShell
204 lines
8.6 KiB
PowerShell
|
function Read-Xml {
    <#
    .SYNOPSIS
    Reads an XML file and converts it into nested hashtables with a small hand-rolled parser.

    .DESCRIPTION
    The file is read as a single string and scanned character by character.
    Processing instructions (<? ... ?>), comments (<!-- ... -->), CDATA sections
    (<![CDATA[ ... ]]>) and other <! ... > declarations are skipped; their content is
    NOT captured. Every element becomes an entry in a hashtable keyed by the element
    name. The value of that entry is whatever Merge-Objects produces from the previous
    value for that name, the element's attribute hashtable, its parsed child elements
    and, for text-only elements, a hashtable of the form @{ Text = <inner text> }.

    Known limitations (by design, this is not a full XML parser):
      * a literal '>' inside an attribute value terminates the tag early;
      * tag-like text inside comments/CDATA nested in an element can confuse the
        search for that element's closing tag;
      * a DOCTYPE with an internal subset ([...]) is only skipped up to its first '>'.

    .PARAMETER Path
    Path of the XML file to read. Must resolve via Test-Path.

    .OUTPUTS
    Hashtable describing the top-level elements found in the file.

    .NOTES
    Depends on Get-LogLeadName and Merge-Objects, which are defined outside this
    function. NOTE(review): the exact merge semantics of -DontClobber/-DontDeepMerge
    are not visible here - confirm against Merge-Objects.
    #>
    param (
        [Parameter(Mandatory = $false)]
        [ValidateNotNullOrEmpty()]
        [string]$Path
    )

    $logLead = (Get-LogLeadName)

    if (!(Test-Path -Path $Path)) {
        throw "$logLead : Not able to resolve [$Path]"
    }

    # Splits the text between '<' and '>' (without a trailing '/') into the tag name and
    # a hashtable of attributes. Returns two values: the name and the attribute hashtable
    # ($null when the tag carries nothing besides its name).
    function parseTag {
        param (
            [string]$tagContent
        )

        # content should look like
        #   tagName attribute-name="attribute-value"
        # or
        #   tagName attribute-name='attribute-value'
        # Both quote styles are accepted; a string ends at the next matching,
        # non-escaped quote of the same kind.

        # Sentinel characters that temporarily stand in for quotes/spaces that live
        # inside an attribute value, so the tag can be tokenised with a plain split.
        $charEscapedSingleQuote = [char]([byte]17)
        $charEscapedDoubleQuote = [char]([byte]18)
        $charEscapedStringSpace = [char]([byte]19)

        # XML whitespace other than the space itself: tab, line feed, carriage return
        $xmlWhitespace = [char[]]@(9, 10, 13)

        $tagContent = $tagContent.Replace('\"', [string]$charEscapedDoubleQuote).Replace("\'", [string]$charEscapedSingleQuote)

        # Work on characters, not UTF-8 bytes: byte offsets and character offsets
        # diverge as soon as the tag contains a non-ASCII character.
        $tagChars = $tagContent.ToCharArray()
        $inSingleString = $false
        $inDoubleString = $false
        for ($i = 0; $i -lt $tagChars.Length; $i++) {
            $char = $tagChars[$i]
            if ($inDoubleString -or $inSingleString) {
                if (($char -eq '"') -and $inDoubleString) {
                    # we found the end of the double-quoted string
                    $inDoubleString = $false
                } elseif (($char -eq "'") -and $inSingleString) {
                    # we found the end of the single-quoted string
                    $inSingleString = $false
                } elseif ($char -eq ' ') {
                    # hide the space so the later split on ' ' leaves the value intact
                    $tagChars[$i] = $charEscapedStringSpace
                } elseif ($char -eq '"') {
                    # a double quote inside a single-quoted string
                    $tagChars[$i] = $charEscapedDoubleQuote
                } elseif ($char -eq "'") {
                    # a single quote inside a double-quoted string
                    $tagChars[$i] = $charEscapedSingleQuote
                }
            } elseif ($char -eq '"') {
                $inDoubleString = $true
            } elseif ($char -eq "'") {
                $inSingleString = $true
            } elseif ($xmlWhitespace -contains $char) {
                # tabs/newlines between attributes separate tokens just like a space
                $tagChars[$i] = ' '
            }
        }

        $tagContent = -join $tagChars

        # Now handle the case of <tagName attribute = "escaped-string-content">
        # It should be <tagName attribute="escaped-string-content">
        while (($tagContent.IndexOf(' =') -gt -1) -or ($tagContent.IndexOf('= ') -gt -1)) {
            $tagContent = $tagContent.Replace(' =', '=').Replace('= ', '=')
        }

        # $tagContent has been escaped, so we can split on spaces, then on the first
        # equals sign, then strip the delimiting quotes.
        $splits = $tagContent -split ' '
        $splitCount = $splits.Count
        $node = @{ Name = $splits[0] }
        if ($splitCount -gt 1) {
            $node.Attributes = @{}
        }

        for ($i = 1; $i -lt $splitCount; $i++) {
            $attributeRawValue = $splits[$i]
            if ([string]::IsNullOrWhiteSpace($attributeRawValue)) {
                # consecutive separators produce empty tokens; nothing to parse
                continue
            }

            # a stand-alone attribute (no '=') is set equal to its own name
            $name = $attributeRawValue
            $value = $attributeRawValue

            $attributeEqualsIndex = $attributeRawValue.IndexOf('=')
            if ($attributeEqualsIndex -ne -1) {
                $name = $attributeRawValue.Substring(0, $attributeEqualsIndex)
                # drop the delimiting quotes, then restore the hidden characters
                $value = $attributeRawValue.Substring($attributeEqualsIndex + 1).Replace('"', '').Replace("'", '')
                $value = $value.Replace([string]$charEscapedStringSpace, ' ').Replace([string]$charEscapedDoubleQuote, '"').Replace([string]$charEscapedSingleQuote, "'")
            }

            $node.Attributes.$name = $value
        }

        return $node.Name, $node.Attributes
    }

    # Returns the index of the closing tag that matches an already opened <tagName>,
    # searching from $startIndex and counting nested elements of the same name.
    # Returns -1 when no matching closing tag exists.
    function findClosingTagIndex {
        param (
            [string]$content,
            [string]$tagName,
            [int]$startIndex
        )

        $ordinal = [System.StringComparison]::Ordinal
        $openPrefix = "<$tagName"
        $closingTag = "</$tagName>"
        $depth = 1
        $cursor = $startIndex

        while ($true) {
            $nextClose = $content.IndexOf($closingTag, $cursor, $ordinal)
            if ($nextClose -eq -1) {
                return -1
            }

            # every same-named element opened before this closing tag needs its own
            # closing tag first
            $scan = $cursor
            while ($true) {
                $nextOpen = $content.IndexOf($openPrefix, $scan, $ordinal)
                if (($nextOpen -eq -1) -or ($nextOpen -ge $nextClose)) {
                    break
                }
                $afterName = $nextOpen + $openPrefix.Length
                $scan = $afterName

                # only an exact name match counts: <item> must not be confused with <items>
                $delimiter = $content[$afterName]
                if (-not (($delimiter -eq '>') -or ($delimiter -eq '/') -or [char]::IsWhiteSpace($delimiter))) {
                    continue
                }

                # self-closing elements (<tagName ... />) do not need a closing tag
                $tagEnd = $content.IndexOf('>', $afterName)
                $selfClosing = ($tagEnd -gt 0) -and ($content[$tagEnd - 1] -eq '/')
                if (-not $selfClosing) {
                    $depth++
                }
            }

            $depth--
            if ($depth -eq 0) {
                return $nextClose
            }
            $cursor = $nextClose + $closingTag.Length
        }
    }

    $rawcontent = (Get-Content -Raw -Path $Path)

    # Parses all sibling elements found in $content (recursing into their children) and
    # returns them as a hashtable keyed by element name.
    function parseNodes {
        param (
            $content
        )

        $parsedElements = @{}
        $ordinal = [System.StringComparison]::Ordinal

        $currentTag = $null
        $beginTag = $false
        $contentLength = $content.Length
        for ($i = 0; $i -lt $contentLength; $i++) {
            $char = $content[$i]
            if ($char -eq '<') {
                if ($content[$i+1] -eq '?') {
                    # processing instruction / xml declaration: skip past its '?>'
                    $skipTo = $content.IndexOf('?>', $i + 1, $ordinal)
                    if ($skipTo -eq -1) {
                        throw "$logLead : Couldn't find [?>] to close the processing instruction starting at [$i]"
                    }
                    # land on the '>' so the loop increment moves just past it
                    $i = $skipTo + 1
                    continue
                } elseif ($content[$i+1] -eq '!') {
                    # CDATA section, comment or another declaration (e.g. DOCTYPE);
                    # none of them is a regular element, so skip to the terminator.
                    # Nested CDATA blocks are not supported.
                    if ([string]::CompareOrdinal($content, $i, '<![CDATA[', 0, 9) -eq 0) {
                        $terminator = ']]>'
                    } elseif ([string]::CompareOrdinal($content, $i, '<!--', 0, 4) -eq 0) {
                        $terminator = '-->'
                    } else {
                        $terminator = '>'
                    }
                    $skipTo = $content.IndexOf($terminator, $i + 2, $ordinal)
                    if ($skipTo -eq -1) {
                        throw "$logLead : Couldn't find [$terminator] to close the markup starting at [$i]"
                    }
                    # land on the last character of the terminator
                    $i = $skipTo + $terminator.Length - 1
                    continue
                } else {
                    $beginTag = $true
                    $currentTag = ''
                }
            } elseif ($char -eq '>') {
                if (-not $beginTag) {
                    # a literal '>' in text content is legal and not the end of a tag
                    continue
                }
                $beginTag = $false
                if ($currentTag.Length -eq 0) {
                    throw "$logLead : Found an empty or invalid tag at [$i]"
                }

                # a trailing '/' (forming '/>') marks a self-closing tag
                $isSelfClosing = $currentTag.EndsWith('/')
                if ($isSelfClosing) {
                    $currentTag = $currentTag.Substring(0, $currentTag.Length - 1)
                }
                $parsedTag, $nodeSet = (parseTag $currentTag)
                $currentTag = ''

                $foundNodes = $null
                $innerText = $null

                if (-not $isSelfClosing) {
                    # find the end-tag that belongs to this element, honouring nested
                    # elements with the same name
                    $closingTag = "</$($parsedTag)>"
                    $closingTagIndex = findClosingTagIndex -content $content -tagName $parsedTag -startIndex ($i + 1)

                    if ($closingTagIndex -eq -1) {
                        Write-Host $content.Substring($i)
                        throw "$logLead : Couldn't find a closing tag for [$($parsedTag)] starting at or around [$i]"
                    }

                    $innerContent = $content.Substring($i + 1, $closingTagIndex - $i - 1)
                    if (![string]::IsNullOrWhiteSpace($innerContent)) {
                        if ($innerContent.IndexOf('<') -gt -1) {
                            # the element has markup inside: parse it as child nodes
                            $childNodes = parseNodes($innerContent)
                            if ($null -ne $childNodes) {
                                $foundNodes = $childNodes
                            }
                        } else {
                            $innerText = @{ Text = $innerContent }
                        }
                    }

                    # continue right after the closing tag; the loop increment adds
                    # the final +1, so park on the closing tag's own '>'
                    $i = $closingTagIndex + $closingTag.Length - 1
                }

                $parsedElements.$parsedTag = Merge-Objects -Objects $parsedElements.$parsedTag,$nodeSet,$foundNodes,$innerText -DontClobber -DontDeepMerge
            } elseif ($beginTag) {
                $currentTag += $char
            }
        }

        return $parsedElements
    }

    $capture = parseNodes($rawcontent)

    return $capture
}
|