<#
.SYNOPSIS
    Reads an XML-like file with a small hand-rolled parser and returns its elements as nested hashtables.

.DESCRIPTION
    The file at -Path is read as a single string and scanned character by character.
    For each element the tag name and attributes are extracted, the matching closing
    tag is located (nested elements with the same name are depth-counted), and the
    inner content is either parsed recursively (when it contains '<') or stored as
    @{ Text = <inner content> }.

    The pieces for an element (any previous entry under the same tag name, its
    attributes, its child elements and its text) are combined by Merge-Objects with
    -DontClobber -DontDeepMerge and stored under the tag name.
    NOTE(review): the exact shape of a merged entry depends on Merge-Objects, which is
    defined outside this function - confirm against that helper.

    Known limitations of this lightweight parser:
      * A '>' inside an attribute value terminates the tag early.
      * Processing instructions, comments, CDATA sections and '<!...>' declarations are
        skipped; their content is not captured.
      * A closing tag that appears inside a comment or CDATA section is still treated
        as a closing tag when searching for the end of an element.

.PARAMETER Path
    Path of the file to read. Must resolve via Test-Path.

.OUTPUTS
    Hashtable keyed by tag name.
#>
function Read-Xml {
    param (
        [Parameter(Mandatory = $false)]
        [ValidateNotNullOrEmpty()]
        [string]$Path
    )

    $logLead = (Get-LogLeadName)

    # -Path is not mandatory, so guard the empty case explicitly instead of letting
    # Test-Path fail with a parameter-binding error.
    if ([string]::IsNullOrWhiteSpace($Path) -or !(Test-Path -Path $Path)) {
        throw "$logLead : Not able to resolve [$Path]"
    }

    # Splits the text between '<' and '>' (without a trailing '/') into a tag name and
    # an attribute hashtable. Returns two values: the name, then the attributes
    # ($null when the tag has no attribute section).
    function parseTag {
        param (
            [string]$tagContent
        )

        # Content looks like
        #   tagName attribute-name="attribute-value"
        # or
        #   tagName attribute-name='attribute-value'
        # Either quote style may open a value; the value ends at the next matching,
        # non-escaped quote of the same kind.
        #
        # Characters that would confuse the later split-on-space step are swapped for
        # control characters while scanning and swapped back once the value is isolated.
        $charEscapedSingleQuote = [char]17
        $charEscapedDoubleQuote = [char]18
        $charEscapedStringSpace = [char]19

        $tagContent = $tagContent.Replace('\"', [string]$charEscapedDoubleQuote).Replace("\'", [string]$charEscapedSingleQuote)

        # Work on characters, not UTF-8 bytes: byte offsets and character offsets
        # diverge as soon as the tag contains a non-ASCII character.
        $tagChars = $tagContent.ToCharArray()
        $noQuote = [char]0
        $activeQuote = $noQuote

        for ($i = 0; $i -lt $tagChars.Length; $i++) {
            $char = $tagChars[$i]

            if ($activeQuote -ne $noQuote) {
                if ($char -eq $activeQuote) {
                    # found the end of the quoted value
                    $activeQuote = $noQuote
                }
                elseif ($char -eq ' ') {
                    # hide the space so the value survives the split on ' ' below
                    $tagChars[$i] = $charEscapedStringSpace
                }
                elseif ($char -eq '"') {
                    $tagChars[$i] = $charEscapedDoubleQuote
                }
                elseif ($char -eq "'") {
                    $tagChars[$i] = $charEscapedSingleQuote
                }
            }
            elseif (($char -eq '"') -or ($char -eq "'")) {
                $activeQuote = $char
            }
            elseif ([char]::IsWhiteSpace($char)) {
                # tabs / newlines between attributes act as separators too
                $tagChars[$i] = [char]' '
            }
        }

        $tagContent = (-join $tagChars).Trim(' ')

        # Handle   attr = "value"   by collapsing it to   attr="value"
        while (($tagContent.IndexOf(' =') -gt -1) -or ($tagContent.IndexOf('= ') -gt -1)) {
            $tagContent = $tagContent.Replace(' =', '=').Replace('= ', '=')
        }

        # Everything is escaped now: split on spaces, then on the first '=', then strip quotes.
        $splits = $tagContent -split ' '
        $splitCount = $splits.Count

        $node = @{ Name = $splits[0] }
        if ($splitCount -gt 1) {
            $node.Attributes = @{}
        }

        for ($i = 1; $i -lt $splitCount; $i++) {
            $attributeRawValue = $splits[$i]
            if ([string]::IsNullOrWhiteSpace($attributeRawValue)) {
                # repeated separators produce empty entries
                continue
            }

            # An attribute without '=' stands alone and is set equal to its own name.
            $name = $attributeRawValue
            $value = $attributeRawValue

            $attributeEqualsIndex = $attributeRawValue.IndexOf('=')
            if ($attributeEqualsIndex -ne -1) {
                $name = $attributeRawValue.Substring(0, $attributeEqualsIndex)
                $value = $attributeRawValue.Substring($attributeEqualsIndex + 1).
                    Replace('"', '').
                    Replace("'", '').
                    Replace([string]$charEscapedStringSpace, ' ').
                    Replace([string]$charEscapedDoubleQuote, '"').
                    Replace([string]$charEscapedSingleQuote, "'")
            }

            $node.Attributes.$name = $value
        }

        return $node.Name, $node.Attributes
    }

    # Returns the index of the LAST character of the first $terminator found at or
    # after $startIndex, so the caller's loop increment lands just past it.
    function findSectionEnd {
        param (
            [string]$content,
            [string]$terminator,
            [int]$startIndex,
            [string]$description
        )

        $terminatorIndex = $content.IndexOf($terminator, $startIndex, [System.StringComparison]::Ordinal)
        if ($terminatorIndex -eq -1) {
            # Without this check a missing terminator would rewind the caller to index 0 and loop forever.
            throw "$logLead : Couldn't find the end [$terminator] of the $description starting at or around [$startIndex]"
        }

        return $terminatorIndex + $terminator.Length - 1
    }

    # Returns the index of the closing tag that matches an element whose opening tag
    # ended just before $startIndex, or -1 when there is none. Nested elements with
    # the same name are depth-counted; nested self-closing ones are ignored.
    function findClosingTagIndex {
        param (
            [string]$content,
            [string]$tagName,
            [int]$startIndex
        )

        $openToken = "<$tagName"
        $closeToken = "</$tagName>"
        $depth = 1
        $cursor = $startIndex

        while ($true) {
            $nextClose = $content.IndexOf($closeToken, $cursor, [System.StringComparison]::Ordinal)
            if ($nextClose -eq -1) {
                return -1
            }

            # Every same-name opening tag before this closing tag needs its own closing tag.
            $scan = $cursor
            while ($true) {
                $nextOpen = $content.IndexOf($openToken, $scan, [System.StringComparison]::Ordinal)
                if (($nextOpen -eq -1) -or ($nextOpen -ge $nextClose)) {
                    break
                }

                $afterName = $nextOpen + $openToken.Length
                $scan = $afterName

                # Only count an exact name match: '<item' must not match '<items'.
                $boundary = $content[$afterName]
                if (($boundary -eq '>') -or ($boundary -eq '/') -or [char]::IsWhiteSpace($boundary)) {
                    $tagEnd = $content.IndexOf('>', $afterName)
                    if (($tagEnd -gt -1) -and ($content[$tagEnd - 1] -ne '/')) {
                        $depth++
                    }
                }
            }

            $depth--
            if ($depth -eq 0) {
                return $nextClose
            }

            $cursor = $nextClose + $closeToken.Length
        }
    }

    # Scans $content and returns a hashtable of the elements found at this nesting
    # level; calls itself for inner content that contains further markup.
    function parseNodes {
        param (
            [string]$content
        )

        $parsedElements = @{}
        $currentTag = ''
        $beginTag = $false
        $contentLength = $content.Length

        for ($i = 0; $i -lt $contentLength; $i++) {
            $char = $content[$i]

            if ($char -eq '<') {
                if ([string]::CompareOrdinal($content, $i, '<?', 0, 2) -eq 0) {
                    # xml declaration / processing instruction
                    $i = findSectionEnd -content $content -terminator '?>' -startIndex ($i + 2) -description 'processing instruction'
                }
                elseif ([string]::CompareOrdinal($content, $i, '<![CDATA[', 0, 9) -eq 0) {
                    # CDATA ends at the next ']]>'; its content is skipped, not captured.
                    $i = findSectionEnd -content $content -terminator ']]>' -startIndex ($i + 9) -description 'CDATA section'
                }
                elseif ([string]::CompareOrdinal($content, $i, '<!--', 0, 4) -eq 0) {
                    $i = findSectionEnd -content $content -terminator '-->' -startIndex ($i + 4) -description 'comment'
                }
                elseif ([string]::CompareOrdinal($content, $i, '<!', 0, 2) -eq 0) {
                    # other declarations such as <!DOCTYPE ...>
                    $i = findSectionEnd -content $content -terminator '>' -startIndex ($i + 2) -description 'declaration'
                }
                else {
                    $beginTag = $true
                    $currentTag = ''
                }
            }
            elseif (($char -eq '>') -and $beginTag) {
                # A '>' with no open tag is plain text and is ignored; reacting to it
                # would re-parse whatever tag was seen last.
                $beginTag = $false

                if ($currentTag.Length -eq 0) {
                    throw "$logLead : Found an empty or invalid tag at [$i]"
                }

                $isSelfClosing = $currentTag.EndsWith('/')
                if ($isSelfClosing) {
                    $currentTag = $currentTag.Substring(0, $currentTag.Length - 1)
                }

                $parsedTag, $nodeSet = (parseTag $currentTag)
                $foundNodes = $null
                $innerText = $null

                # A self-closing tag (.../>) has no end-tag to look for.
                if (-not $isSelfClosing) {
                    $closingTag = "</$parsedTag>"
                    $closingTagIndex = findClosingTagIndex -content $content -tagName $parsedTag -startIndex ($i + 1)
                    if ($closingTagIndex -eq -1) {
                        throw "$logLead : Couldn't find a closing tag for [$($parsedTag)] starting at or around [$i]"
                    }

                    $endIndex = $closingTagIndex + $closingTag.Length
                    $innerContent = $content.Substring($i + 1, $closingTagIndex - $i - 1)

                    if (![string]::IsNullOrWhiteSpace($innerContent)) {
                        if ($innerContent.IndexOf('<') -gt -1) {
                            $childNodes = parseNodes -content $innerContent
                            if ($null -ne $childNodes) {
                                $foundNodes = $childNodes
                            }
                        }
                        else {
                            $innerText = @{ Text = $innerContent }
                        }
                    }

                    # $endIndex is the first character after the closing tag; the loop's
                    # increment must land exactly there so an adjacent '<' is not skipped.
                    $i = $endIndex - 1
                }

                $parsedElements.$parsedTag = Merge-Objects -Objects $parsedElements.$parsedTag,$nodeSet,$foundNodes,$innerText -DontClobber -DontDeepMerge
            }
            elseif ($beginTag) {
                $currentTag += $char
            }
        }

        return $parsedElements
    }

    $rawcontent = (Get-Content -Raw -Path $Path)
    $capture = parseNodes -content $rawcontent
    return $capture
}