ps/Modules/Cole.PowerShell.Developer/Public/Get-UrlComponents.ps1
2023-05-30 22:51:22 -07:00

113 lines
3.7 KiB
PowerShell

function Get-UrlComponents {
    <#
    .SYNOPSIS
        Decomposes a URL string into its constituent components.
    .DESCRIPTION
        Splits the input into scheme, optional user information, host, port,
        path, query and fragment using plain string operations.

        The emitted object has these properties:
          Scheme         - text before "://", or $null when no scheme is present.
          Hostname       - host portion of the authority (IPv6 literals keep their brackets).
          Port           - explicit port as a string, or $null when the URL does not specify one.
                           No default port is inferred from the scheme.
          Path           - path portion (starts with "/" when present, otherwise empty).
          Segments       - non-blank path segments, or $null when the path is blank.
          Query          - query including the leading "?", or $null.
          Fragment       - fragment including the leading "#", or $null.
          Username       - user name when user information has no password, otherwise $null.
          Credential     - PSCredential when user information includes a password, otherwise $null.
          OriginalString - the input, with any password replaced by "<REDACTED SECURE VALUE>".

        Either Username or Credential is populated, never both.
    .PARAMETER Url
        The URL string to parse. Accepts pipeline input; one object is emitted per input string.
    .OUTPUTS
        PSCustomObject describing the parsed URL.
    .EXAMPLE
        Get-UrlComponents -Url 'https://example.com:8443/api/v1?x=1#top'
    .EXAMPLE
        'http://a.example/x', 'http://b.example/y' | Get-UrlComponents
    #>
    [CmdletBinding()]
    [OutputType([object])]
    param (
        [Parameter(Mandatory = $true, ValueFromPipeline = $true)]
        [ValidateNotNullOrEmpty()]
        [string]$Url
    )
    begin {
        $logLead = (Get-LogLeadName)
        $schemeDelimiter = "://"
        # The authority (userinfo@host:port) ends at the first of these characters, or at end of input.
        $authorityTerminators = [char[]]@('/', '?', '#')
    }
    process {
        $originalString = $Url
        # Work on a copy: $Url carries [ValidateNotNullOrEmpty()], which is also enforced on
        # assignment, so writing an empty remainder back into it would throw.
        $remaining = $Url

        # Declare every output up front so it is obvious what can be left as $null.
        $Username = $null
        $password = $null
        $Scheme = $null
        $Hostname = $null
        $Port = $null
        $Credential = $null
        $Query = $null
        $Fragment = $null

        # --- Scheme ---
        # Only treat "://" as the scheme delimiter when nothing that ends an authority
        # precedes it; otherwise a scheme-less input such as "host/?u=http://x" would be mis-split.
        $schemeAt = $remaining.IndexOf($schemeDelimiter)
        if ($schemeAt -gt -1 -and $remaining.IndexOfAny($authorityTerminators) -eq ($schemeAt + 1)) {
            $Scheme = $remaining.Substring(0, $schemeAt)
            $remaining = $remaining.Substring($schemeAt + $schemeDelimiter.Length)
        }

        # --- Authority ---
        $authorityEnd = $remaining.IndexOfAny($authorityTerminators)
        if ($authorityEnd -lt 0) {
            # No path, query or fragment: everything left is the authority.
            $authorityEnd = $remaining.Length
        }
        $authority = $remaining.Substring(0, $authorityEnd)
        $remaining = $remaining.Substring($authorityEnd)

        # --- User information (only when an '@' is actually present in the authority) ---
        $firstAt = $authority.IndexOf('@')
        if ($firstAt -gt -1) {
            $userInfo = $authority.Substring(0, $firstAt)
            $authority = $authority.Substring($firstAt + 1)
            $userInfoParts = $userInfo -split ':'
            if ($userInfoParts.Length -gt 2) {
                throw "$logLead : A username field can not contain more than two segments. You should probaly just not be using a username anyways, it's highly insecure. But if you must, check the RFC. Can only have one colon here. https://datatracker.ietf.org/doc/html/rfc3986#section-3.2.1"
            }
            $Username = $userInfoParts[0]
            if ($userInfoParts.Length -gt 1) {
                $password = $userInfoParts[1]
            }
            if (![string]::IsNullOrWhiteSpace($password)) {
                # Never hand the clear-text password back in OriginalString.
                $replacePassword = ":$password@"
                $originalString = $originalString.Replace($replacePassword, ":<REDACTED SECURE VALUE>@")
                # It is okay to force this here because we are literally parsing a raw text password.
                $password = (ConvertTo-SecureString $password -AsPlainText -Force)
                $Credential = New-Object System.Management.Automation.PSCredential -ArgumentList $Username, $password
                # Either return the credential OR the Username, never both
                $Username = $null
            }
        }

        # --- Host and port ---
        $Hostname = $authority
        # For a bracketed IPv6 literal, the colons inside the brackets are part of the
        # host, so only look for the port delimiter after the closing bracket.
        $portSearchStart = 0
        if ($Hostname.StartsWith('[')) {
            $closingBracket = $Hostname.IndexOf(']')
            if ($closingBracket -gt -1) {
                $portSearchStart = $closingBracket
            }
        }
        $portDelimiter = $Hostname.IndexOf(':', $portSearchStart)
        if ($portDelimiter -gt -1) {
            $Port = $Hostname.Substring($portDelimiter + 1)
            $Hostname = $Hostname.Substring(0, $portDelimiter)
        }

        # --- Fragment (stripped first so a '?' inside the fragment is not read as a query) ---
        $fragmentDelimiter = $remaining.IndexOf('#')
        if ($fragmentDelimiter -gt -1) {
            $Fragment = $remaining.Substring($fragmentDelimiter)
            $remaining = $remaining.Substring(0, $fragmentDelimiter)
        }

        # --- Query ---
        $queryDelimiter = $remaining.IndexOf('?')
        if ($queryDelimiter -gt -1) {
            $Query = $remaining.Substring($queryDelimiter)
            $remaining = $remaining.Substring(0, $queryDelimiter)
        }

        # Scheme, user, host, port, query and fragment are trimmed off; what is left is the path.
        $Path = $remaining
        $segments = $null
        if (![string]::IsNullOrWhiteSpace($Path)) {
            $segments = @($Path -split '/').Where({ ![string]::IsNullOrWhiteSpace($_) })
        }

        return [PSCustomObject]@{
            Hostname       = $Hostname
            Scheme         = $Scheme
            Port           = $Port
            Path           = $Path
            Segments       = $segments
            Query          = $Query
            Fragment       = $Fragment
            Username       = $Username
            Credential     = $Credential
            OriginalString = $originalString
        }
    }
}