ps/Modules/Alkami.PowerShell.ServiceFabric/Public/Join-AlkamiServiceFabricCluster.ps1
2023-05-30 22:51:22 -07:00

137 lines
5.9 KiB
PowerShell

function Join-AlkamiServiceFabricCluster {
<#
.SYNOPSIS
Adds the executing server into an existing ServiceFabric cluster. Returns $true on success.
.DESCRIPTION
Probes each supplied server on the Service Fabric client endpoint port (19000) and joins the
first one that answers. The Service Fabric runtime is pulled down through the
Alkami.DevOps.ServiceFabric Chocolatey package, the server certificate common name is read from
the cluster manifest of the node being joined, and the package's AddNode.ps1 script performs
the actual join. Every failure path writes a message and returns $false.
.PARAMETER Servers
The server hostname(s) of an existing cluster. Only one is required.
.OUTPUTS
System.Boolean. $true when the local machine is listening on the Service Fabric endpoint port
after AddNode.ps1 has run, otherwise $false.
#>
    [CmdletBinding()]
    [OutputType([System.Boolean])]
    Param(
        [Parameter(Mandatory = $true)]
        [string[]]$Servers
    )

    $loglead = (Get-LogLeadName);

    # Make sure we're not installing this on a web server.
    if (Test-IsWebServer) {
        Write-Error "$loglead : Cannot install Service Fabric on a Web server!";
        return $false;
    }

    if (($null -eq $Servers) -or ($Servers.Count -eq 0)) {
        Write-Error "$loglead No SF cluster servers passed in to join. Exiting.";
        return $false;
    }

    $endpointPort = 19000;

    Write-Host "$loglead : Scanning for an existing Service Fabric cluster to join.";
    $localIP = (Get-IpAddress);
    $existingClusterNodeIp = $null;
    foreach ($server in $Servers) {
        # Never try to join through ourselves.
        if ($server -eq $localIP) {
            continue;
        }

        Write-Host "$loglead : Testing endpoint $($server):$endpointPort for a Service Fabric cluster.";
        $testEndpoint = (Test-NetConnection -ComputerName $server -Port $endpointPort);
        if ($testEndpoint.TcpTestSucceeded) {
            $existingClusterNodeIp = $server;
            break;
        }
    }

    if ($null -eq $existingClusterNodeIp) {
        Write-Host "$loglead : Could not find a running service fabric cluster to join server to.";
        return $false;
    }

    # Download the installer/runtime files. The native command's stdout is sent to the host so
    # that it stays visible but does not become part of this function's Boolean return value.
    Write-Host "$loglead : Downloading Service Fabric installer/runtime files via Chocolatey."
    choco upgrade Alkami.DevOps.ServiceFabric -yr | Out-Host;

    $chocoInstallPath = Get-ChocolateyInstallPath
    $fabricBasePath = Join-Path $chocoInstallPath "lib\Alkami.DevOps.ServiceFabric\files"
    if (!(Test-Path $fabricBasePath)) {
        Write-Error "$loglead : Service Fabric was not downloaded correctly. Check the Chocolatey logs.";
        return $false;
    }

    # Make sure that the offline installation .cab was downloaded successfully.
    $runtime = Get-ChildItem -Path $fabricBasePath -Filter "*.cab" | Select-Object -First 1;
    if (($null -eq $runtime) -or (!(Test-Path $runtime.FullName))) {
        Write-Error "$loglead : The Service Fabric offline installation .cab was not downloaded correctly. Check the chocolatey logs.";
        return $false;
    }
    $runtimePath = $runtime.FullName;

    # Determine the name of the SF node and its fqdn for windows authentication.
    $nodeName = $env:COMPUTERNAME;
    $fqdn = (Get-FullyQualifiedServerName);

    # Determine endpoint of the existing cluster endpoint we're joining.
    $endpoint = "{0}:$endpointPort" -f $existingClusterNodeIp;

    # The upgrade domain is derived from the fqdn hash. Widen to [long] before Abs() because
    # [Math]::Abs([int]::MinValue) throws an OverflowException.
    $upgradeDomain = [Math]::Abs([long]($fqdn.GetHashCode()));

    # Use the hostname as the fault domain "zone" to get around overtly strict rules around
    # microservice placements in SF.
    $faultDomain = "fd:/{0}/r0" -f $env:COMPUTERNAME;

    $environmentName = (Get-AppSetting -appSettingKey "Environment.Name");
    $workerName = (Format-AlkamiEnvironmentWorkerNodeType $environmentName);

    # TODO: Fix this to remove a manual setup step.
    # The issue is that this relies on SF module functions to work.
    # However, the SF module is installed with the AddNode.ps1 script below.
    # New-AlkamiServiceFabricEnvironmentNodeType -environmentName $environmentName;

    # Look up the server certificate name from the cluster node we verified is reachable.
    # ($Servers[0] may be offline or may be this very machine, so it is not used here.)
    $clusterManifestLocation = "C:\ProgramData\SF\clusterManifest.xml";
    $clusterManifestLocation = Get-UncPath -filePath $clusterManifestLocation -ComputerName $existingClusterNodeIp;
    if (!(Test-Path $clusterManifestLocation)) {
        Write-Error "$loglead : Could not find Cluster Manifest config file at '$clusterManifestLocation'.";
        return $false;
    }

    # Read the certificate common name to connect to the cluster with.
    $namespace = @{ x = "http://schemas.microsoft.com/2011/01/fabric" };
    $serverCertNode = (Select-Xml -Path $clusterManifestLocation -XPath "//x:ServerCertificate" -Namespace $namespace) | Select-Object -ExpandProperty Node -First 1;
    if ([string]::IsNullOrWhiteSpace($serverCertNode.X509FindValue)) {
        Write-Error "$loglead Could not locate a certificate common name in the cluster manifest. Exiting.."
        return $false;
    }
    $ServerCertificateCommonName = $serverCertNode.X509FindValue;

    # Look up the certificate to connect to the cluster with. Without it there is no thumbprint
    # to hand to AddNode.ps1, so stop here instead of attempting a join that cannot authenticate.
    $serverCert = Find-CertificateByName -CommonName $ServerCertificateCommonName -StoreLocation "LocalMachine" -StoreName "My";
    if ($null -eq $serverCert) {
        Write-Error "$loglead Could not locate certificate $ServerCertificateCommonName. Make sure it is loaded to the local machine store.";
        return $false;
    }
    $serverCertThumbprint = $serverCert.Thumbprint;

    Write-Host "$loglead : Connecting to existing Service Fabric cluster on node $existingClusterNodeIp";
    $installerPath = Join-Path $fabricBasePath "AddNode.ps1";
    $addNodeArguments = @{
        NodeName                         = $nodeName
        NodeType                         = $workerName
        NodeIPAddressorFQDN              = $fqdn
        ExistingClientConnectionEndpoint = $endpoint
        UpgradeDomain                    = $upgradeDomain
        FaultDomain                      = $faultDomain
        FabricRuntimePackagePath         = $runtimePath
        AcceptEULA                       = $true
        X509Credential                   = $true
        ServerCertThumbprint             = $serverCertThumbprint
        StoreLocation                    = "LocalMachine"
        StoreName                        = "My"
        FindValueThumbprint              = $serverCertThumbprint
        Verbose                          = $true
    };
    # Send any pipeline output of the installer to the host so only the Boolean is returned.
    & $installerPath @addNodeArguments | Out-Host;

    # See if the SF port is listening as a test. The SF AddNode script doesn't actually fail.
    Start-Sleep -Seconds 5;
    $listener = Get-NetTCPConnection | Where-Object { ($_.LocalPort -eq $endpointPort) -and ($_.State -eq "Listen") };
    if ($null -eq $listener) {
        Write-Error "$loglead : Server did not successfully join cluster. Read logs.";
        return $false;
    }

    return $true;
}