Added better error handling and daemon health check
This commit is contained in:
parent
bd8ba11410
commit
fc319f0f73
58
Moonlight/App/Diagnostics/HealthChecks/DaemonHealthCheck.cs
Normal file
58
Moonlight/App/Diagnostics/HealthChecks/DaemonHealthCheck.cs
Normal file
|
@ -0,0 +1,58 @@
|
|||
using System.Diagnostics;
|
||||
using Microsoft.Extensions.Diagnostics.HealthChecks;
|
||||
using Moonlight.App.Database.Entities;
|
||||
using Moonlight.App.Repositories;
|
||||
using Moonlight.App.Services;
|
||||
|
||||
namespace Moonlight.App.Diagnostics.HealthChecks;
|
||||
|
||||
/// <summary>
/// Health check that probes the daemon of every node returned by the node
/// repository and reports an aggregated status: healthy when no probe fails,
/// degraded when some fail and unhealthy when every probe fails.
/// </summary>
public class DaemonHealthCheck : IHealthCheck
{
    private readonly Repository<Node> NodeRepository;
    private readonly NodeService NodeService;

    /// <summary>
    /// Creates the health check with the services it needs to enumerate and probe nodes.
    /// </summary>
    /// <param name="nodeRepository">Repository used to load the nodes to probe.</param>
    /// <param name="nodeService">Service used to query each node's daemon.</param>
    public DaemonHealthCheck(Repository<Node> nodeRepository, NodeService nodeService)
    {
        NodeRepository = nodeRepository;
        NodeService = nodeService;
    }

    /// <summary>
    /// Requests the cpu metrics of every node; a node whose request throws is counted as offline.
    /// </summary>
    /// <param name="context">Health check context supplied by the health check service (not used here).</param>
    /// <param name="cancellationToken">Cancellation token supplied by the health check service (not used here).</param>
    /// <returns>
    /// Healthy when no node failed (including when there are no nodes at all),
    /// unhealthy when every node failed, degraded otherwise. For failed nodes the
    /// result data maps the node name to the demystified exception text.
    /// </returns>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken cancellationToken = new CancellationToken())
    {
        var nodes = NodeRepository.Get().ToArray();

        var healthCheckData = new Dictionary<string, object>();
        var offlineCount = 0;

        foreach (var node in nodes)
        {
            try
            {
                // Any successful response counts as "daemon reachable"; the metrics themselves are discarded
                await NodeService.GetCpuMetrics(node);
            }
            catch (Exception e)
            {
                offlineCount++;

                // Indexer instead of Add: a second failing node with the same name must not
                // throw from inside this catch block and abort the whole health check
                healthCheckData[node.Name] = e.ToStringDemystified();
            }
        }

        // Evaluated first so that an empty node list is not reported as "all offline"
        if (offlineCount == 0)
        {
            return HealthCheckResult.Healthy("All node daemons are online");
        }

        if (offlineCount == nodes.Length)
        {
            return HealthCheckResult.Unhealthy("All node daemons are offline", null, healthCheckData);
        }

        return HealthCheckResult.Degraded($"{offlineCount} node daemons are offline", null, healthCheckData);
    }
}
|
|
@ -70,7 +70,8 @@ namespace Moonlight
|
|||
builder.Services.AddHttpContextAccessor();
|
||||
builder.Services.AddHealthChecks()
|
||||
.AddCheck<DatabaseHealthCheck>("Database")
|
||||
.AddCheck<NodeHealthCheck>("Nodes");
|
||||
.AddCheck<NodeHealthCheck>("Nodes")
|
||||
.AddCheck<DaemonHealthCheck>("Daemons");
|
||||
|
||||
// Databases
|
||||
builder.Services.AddDbContext<DataContext>();
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
@(entry.Key)
|
||||
</button>
|
||||
</h2>
|
||||
<div id="healthCheck_body_@(entry.Key.ToLower())" class="accordion-collapse collapse show" data-bs-parent="#healthCheck">
|
||||
<div id="healthCheck_body_@(entry.Key.ToLower())" class="accordion-collapse collapse" data-bs-parent="#healthCheck">
|
||||
<div class="accordion-body">
|
||||
<b><TL>Status</TL>:</b> <TL>@(entry.Value.Status)</TL><br/>
|
||||
<b><TL>Description</TL>:</b> @(entry.Value.Description)<br/>
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
@using Moonlight.App.Models.Misc
|
||||
@using Moonlight.App.Services
|
||||
@using Newtonsoft.Json
|
||||
@using Logging.Net
|
||||
|
||||
@inject ServerRepository ServerRepository
|
||||
@inject UserRepository UserRepository
|
||||
|
@ -101,7 +102,28 @@
|
|||
</a>
|
||||
</div>
|
||||
</div>
|
||||
<HealthCheckView HealthCheck="@HealthCheckData" />
|
||||
|
||||
<LazyLoader Load="LoadHealthCheckData">
|
||||
@if (HealthCheckData == null)
|
||||
{
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<div class="card-title">
|
||||
<TL>Moonlight health</TL>
|
||||
</div>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="alert alert-warning">
|
||||
<TL>Unable to fetch health check data</TL>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
}
|
||||
else
|
||||
{
|
||||
<HealthCheckView HealthCheck="@HealthCheckData"/>
|
||||
}
|
||||
</LazyLoader>
|
||||
</LazyLoader>
|
||||
</OnlyAdmin>
|
||||
|
||||
|
@ -112,15 +134,22 @@
|
|||
private int DomainCount = 0;
|
||||
private int WebSpaceCount = 0;
|
||||
|
||||
private HealthCheck HealthCheckData;
|
||||
private HealthCheck? HealthCheckData;
|
||||
|
||||
private async Task Load(LazyLoader lazyLoader)
|
||||
private Task Load(LazyLoader lazyLoader)
|
||||
{
|
||||
ServerCount = ServerRepository.Get().Count();
|
||||
UserCount = UserRepository.Get().Count();
|
||||
DomainCount = DomainRepository.Get().Count();
|
||||
WebSpaceCount = WebSpaceRepository.Get().Count();
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
private async Task LoadHealthCheckData(LazyLoader lazyLoader)
|
||||
{
|
||||
await lazyLoader.SetText("Loading health check data");
|
||||
|
||||
var appUrl = ConfigService
|
||||
.GetSection("Moonlight")
|
||||
.GetValue<string>("AppUrl");
|
||||
|
@ -131,14 +160,11 @@
|
|||
var json = await client.GetStringAsync($"{appUrl}/_health");
|
||||
HealthCheckData = JsonConvert.DeserializeObject<HealthCheck>(json) ?? new();
|
||||
}
|
||||
catch (Exception)
|
||||
catch (Exception e)
|
||||
{
|
||||
HealthCheckData = new()
|
||||
{
|
||||
Status = "Healthy",
|
||||
Entries = new(),
|
||||
TotalDuration = TimeSpan.MinValue
|
||||
};
|
||||
HealthCheckData = null;
|
||||
Logger.Warn("Unable to fetch health check data");
|
||||
Logger.Warn(e);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue