Add scrape/ingestion engine + validation, and 24h shift hour-range visualization
Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus.Flagged. Admin /Admin gets run-now, source list, confidence + flagged queue. Hour-range viz: _HourBar 24h timeline bar (colored by type, overnight wrap) on shift cards, recommendation cards, and detail. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,59 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
/// <summary>
/// Configuration for scheduled ingestion runs.
/// </summary>
public class IngestionOptions
{
    /// <summary>
    /// Whether scheduled ingestion is switched on. Defaults to <c>false</c>, so it must be
    /// opted into via config.
    /// </summary>
    public bool Enabled { get; set; }

    /// <summary>
    /// Minutes between scheduled runs. Defaults to 30.
    /// </summary>
    public int IntervalMinutes { get; set; } = 30;
}
|
||||
|
||||
/// <summary>
/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default
/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI.
/// </summary>
/// <remarks>
/// A failed run is logged and the loop keeps going; only cancellation of the host's stopping
/// token ends the loop.
/// </remarks>
public class IngestionWorker : BackgroundService
{
    private readonly IServiceScopeFactory _scopes;
    private readonly IngestionOptions _opts;
    private readonly ILogger<IngestionWorker> _log;

    /// <summary>
    /// Creates the worker.
    /// </summary>
    /// <param name="scopes">Factory used to open a fresh DI scope for every run.</param>
    /// <param name="opts">Ingestion settings; the value is read once, at construction.</param>
    /// <param name="log">Logger for run summaries and failures.</param>
    public IngestionWorker(IServiceScopeFactory scopes, IOptions<IngestionOptions> opts,
        ILogger<IngestionWorker> log)
    {
        _scopes = scopes;
        _opts = opts.Value;
        _log = log;
    }

    /// <summary>
    /// Runs one ingestion pass, waits for the configured interval, and repeats until
    /// <paramref name="stoppingToken"/> is cancelled. Returns immediately when ingestion is disabled.
    /// </summary>
    /// <param name="stoppingToken">Signalled by the host when the service should stop.</param>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        if (!_opts.Enabled)
        {
            _log.LogInformation("Ingestion worker disabled (Ingestion:Enabled=false).");
            return;
        }

        // Clamp to at least one minute and log the value actually used, so a configured
        // 0 or negative interval does not report a misleading cadence.
        var minutes = Math.Max(1, _opts.IntervalMinutes);
        var interval = TimeSpan.FromMinutes(minutes);
        _log.LogInformation("Ingestion worker on; every {Min} min.", minutes);

        while (!stoppingToken.IsCancellationRequested)
        {
            try
            {
                // New scope per run so scoped services are resolved and disposed per pass.
                using var scope = _scopes.CreateScope();
                var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
                var summary = await svc.RunAsync(stoppingToken);
                _log.LogInformation("Scheduled ingestion: queued={Q} flagged={F} spam={S} dupes={D}",
                    summary.TotalQueued, summary.TotalFlagged, summary.TotalSpam, summary.TotalDuplicates);
            }
            catch (OperationCanceledException) when (stoppingToken.IsCancellationRequested)
            {
                // Genuine shutdown: leave the loop quietly.
                break;
            }
            catch (Exception ex)
            {
                // Includes OperationCanceledException that is NOT caused by shutdown (e.g. a
                // timeout inside the run); letting that escape would end the worker for good.
                _log.LogError(ex, "Scheduled ingestion run failed");
            }

            try { await Task.Delay(interval, stoppingToken); }
            catch (OperationCanceledException) { break; }
        }
    }
}
|
||||
Reference in New Issue
Block a user