[Demo] Add admin demo-mode toggle + generic website ingest source
- AppSetting: DemoMode, WebsitesEnabled, WebsiteUrls
- Facility.IsDemo flag; SeedData split into SeedReferenceAsync (always) + SeedDemoAsync/ClearDemoAsync (idempotent, toggleable at runtime)
- WebsiteListingSource: scrape any admin-configured URL (og:title + content)
- Admin Settings: seed/clear demo card, demo-mode checkbox, website source fields; Program.cs seeds demo when DemoMode on (or in Development)
- EF migration DemoModeAndWebsites

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -41,6 +41,9 @@ public class SettingsService
|
||||
s.TelegramChannels = incoming.TelegramChannels?.Trim();
|
||||
s.BaleEnabled = incoming.BaleEnabled;
|
||||
s.BaleBotToken = incoming.BaleBotToken?.Trim();
|
||||
s.DemoMode = incoming.DemoMode;
|
||||
s.WebsitesEnabled = incoming.WebsitesEnabled;
|
||||
s.WebsiteUrls = incoming.WebsiteUrls?.Trim();
|
||||
s.DivarEnabled = incoming.DivarEnabled;
|
||||
s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim();
|
||||
s.DivarQueries = incoming.DivarQueries?.Trim();
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using JobsMedical.Web.Models;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
/// <summary>
/// Generic website source: downloads each admin-configured URL and reduces the page to plain
/// text made of the <c>og:title</c> meta value followed by the first matching content container
/// (<c>rtcl-description</c>, <c>entry-content</c>, <c>job-description</c>) or, when none of
/// those is present, the <c>og:description</c> meta value.
/// Lets the admin point the engine at any job page / channel archive / classifieds listing.
/// </summary>
/// <remarks>
/// Extraction is regex-based and best-effort; it is not a full HTML parser. Attribute values
/// that contain a literal <c>&gt;</c> are not supported.
/// </remarks>
public class WebsiteListingSource : IListingSource
{
    /// <summary>Pages whose extracted text is shorter than this are discarded as noise.</summary>
    private const int MinTextLength = 25;

    /// <summary>Upper bound (in UTF-16 code units) on the text kept per page.</summary>
    private const int MaxTextLength = 1800;

    /// <summary>
    /// A '|' in the title only counts as a "Title | Site name" separator when it sits past this
    /// index, so very short leading fragments are not mistaken for the real title.
    /// </summary>
    private const int MinTitleSeparatorIndex = 10;

    private const RegexOptions TagOptions =
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant | RegexOptions.Compiled;

    private static readonly Regex MetaTagRegex = new(@"<meta\b[^>]*>", TagOptions);
    private static readonly Regex PropertyAttrRegex = AttributeRegex("property");
    private static readonly Regex ContentAttrRegex = AttributeRegex("content");

    // Group 1 is "/" for a closing tag and empty for an opening tag.
    private static readonly Regex ContainerTagRegex = new(@"<(/?)(?:div|article)\b[^>]*>", TagOptions);

    private readonly IHttpClientFactory _http;
    private readonly ILogger<WebsiteListingSource> _log;

    public WebsiteListingSource(IHttpClientFactory http, ILogger<WebsiteListingSource> log)
    {
        _http = http;
        _log = log;
    }

    public string Name => "وبسایتها";

    /// <summary>
    /// Fetches every configured http/https URL and returns one item per page that yielded at
    /// least <see cref="MinTextLength"/> characters of text.
    /// </summary>
    /// <param name="s">Settings providing <c>WebsitesEnabled</c> and <c>WebsiteUrls</c>.</param>
    /// <param name="ct">Cancels the remaining downloads.</param>
    /// <returns>
    /// The scraped items; empty when the source is disabled or no URLs are configured. A failure
    /// on one URL is logged and does not abort the others. When <paramref name="ct"/> is
    /// cancelled, the items gathered so far are returned.
    /// </returns>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
    {
        var urls = AppSetting.SplitList(s.WebsiteUrls);
        if (!s.WebsitesEnabled || urls.Count == 0)
        {
            return Array.Empty<ScrapedItem>();
        }

        var client = _http.CreateClient("scrape");
        var items = new List<ScrapedItem>();
        foreach (var url in urls.Where(u => IsHttpUrl(u)))
        {
            try
            {
                var html = await client.GetStringAsync(url, ct);
                var text = Extract(html);
                if (text.Length >= MinTextLength)
                {
                    items.Add(new ScrapedItem($"وبسایت ({Host(url)})", text, url));
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // The caller asked us to stop: every remaining request would fail the same way,
                // so stop here instead of logging a bogus "fetch failed" warning per URL.
                // (HttpClient timeouts also surface as OperationCanceledException, but with
                // ct not cancelled; those fall through to the generic handler below.)
                break;
            }
            catch (Exception ex)
            {
                _log.LogWarning(ex, "Website fetch failed for {Url}", url);
            }
        }

        return items;
    }

    /// <summary>True when <paramref name="url"/> is a well-formed absolute http or https URL.</summary>
    private static bool IsHttpUrl(string url) =>
        Uri.TryCreate(url, UriKind.Absolute, out var uri)
        && (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps);

    /// <summary>Host part of <paramref name="url"/>, or "web" when it cannot be parsed.</summary>
    private static string Host(string url) => Uri.TryCreate(url, UriKind.Absolute, out var u) ? u.Host : "web";

    /// <summary>
    /// Builds the plain-text summary of a page: title (site-name suffix after '|' removed)
    /// plus body, passed through <c>HtmlUtil.ToPlainText</c> and capped at
    /// <see cref="MaxTextLength"/>.
    /// </summary>
    private static string Extract(string html)
    {
        string? title = Meta(html, "og:title");
        if (title is not null)
        {
            var bar = title.IndexOf('|');
            if (bar > MinTitleSeparatorIndex)
            {
                title = title[..bar].Trim();
            }
        }

        // First hit wins; og:description is only the fallback when no known container exists.
        string? body = Between(html, "rtcl-description")
                       ?? Between(html, "entry-content")
                       ?? Between(html, "job-description")
                       ?? Meta(html, "og:description");

        var parts = new[] { title, body }.Where(x => !string.IsNullOrWhiteSpace(x));
        var text = HtmlUtil.ToPlainText(string.Join("\n", parts));
        return Truncate(text, MaxTextLength);
    }

    /// <summary>
    /// Returns the HTML-decoded <c>content</c> of the first <c>&lt;meta&gt;</c> tag whose
    /// <c>property</c> equals <paramref name="prop"/>, or null when there is none.
    /// Attributes may appear in either order, and the value may contain the quote character
    /// that is not used as its delimiter (e.g. <c>content="Bob's job"</c>).
    /// </summary>
    private static string? Meta(string html, string prop)
    {
        foreach (Match tag in MetaTagRegex.Matches(html))
        {
            var property = PropertyAttrRegex.Match(tag.Value);
            if (!property.Success || !string.Equals(property.Groups[2].Value, prop, StringComparison.Ordinal))
            {
                continue;
            }

            var content = ContentAttrRegex.Match(tag.Value);
            if (content.Success)
            {
                return WebUtility.HtmlDecode(content.Groups[2].Value);
            }
        }

        return null;
    }

    /// <summary>
    /// Returns the inner HTML of the first <c>div</c>/<c>article</c> whose class attribute
    /// contains <paramref name="cls"/>, or null when no such element (or no closing tag after
    /// it) exists. Nested <c>div</c>/<c>article</c> elements are balanced so the content is not
    /// cut at the first inner closing tag.
    /// </summary>
    private static string? Between(string html, string cls)
    {
        // Tag and attribute names are case-insensitive in HTML; the class value is matched as written.
        var open = Regex.Match(
            html,
            $"<(?i:div|article)\\s[^>]*?(?<![\\w-])(?i:class)\\s*=\\s*([\"'])[^\"']*{Regex.Escape(cls)}[^\"']*\\1[^>]*>");
        if (!open.Success)
        {
            return null;
        }

        var start = open.Index + open.Length;
        var depth = 1;
        var firstClose = -1;
        for (var tag = ContainerTagRegex.Match(html, start); tag.Success; tag = tag.NextMatch())
        {
            if (tag.Groups[1].Length == 0)
            {
                depth++;
                continue;
            }

            if (firstClose < 0)
            {
                firstClose = tag.Index;
            }

            if (--depth == 0)
            {
                return html[start..tag.Index];
            }
        }

        // Unbalanced markup: fall back to the first closing tag rather than dropping the block.
        return firstClose >= 0 ? html[start..firstClose] : null;
    }

    /// <summary>Regex capturing the quoted value (group 2) of the named HTML attribute.</summary>
    private static Regex AttributeRegex(string name) =>
        new($@"(?<![\w-]){name}\s*=\s*([""'])(.*?)\1", TagOptions | RegexOptions.Singleline);

    /// <summary>
    /// Cuts <paramref name="text"/> to at most <paramref name="max"/> UTF-16 code units without
    /// leaving a dangling high surrogate at the end.
    /// </summary>
    private static string Truncate(string text, int max)
    {
        if (text.Length <= max)
        {
            return text;
        }

        var cut = char.IsHighSurrogate(text[max - 1]) ? max - 1 : max;
        return text[..cut];
    }
}
|
||||
Reference in New Issue
Block a user