[Demo] Add admin demo-mode toggle + generic website ingest source
CI/CD / CI · dotnet build (push) Failing after 1m40s
CI/CD / Deploy · hamkadr (push) Has been skipped

- AppSetting: DemoMode, WebsitesEnabled, WebsiteUrls
- Facility.IsDemo flag; SeedData split into SeedReferenceAsync (always)
  + SeedDemoAsync/ClearDemoAsync (idempotent, toggleable at runtime)
- WebsiteListingSource: scrape any admin-configured URL (og:title + content)
- Admin Settings: seed/clear demo card, demo-mode checkbox, website source
  fields; Program.cs seeds demo data when DemoMode is on (or in Development)
- EF migration DemoModeAndWebsites

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-04 13:43:07 +03:30
parent eae38373b9
commit 0c0449c2b9
11 changed files with 1341 additions and 149 deletions
@@ -41,6 +41,9 @@ public class SettingsService
s.TelegramChannels = incoming.TelegramChannels?.Trim();
s.BaleEnabled = incoming.BaleEnabled;
s.BaleBotToken = incoming.BaleBotToken?.Trim();
s.DemoMode = incoming.DemoMode;
s.WebsitesEnabled = incoming.WebsitesEnabled;
s.WebsiteUrls = incoming.WebsiteUrls?.Trim();
s.DivarEnabled = incoming.DivarEnabled;
s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim();
s.DivarQueries = incoming.DivarQueries?.Trim();
@@ -0,0 +1,69 @@
using System.Net;
using System.Text.RegularExpressions;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Generic website source: fetches each admin-configured URL and extracts readable text
/// from the page's <c>og:title</c> plus the first non-empty known content container
/// (<c>rtcl-description</c>, <c>entry-content</c>, <c>job-description</c>) or, failing that,
/// <c>og:description</c>.
/// Lets the admin point the engine at any job page / channel archive / classifieds listing.
/// </summary>
public class WebsiteListingSource : IListingSource
{
    /// <summary>Pages yielding less extracted text than this are dropped.</summary>
    private const int MinTextLength = 25;

    /// <summary>Extracted text is cut to at most this many UTF-16 code units.</summary>
    private const int MaxTextLength = 1800;

    /// <summary>A '|' in og:title is treated as a site-name separator only when it appears after this index.</summary>
    private const int TitleSeparatorMinIndex = 10;

    /// <summary>Upper bound for a single regex match; the HTML is untrusted input.</summary>
    private static readonly TimeSpan RegexTimeout = TimeSpan.FromSeconds(2);

    /// <summary>CSS class fragments of content containers, tried in this order.</summary>
    private static readonly string[] ContentClasses = { "rtcl-description", "entry-content", "job-description" };

    private readonly IHttpClientFactory _http;
    private readonly ILogger<WebsiteListingSource> _log;

    public WebsiteListingSource(IHttpClientFactory http, ILogger<WebsiteListingSource> log)
    {
        _http = http;
        _log = log;
    }

    public string Name => "وب‌سایت‌ها";

    /// <summary>
    /// Downloads every configured website URL and returns one item per page that produced
    /// at least <see cref="MinTextLength"/> characters of text.
    /// </summary>
    /// <param name="s">Settings providing <c>WebsitesEnabled</c> and the <c>WebsiteUrls</c> list.</param>
    /// <param name="ct">Cancels the whole fetch; cancellation is propagated to the caller.</param>
    /// <returns>The scraped items; empty when the source is disabled or no URL is configured.</returns>
    /// <exception cref="OperationCanceledException">When <paramref name="ct"/> is cancelled.</exception>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
    {
        var urls = AppSetting.SplitList(s.WebsiteUrls);
        if (!s.WebsitesEnabled || urls.Count == 0) return Array.Empty<ScrapedItem>();

        var client = _http.CreateClient("scrape");
        var items = new List<ScrapedItem>();
        foreach (var url in urls)
        {
            // Only absolute http/https URLs are fetched; anything else is a configuration mistake.
            var uri = ParseHttpUrl(url);
            if (uri is null)
            {
                _log.LogWarning("Skipping website URL that is not an absolute http(s) URL: {Url}", url);
                continue;
            }

            try
            {
                var html = await client.GetStringAsync(uri, ct);
                var text = Extract(html);
                if (text.Length >= MinTextLength) items.Add(new ScrapedItem($"وب‌سایت ({uri.Host})", text, url));
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // The caller asked to stop: do not keep iterating and logging one failure per URL.
                throw;
            }
            catch (Exception ex)
            {
                // Best effort per URL: HTTP errors, client timeouts and regex timeouts must not
                // prevent the remaining URLs from being processed.
                _log.LogWarning(ex, "Website fetch failed for {Url}", url);
            }
        }
        return items;
    }

    /// <summary>Returns the parsed URI when <paramref name="url"/> is an absolute http/https URL, otherwise null.</summary>
    private static Uri? ParseHttpUrl(string? url) =>
        Uri.TryCreate(url, UriKind.Absolute, out var uri)
        && (uri.Scheme == Uri.UriSchemeHttp || uri.Scheme == Uri.UriSchemeHttps)
            ? uri
            : null;

    /// <summary>
    /// Builds the plain-text payload for a page: og:title (without a trailing "| site name" part)
    /// followed by the first non-empty content container, falling back to og:description.
    /// </summary>
    private static string Extract(string html)
    {
        var title = Meta(html, "og:title");
        if (title is not null)
        {
            var bar = title.IndexOf('|');
            if (bar > TitleSeparatorMinIndex) title = title[..bar].Trim();
        }

        string? body = null;
        foreach (var cls in ContentClasses)
        {
            var candidate = Between(html, cls);
            // An empty container must not block the later containers or the og:description fallback.
            if (!string.IsNullOrWhiteSpace(candidate))
            {
                body = candidate;
                break;
            }
        }
        body ??= Meta(html, "og:description");

        var parts = new[] { title, body }.Where(x => !string.IsNullOrWhiteSpace(x));
        var text = HtmlUtil.ToPlainText(string.Join("\n", parts));
        return Truncate(text, MaxTextLength);
    }

    /// <summary>Cuts <paramref name="text"/> to at most <paramref name="max"/> chars without splitting a surrogate pair.</summary>
    private static string Truncate(string text, int max)
    {
        if (text.Length <= max) return text;
        var end = char.IsHighSurrogate(text[max - 1]) ? max - 1 : max;
        return text[..end];
    }

    /// <summary>
    /// Reads the HTML-decoded <c>content</c> of the first <c>&lt;meta&gt;</c> tag whose
    /// <c>property</c> (or <c>name</c>) equals <paramref name="prop"/>, or null when there is none.
    /// </summary>
    private static string? Meta(string html, string prop)
    {
        // The lookahead makes the attribute order irrelevant; the back-references require the
        // closing quote to be the same character as the opening one, so an apostrophe inside a
        // double-quoted value does not end the value early.
        var pattern =
            $@"<meta\b(?=[^>]*\s(?:property|name)\s*=\s*([""']){Regex.Escape(prop)}\1)[^>]*?\scontent\s*=\s*([""'])(.*?)\2";
        var m = Regex.Match(html, pattern, RegexOptions.IgnoreCase | RegexOptions.Singleline, RegexTimeout);
        return m.Success ? WebUtility.HtmlDecode(m.Groups[3].Value) : null;
    }

    /// <summary>
    /// Returns the inner HTML of the first <c>div</c>/<c>article</c> whose class attribute contains
    /// <paramref name="cls"/>, or null when there is none.
    /// </summary>
    private static string? Between(string html, string cls)
    {
        // Known limitation: the lazy match ends at the FIRST closing div/article tag, so a container
        // with nested divs is cut at the end of its first nested element.
        var m = Regex.Match(html, $"<(?:div|article)[^>]+class=[\"'][^\"']*{Regex.Escape(cls)}[^\"']*[\"'][^>]*>(.*?)</(?:div|article)>",
            RegexOptions.Singleline | RegexOptions.IgnoreCase, RegexTimeout);
        return m.Success ? m.Groups[1].Value : null;
    }
}