Move ingestion + Telegram/Bale/Divar config to DB-backed admin settings
- AppSetting gains source config: AutoIngestEnabled, IngestIntervalMinutes, Telegram/Bale/Divar enabled+channels/token/queries
- IListingSource.FetchAsync(AppSetting) — sources read config from DB, not IOptions/appsettings; sample source dev-only
- IngestionWorker reads AutoIngest+interval from DB each cycle (toggle at runtime, no redeploy)
- /Admin/Settings gets a 'منابع جمع‌آوری' section; removed Ingestion env/appsettings + compose env vars
- ENV_FILE shrinks to HOST_PORT + POSTGRES_* + ADMIN_PHONE (AI + sources are all in-admin); migration

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,46 +1,34 @@
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Options;
|
||||
using JobsMedical.Web.Models;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
public class BaleOptions
|
||||
{
|
||||
public bool Enabled { get; set; }
|
||||
public string? BotToken { get; set; }
|
||||
public string BaseUrl { get; set; } = "https://tapi.bale.ai"; // Bale Bot API host
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. The bot must
|
||||
/// be a member/admin of the channels it should read. Pulls text from messages and channel posts.
|
||||
/// Bale (Iranian messenger) source via its Telegram-compatible Bot API getUpdates. Enabled +
|
||||
/// bot token come from admin settings (DB). The bot must be a member of the channels it reads.
|
||||
/// </summary>
|
||||
public class BaleListingSource : IListingSource
|
||||
{
|
||||
private readonly BaleOptions _opts;
|
||||
private const string BaseUrl = "https://tapi.bale.ai";
|
||||
private readonly IHttpClientFactory _http;
|
||||
private readonly ILogger<BaleListingSource> _log;
|
||||
|
||||
public BaleListingSource(IOptions<BaleOptions> opts, IHttpClientFactory http,
|
||||
ILogger<BaleListingSource> log)
|
||||
public BaleListingSource(IHttpClientFactory http, ILogger<BaleListingSource> log)
|
||||
{
|
||||
_opts = opts.Value;
|
||||
_http = http;
|
||||
_log = log;
|
||||
}
|
||||
|
||||
public string Name => "بله";
|
||||
public bool Enabled => _opts.Enabled && !string.IsNullOrWhiteSpace(_opts.BotToken);
|
||||
|
||||
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
|
||||
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
|
||||
{
|
||||
if (!Enabled) { _log.LogInformation("Bale source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
|
||||
if (!s.BaleEnabled || string.IsNullOrWhiteSpace(s.BaleBotToken)) return Array.Empty<ScrapedItem>();
|
||||
|
||||
try
|
||||
{
|
||||
var client = _http.CreateClient("scrape");
|
||||
var url = $"{_opts.BaseUrl.TrimEnd('/')}/bot{_opts.BotToken}/getUpdates";
|
||||
var body = await client.GetStringAsync(url, ct);
|
||||
|
||||
var body = await client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct);
|
||||
using var doc = JsonDocument.Parse(body);
|
||||
if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
|
||||
return Array.Empty<ScrapedItem>();
|
||||
@@ -54,11 +42,7 @@ public class BaleListingSource : IListingSource
|
||||
}
|
||||
return items;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
_log.LogWarning(ex, "Bale fetch failed.");
|
||||
return Array.Empty<ScrapedItem>();
|
||||
}
|
||||
catch (Exception ex) { _log.LogWarning(ex, "Bale fetch failed."); return Array.Empty<ScrapedItem>(); }
|
||||
}
|
||||
|
||||
private static string? TextOf(JsonElement update, string key)
|
||||
|
||||
@@ -1,55 +1,44 @@
|
||||
using System.Text;
|
||||
using System.Text.Json;
|
||||
using Microsoft.Extensions.Options;
|
||||
using JobsMedical.Web.Models;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
public class DivarOptions
|
||||
{
|
||||
public bool Enabled { get; set; }
|
||||
public string City { get; set; } = "tehran";
|
||||
public string Category { get; set; } = "jobs";
|
||||
public List<string> Queries { get; set; } = new(); // e.g. "پرستار", "پزشک عمومی", "درمانگاه"
|
||||
public string BaseUrl { get; set; } = "https://api.divar.ir/v8/web-search";
|
||||
public int PerQuery { get; set; } = 25;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Best-effort Divar fetch: queries Divar's web-search JSON for each term and harvests post
|
||||
/// titles + descriptions. Divar's private API shifts shape over time, so we walk the JSON
|
||||
/// tolerantly for any object carrying a "title" plus a nearby description field, and fail soft.
|
||||
/// titles + descriptions. Enabled + city + queries come from admin settings (DB). Divar's
|
||||
/// private API shifts shape, so we walk JSON tolerantly and fail soft.
|
||||
/// </summary>
|
||||
public class DivarListingSource : IListingSource
|
||||
{
|
||||
private readonly DivarOptions _opts;
|
||||
private const string BaseUrl = "https://api.divar.ir/v8/web-search";
|
||||
private readonly IHttpClientFactory _http;
|
||||
private readonly ILogger<DivarListingSource> _log;
|
||||
|
||||
public DivarListingSource(IOptions<DivarOptions> opts, IHttpClientFactory http,
|
||||
ILogger<DivarListingSource> log)
|
||||
public DivarListingSource(IHttpClientFactory http, ILogger<DivarListingSource> log)
|
||||
{
|
||||
_opts = opts.Value;
|
||||
_http = http;
|
||||
_log = log;
|
||||
}
|
||||
|
||||
public string Name => "دیوار";
|
||||
public bool Enabled => _opts.Enabled && _opts.Queries.Count > 0;
|
||||
|
||||
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
|
||||
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
|
||||
{
|
||||
if (!Enabled) { _log.LogInformation("Divar source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
|
||||
var queries = AppSetting.SplitList(s.DivarQueries);
|
||||
if (!s.DivarEnabled || queries.Count == 0) return Array.Empty<ScrapedItem>();
|
||||
var city = string.IsNullOrWhiteSpace(s.DivarCity) ? "tehran" : s.DivarCity.Trim();
|
||||
|
||||
var client = _http.CreateClient("scrape");
|
||||
var items = new List<ScrapedItem>();
|
||||
foreach (var q in _opts.Queries.Where(q => q.Trim().Length > 0))
|
||||
foreach (var q in queries)
|
||||
{
|
||||
try
|
||||
{
|
||||
var url = $"{_opts.BaseUrl.TrimEnd('/')}/{_opts.City}/{_opts.Category}?q={Uri.EscapeDataString(q)}";
|
||||
var url = $"{BaseUrl}/{city}/jobs?q={Uri.EscapeDataString(q)}";
|
||||
var body = await client.GetStringAsync(url, ct);
|
||||
using var doc = JsonDocument.Parse(body);
|
||||
foreach (var text in Harvest(doc.RootElement).Take(_opts.PerQuery))
|
||||
foreach (var text in Harvest(doc.RootElement).Take(25))
|
||||
items.Add(new ScrapedItem("دیوار", text, "https://divar.ir"));
|
||||
}
|
||||
catch (Exception ex) { _log.LogWarning(ex, "Divar fetch failed for query {Query}", q); }
|
||||
@@ -60,7 +49,6 @@ public class DivarListingSource : IListingSource
|
||||
private static readonly string[] DescKeys =
|
||||
{ "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };
|
||||
|
||||
/// <summary>Walk the JSON; for each object with a string "title", emit title + first description.</summary>
|
||||
private static IEnumerable<string> Harvest(JsonElement el)
|
||||
{
|
||||
if (el.ValueKind == JsonValueKind.Object)
|
||||
@@ -75,12 +63,12 @@ public class DivarListingSource : IListingSource
|
||||
if (text.Length >= 15) yield return text;
|
||||
}
|
||||
foreach (var p in el.EnumerateObject())
|
||||
foreach (var s in Harvest(p.Value)) yield return s;
|
||||
foreach (var x in Harvest(p.Value)) yield return x;
|
||||
}
|
||||
else if (el.ValueKind == JsonValueKind.Array)
|
||||
{
|
||||
foreach (var item in el.EnumerateArray())
|
||||
foreach (var s in Harvest(item)) yield return s;
|
||||
foreach (var x in Harvest(item)) yield return x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
using JobsMedical.Web.Models;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
/// <summary>One raw post pulled from a source (a Telegram message, a Divar ad, etc.).</summary>
|
||||
public record ScrapedItem(string Source, string RawText, string? SourceUrl = null);
|
||||
|
||||
/// <summary>
|
||||
/// A pluggable source the ingestion engine pulls from. Implement once per channel/site.
|
||||
/// `Enabled` lets a source be present but dormant until it's configured with credentials.
|
||||
/// A pluggable source the ingestion engine pulls from. Configuration (enabled, channels, tokens)
|
||||
/// comes from the DB-backed <see cref="AppSetting"/> passed in — set in the admin panel, not env.
|
||||
/// A disabled/unconfigured source returns an empty list.
|
||||
/// </summary>
|
||||
public interface IListingSource
|
||||
{
|
||||
string Name { get; }
|
||||
bool Enabled { get; }
|
||||
Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default);
|
||||
Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting settings, CancellationToken ct = default);
|
||||
}
|
||||
|
||||
@@ -43,8 +43,7 @@ public class IngestionService
|
||||
_ai = ai; _settings = settings; _log = log;
|
||||
}
|
||||
|
||||
public IReadOnlyList<(string Name, bool Enabled)> Sources =>
|
||||
_sources.Select(s => (s.Name, s.Enabled)).ToList();
|
||||
public IReadOnlyList<string> SourceNames => _sources.Select(s => s.Name).ToList();
|
||||
|
||||
public async Task<IngestionSummary> RunAsync(CancellationToken ct = default)
|
||||
{
|
||||
@@ -58,12 +57,13 @@ public class IngestionService
|
||||
|
||||
var results = new List<SourceResult>();
|
||||
|
||||
foreach (var source in _sources.Where(s => s.Enabled))
|
||||
foreach (var source in _sources)
|
||||
{
|
||||
int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;
|
||||
IReadOnlyList<ScrapedItem> items;
|
||||
try { items = await source.FetchAsync(ct); }
|
||||
try { items = await source.FetchAsync(settings, ct); }
|
||||
catch (Exception ex) { _log.LogError(ex, "Source {Source} failed", source.Name); continue; }
|
||||
if (items.Count == 0) continue; // disabled/unconfigured source
|
||||
|
||||
foreach (var item in items)
|
||||
{
|
||||
|
||||
@@ -1,58 +1,52 @@
|
||||
using Microsoft.Extensions.Options;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
public class IngestionOptions
|
||||
{
|
||||
public bool Enabled { get; set; } = false; // off by default — opt in via config
|
||||
public int IntervalMinutes { get; set; } = 30;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Periodically runs the ingestion engine when enabled (Ingestion:Enabled=true). Off by default
|
||||
/// so nothing scrapes uninvited; admins can also trigger a run on demand from the admin UI.
|
||||
/// Periodically runs the ingestion engine when the admin has turned auto-ingest ON
|
||||
/// (AppSetting.AutoIngestEnabled) — read fresh from the DB each cycle, so it can be toggled at
|
||||
/// runtime from the admin panel with no redeploy. When off, it idles and re-checks.
|
||||
/// </summary>
|
||||
public class IngestionWorker : BackgroundService
|
||||
{
|
||||
private readonly IServiceScopeFactory _scopes;
|
||||
private readonly IngestionOptions _opts;
|
||||
private readonly ILogger<IngestionWorker> _log;
|
||||
|
||||
public IngestionWorker(IServiceScopeFactory scopes, IOptions<IngestionOptions> opts,
|
||||
ILogger<IngestionWorker> log)
|
||||
public IngestionWorker(IServiceScopeFactory scopes, ILogger<IngestionWorker> log)
|
||||
{
|
||||
_scopes = scopes;
|
||||
_opts = opts.Value;
|
||||
_log = log;
|
||||
}
|
||||
|
||||
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
|
||||
{
|
||||
if (!_opts.Enabled)
|
||||
{
|
||||
_log.LogInformation("Ingestion worker disabled (Ingestion:Enabled=false).");
|
||||
return;
|
||||
}
|
||||
|
||||
var interval = TimeSpan.FromMinutes(Math.Max(1, _opts.IntervalMinutes));
|
||||
_log.LogInformation("Ingestion worker on; every {Min} min.", _opts.IntervalMinutes);
|
||||
// Small startup delay so the DB/migrations are ready.
|
||||
try { await Task.Delay(TimeSpan.FromSeconds(20), stoppingToken); }
|
||||
catch (OperationCanceledException) { return; }
|
||||
|
||||
while (!stoppingToken.IsCancellationRequested)
|
||||
{
|
||||
var idleMinutes = 10;
|
||||
try
|
||||
{
|
||||
using var scope = _scopes.CreateScope();
|
||||
var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
|
||||
var summary = await svc.RunAsync(stoppingToken);
|
||||
_log.LogInformation("Scheduled ingestion: queued={Q} flagged={F} spam={S} dupes={D}",
|
||||
summary.TotalQueued, summary.TotalFlagged, summary.TotalSpam, summary.TotalDuplicates);
|
||||
var settings = await scope.ServiceProvider
|
||||
.GetRequiredService<SettingsService>().GetAsync();
|
||||
|
||||
if (settings.AutoIngestEnabled)
|
||||
{
|
||||
var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();
|
||||
var summary = await svc.RunAsync(stoppingToken);
|
||||
_log.LogInformation("Auto-ingest: queued={Q} published={P} flagged={F} spam={S} dupes={D}",
|
||||
summary.TotalQueued, summary.TotalPublished, summary.TotalFlagged,
|
||||
summary.TotalSpam, summary.TotalDuplicates);
|
||||
idleMinutes = Math.Max(1, settings.IngestIntervalMinutes);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) when (ex is not OperationCanceledException)
|
||||
{
|
||||
_log.LogError(ex, "Scheduled ingestion run failed");
|
||||
_log.LogError(ex, "Auto-ingest cycle failed");
|
||||
}
|
||||
|
||||
try { await Task.Delay(interval, stoppingToken); }
|
||||
try { await Task.Delay(TimeSpan.FromMinutes(idleMinutes), stoppingToken); }
|
||||
catch (OperationCanceledException) { break; }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,27 +1,33 @@
|
||||
using JobsMedical.Web.Models;
|
||||
using Microsoft.Extensions.Hosting;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
/// <summary>
|
||||
/// A built-in source of representative Persian posts (the kind found in shift channels). Always
|
||||
/// available, needs no credentials — it lets the whole ingestion → validation → review pipeline
|
||||
/// run and be demoed today, and doubles as a fixture mix of good, incomplete, and spam posts.
|
||||
/// Built-in representative Persian posts (good, incomplete, and spam) so the whole pipeline can be
|
||||
/// demoed. Only active in Development — never injects sample data into production.
|
||||
/// </summary>
|
||||
public class SampleListingSource : IListingSource
|
||||
{
|
||||
private readonly IHostEnvironment _env;
|
||||
public SampleListingSource(IHostEnvironment env) => _env = env;
|
||||
|
||||
public string Name => "نمونه (کانال آزمایشی)";
|
||||
public bool Enabled => true;
|
||||
|
||||
private static readonly string[] Posts =
|
||||
{
|
||||
"درمانگاه شبانهروزی در سعادتآباد نیازمند پزشک عمومی برای شیفت شب، کارانه ۳ میلیون تومان. تماس ۰۹۱۲۳۴۵۶۷۸۹",
|
||||
"کلینیک تخصصی در تهران به پرستار برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
|
||||
"کلینیک تخصصی در تهران به پرستار خانم برای شیفت عصر نیازمند است، ۵۰٪ سهم درآمد. ۰۹۳۵۱۱۱۲۲۳۳",
|
||||
"استخدام ماما تماموقت در بیمارستان خصوصی، حقوق توافقی. منطقه شهرک غرب.",
|
||||
"نیازمند تکنسین اتاق عمل جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
|
||||
"فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!", // spam
|
||||
"پزشک", // too short / incomplete
|
||||
"نیازمند تکنسین اتاق عمل آقا جهت همکاری در نارمک، شیفت صبح. ۰۹۱۲۰۰۰۰۰۰۰",
|
||||
"فروش فالوور و بک لینک ارزان، سرمایه گذاری در ارز دیجیتال با سود تضمینی!",
|
||||
"پزشک",
|
||||
"بیمارستان آتیه جهت تکمیل کادر درمان به پزشک عمومی مقیم نیازمند است. قرارداد یکساله، حقوق ۴۵ میلیون ماهانه. تهرانپارس.",
|
||||
};
|
||||
|
||||
public Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
|
||||
public Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting settings, CancellationToken ct = default)
|
||||
=> Task.FromResult<IReadOnlyList<ScrapedItem>>(
|
||||
Posts.Select(p => new ScrapedItem(Name, p)).ToList());
|
||||
_env.IsDevelopment()
|
||||
? Posts.Select(p => new ScrapedItem(Name, p)).ToList()
|
||||
: Array.Empty<ScrapedItem>());
|
||||
}
|
||||
|
||||
@@ -34,6 +34,16 @@ public class SettingsService
|
||||
s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
|
||||
? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
|
||||
s.AiAutoApprove = incoming.AiAutoApprove;
|
||||
// Channel scraping sources
|
||||
s.AutoIngestEnabled = incoming.AutoIngestEnabled;
|
||||
s.IngestIntervalMinutes = Math.Max(1, incoming.IngestIntervalMinutes);
|
||||
s.TelegramEnabled = incoming.TelegramEnabled;
|
||||
s.TelegramChannels = incoming.TelegramChannels?.Trim();
|
||||
s.BaleEnabled = incoming.BaleEnabled;
|
||||
s.BaleBotToken = incoming.BaleBotToken?.Trim();
|
||||
s.DivarEnabled = incoming.DivarEnabled;
|
||||
s.DivarCity = string.IsNullOrWhiteSpace(incoming.DivarCity) ? "tehran" : incoming.DivarCity.Trim();
|
||||
s.DivarQueries = incoming.DivarQueries?.Trim();
|
||||
s.UpdatedAt = DateTime.UtcNow;
|
||||
await _db.SaveChangesAsync();
|
||||
}
|
||||
|
||||
@@ -1,50 +1,39 @@
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
using Microsoft.Extensions.Options;
|
||||
using JobsMedical.Web.Models;
|
||||
|
||||
namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
public class TelegramOptions
|
||||
{
|
||||
public bool Enabled { get; set; }
|
||||
public string? BotToken { get; set; } // optional (for private channels later)
|
||||
public List<string> Channels { get; set; } = new(); // public channel usernames (no @)
|
||||
public int PerChannel { get; set; } = 20;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reads public Telegram channels via the web preview (https://t.me/s/<channel>) — no bot
|
||||
/// token or login needed for public channels. Each message's text becomes a ScrapedItem.
|
||||
/// token needed for public channels. Enabled + channel list come from the admin settings (DB).
|
||||
/// </summary>
|
||||
public class TelegramListingSource : IListingSource
|
||||
{
|
||||
private readonly TelegramOptions _opts;
|
||||
private readonly IHttpClientFactory _http;
|
||||
private readonly ILogger<TelegramListingSource> _log;
|
||||
|
||||
public TelegramListingSource(IOptions<TelegramOptions> opts, IHttpClientFactory http,
|
||||
ILogger<TelegramListingSource> log)
|
||||
public TelegramListingSource(IHttpClientFactory http, ILogger<TelegramListingSource> log)
|
||||
{
|
||||
_opts = opts.Value;
|
||||
_http = http;
|
||||
_log = log;
|
||||
}
|
||||
|
||||
public string Name => "تلگرام";
|
||||
public bool Enabled => _opts.Enabled && _opts.Channels.Count > 0;
|
||||
|
||||
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(CancellationToken ct = default)
|
||||
public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
|
||||
{
|
||||
if (!Enabled) { _log.LogInformation("Telegram source disabled/unconfigured."); return Array.Empty<ScrapedItem>(); }
|
||||
var channels = AppSetting.SplitList(s.TelegramChannels);
|
||||
if (!s.TelegramEnabled || channels.Count == 0) return Array.Empty<ScrapedItem>();
|
||||
|
||||
var client = _http.CreateClient("scrape");
|
||||
var items = new List<ScrapedItem>();
|
||||
foreach (var ch in _opts.Channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
|
||||
foreach (var ch in channels.Select(c => c.TrimStart('@')).Where(c => c.Length > 0))
|
||||
{
|
||||
try
|
||||
{
|
||||
var html = await client.GetStringAsync($"https://t.me/s/{ch}", ct);
|
||||
foreach (var text in ExtractMessages(html).Take(_opts.PerChannel))
|
||||
foreach (var text in ExtractMessages(html).Take(20))
|
||||
items.Add(new ScrapedItem($"تلگرام/{ch}", text, $"https://t.me/{ch}"));
|
||||
}
|
||||
catch (Exception ex) { _log.LogWarning(ex, "Telegram fetch failed for {Channel}", ch); }
|
||||
@@ -52,7 +41,6 @@ public class TelegramListingSource : IListingSource
|
||||
return items;
|
||||
}
|
||||
|
||||
// Message bodies live in <div class="tgme_widget_message_text ...">...</div>.
|
||||
private static IEnumerable<string> ExtractMessages(string html)
|
||||
{
|
||||
foreach (Match m in Regex.Matches(html,
|
||||
@@ -69,7 +57,7 @@ internal static class HtmlUtil
|
||||
public static string ToPlainText(string html)
|
||||
{
|
||||
var s = Regex.Replace(html, "<br\\s*/?>", "\n", RegexOptions.IgnoreCase);
|
||||
s = Regex.Replace(s, "<[^>]+>", ""); // strip remaining tags
|
||||
s = Regex.Replace(s, "<[^>]+>", "");
|
||||
s = WebUtility.HtmlDecode(s);
|
||||
s = Regex.Replace(s, "[ \\t]+", " ");
|
||||
return s.Trim();
|
||||
|
||||
Reference in New Issue
Block a user