Files
hamkadr/src/JobsMedical.Web/Services/Scraping/BaleListingSource.cs
T
soroush.asadi cea27c8684
CI/CD / CI · dotnet build (push) Successful in 53s
CI/CD / Deploy · hamkadr (push) Successful in 1m12s
[Ingest] Route scraping through an optional V2Ray/Xray proxy (Telegram in Iran)
Telegram and some sources are filtered in Iran. .NET cannot speak vmess/vless/trojan, so add an Xray sidecar (compose service 'xray', behind the 'proxy' profile) that converts the admin's config into a local SOCKS5 proxy (xray:10808). New ScrapeHttpClients provider builds a proxied or direct HttpClient (WebProxy supports socks5/socks4/http) cached per proxy URL; all five ingestion sources (Telegram/Bale/Divar/Medjobs/Websites) now use it. Admin settings gain IngestProxyEnabled + IngestProxyUrl (migration; UI under sources). Added deploy/xray/config.json template + README with vmess/vless/trojan examples.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 17:53:17 +03:30

53 lines
2.1 KiB
C#

using System.Text.Json;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Listing source for Bale (Iranian messenger), read through its Telegram-compatible Bot API
/// <c>getUpdates</c> endpoint. The enabled flag and the bot token come from admin settings (DB).
/// The bot must be a member of the channels it reads.
/// </summary>
public class BaleListingSource : IListingSource
{
    private const string BaseUrl = "https://tapi.bale.ai";

    /// <summary>Updates whose trimmed text is shorter than this many characters are skipped.</summary>
    private const int MinTextLength = 15;

    private readonly ScrapeHttpClients _clients;
    private readonly ILogger<BaleListingSource> _log;

    public BaleListingSource(ScrapeHttpClients clients, ILogger<BaleListingSource> log)
    {
        _clients = clients;
        _log = log;
    }

    /// <summary>Display name of this source; also used as the source label on every scraped item.</summary>
    public string Name => "بله";

    /// <summary>
    /// Calls <c>getUpdates</c> with the configured bot token and turns each update's text into a
    /// <see cref="ScrapedItem"/>.
    /// </summary>
    /// <param name="s">Admin settings; supplies <c>BaleEnabled</c>, <c>BaleBotToken</c> and the HTTP client selection.</param>
    /// <param name="ct">Token used to cancel the HTTP request.</param>
    /// <returns>
    /// The trimmed texts of all updates that carry at least <see cref="MinTextLength"/> characters.
    /// Empty when the source is disabled, the token is blank, the response has no <c>result</c>
    /// array, or the fetch fails for any reason other than caller-requested cancellation.
    /// </returns>
    /// <exception cref="OperationCanceledException">The caller cancelled <paramref name="ct"/>.</exception>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
    {
        if (!s.BaleEnabled || string.IsNullOrWhiteSpace(s.BaleBotToken))
        {
            return Array.Empty<ScrapedItem>();
        }

        try
        {
            var client = _clients.For(s);
            var body = await client.GetStringAsync($"{BaseUrl}/bot{s.BaleBotToken}/getUpdates", ct);

            using var doc = JsonDocument.Parse(body);
            if (!doc.RootElement.TryGetProperty("result", out var result) || result.ValueKind != JsonValueKind.Array)
            {
                return Array.Empty<ScrapedItem>();
            }

            var items = new List<ScrapedItem>();
            foreach (var update in result.EnumerateArray())
            {
                // Channel posts take precedence; fall back to a direct/group message.
                var text = (TextOf(update, "channel_post") ?? TextOf(update, "message"))?.Trim();
                if (text is { Length: >= MinTextLength })
                {
                    items.Add(new ScrapedItem(Name, text));
                }
            }

            return items;
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller asked to stop: propagate instead of reporting it as a failed fetch.
            // Cancellations not requested by the caller still fall through to the best-effort
            // handler below.
            throw;
        }
        catch (Exception ex)
        {
            // Best effort: a broken source must not take the whole ingestion run down.
            _log.LogWarning(ex, "Bale fetch failed.");
            return Array.Empty<ScrapedItem>();
        }
    }

    /// <summary>
    /// Reads <c>update[key].text</c> when both <paramref name="update"/> and <c>update[key]</c> are
    /// JSON objects and <c>text</c> is a JSON string; otherwise returns <c>null</c>.
    /// </summary>
    /// <remarks>
    /// The object checks matter: <see cref="JsonElement.TryGetProperty(string, out JsonElement)"/>
    /// throws on non-object elements, and a single malformed update would otherwise discard the
    /// entire batch.
    /// </remarks>
    private static string? TextOf(JsonElement update, string key)
        => update.ValueKind == JsonValueKind.Object
           && update.TryGetProperty(key, out var m)
           && m.ValueKind == JsonValueKind.Object
           && m.TryGetProperty("text", out var t)
           && t.ValueKind == JsonValueKind.String
            ? t.GetString()
            : null;
}