2026-06-03 17:41:02 +03:30
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.Json;
|
2026-06-04 00:44:11 +03:30
|
|
|
using JobsMedical.Web.Models;
|
2026-06-03 08:18:19 +03:30
|
|
|
|
|
|
|
|
namespace JobsMedical.Web.Services.Scraping;
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Best-effort Divar listing source. For each configured search term it requests Divar's
/// web-search JSON and harvests post titles together with the first available description
/// field. Whether the source is enabled, which city to search, and the search terms are all
/// read from the <see cref="AppSetting"/> passed to <see cref="FetchAsync"/>. The response is
/// walked without assuming a fixed schema (the private API's shape is not stable), and a
/// failing query is logged and skipped instead of failing the whole fetch.
/// </summary>
public class DivarListingSource : IListingSource
{
    private const string BaseUrl = "https://api.divar.ir/v8/web-search";

    /// <summary>Display name of this source; also stamped on every harvested item.</summary>
    private const string SourceName = "دیوار";

    /// <summary>Link attached to every harvested item.</summary>
    private const string SourceUrl = "https://divar.ir";

    /// <summary>City slug used when the settings do not specify one.</summary>
    private const string DefaultCity = "tehran";

    /// <summary>Upper bound on harvested texts kept per search term.</summary>
    private const int MaxItemsPerQuery = 25;

    /// <summary>Harvested texts shorter than this (after trimming) are discarded.</summary>
    private const int MinTextLength = 15;

    /// <summary>
    /// Candidate property names for a post's description, in priority order; only the first
    /// one present as a JSON string is used.
    /// </summary>
    private static readonly string[] DescKeys =
        { "description", "middle_description_text", "subtitle", "bottom_description_text", "normal_text" };

    private readonly ScrapeHttpClients _clients;
    private readonly ILogger<DivarListingSource> _log;

    /// <summary>Creates the source.</summary>
    /// <param name="clients">Provider of the HTTP client used for the requests.</param>
    /// <param name="log">Logger that receives a warning for each failed query.</param>
    public DivarListingSource(ScrapeHttpClients clients, ILogger<DivarListingSource> log)
    {
        _clients = clients;
        _log = log;
    }

    /// <inheritdoc />
    public string Name => SourceName;

    /// <summary>
    /// Runs every configured Divar query and returns the harvested texts as scraped items.
    /// </summary>
    /// <param name="s">Settings carrying the enabled flag, city, query list and proxy choice.</param>
    /// <param name="ct">Token used to cancel the HTTP requests.</param>
    /// <returns>
    /// An empty list when the source is disabled or has no queries; otherwise up to
    /// <see cref="MaxItemsPerQuery"/> items per query. Queries that fail contribute nothing.
    /// If <paramref name="ct"/> is cancelled, the items gathered so far are returned.
    /// </returns>
    public async Task<IReadOnlyList<ScrapedItem>> FetchAsync(AppSetting s, CancellationToken ct = default)
    {
        var queries = AppSetting.SplitList(s.DivarQueries);
        if (!s.DivarEnabled || queries.Count == 0)
        {
            return Array.Empty<ScrapedItem>();
        }

        var city = string.IsNullOrWhiteSpace(s.DivarCity) ? DefaultCity : s.DivarCity.Trim();

        // The city is free text from settings and ends up in the URL path, so it must be
        // escaped just like the query term; otherwise '/', '?', '#' or spaces would alter
        // the request. Plain slugs such as "tehran" are unchanged by escaping.
        var cityPath = Uri.EscapeDataString(city);

        var client = _clients.For(s, s.DivarUseProxy);
        var items = new List<ScrapedItem>();

        foreach (var q in queries)
        {
            try
            {
                var url = $"{BaseUrl}/{cityPath}/jobs?q={Uri.EscapeDataString(q)}";
                var body = await client.GetStringAsync(url, ct);
                using var doc = JsonDocument.Parse(body);

                foreach (var text in Harvest(doc.RootElement).Take(MaxItemsPerQuery))
                {
                    items.Add(new ScrapedItem(SourceName, text, SourceUrl));
                }
            }
            catch (OperationCanceledException) when (ct.IsCancellationRequested)
            {
                // The caller cancelled: every remaining request would fail immediately, so
                // stop here instead of logging one spurious warning per remaining query.
                // A client-side timeout (token not cancelled) still falls through to the
                // fail-soft handler below.
                break;
            }
            catch (Exception ex)
            {
                // Fail soft: one bad query must not discard the results of the others.
                _log.LogWarning(ex, "Divar fetch failed for query {Query}", q);
            }
        }

        return items;
    }

    /// <summary>
    /// Recursively walks a JSON tree and lazily yields one text per object that has a string
    /// <c>title</c> property: the title, followed by " — " and the first string-valued
    /// property from <see cref="DescKeys"/> when one exists. Texts shorter than
    /// <see cref="MinTextLength"/> characters after trimming are skipped. An object's own
    /// text is yielded before the texts found in its children; non-container values yield
    /// nothing.
    /// </summary>
    /// <param name="el">Root of the (sub)tree to scan.</param>
    /// <returns>Harvested texts in document order.</returns>
    private static IEnumerable<string> Harvest(JsonElement el)
    {
        if (el.ValueKind == JsonValueKind.Object)
        {
            if (el.TryGetProperty("title", out var t) && t.ValueKind == JsonValueKind.String)
            {
                var sb = new StringBuilder(t.GetString());
                foreach (var k in DescKeys)
                {
                    if (el.TryGetProperty(k, out var d) && d.ValueKind == JsonValueKind.String)
                    {
                        // Only the highest-priority description is appended.
                        sb.Append(" — ").Append(d.GetString());
                        break;
                    }
                }

                var text = sb.ToString().Trim();
                if (text.Length >= MinTextLength)
                {
                    yield return text;
                }
            }

            // Posts may be nested at any depth, so descend into every property value.
            foreach (var p in el.EnumerateObject())
            {
                foreach (var x in Harvest(p.Value))
                {
                    yield return x;
                }
            }
        }
        else if (el.ValueKind == JsonValueKind.Array)
        {
            foreach (var item in el.EnumerateArray())
            {
                foreach (var x in Harvest(item))
                {
                    yield return x;
                }
            }
        }
    }
}
|