2026-06-09 18:30:12 +03:30
|
|
|
|
using System.Net;
|
2026-06-03 17:41:02 +03:30
|
|
|
|
using System.Net.Http.Headers;
|
|
|
|
|
|
using System.Text;
|
|
|
|
|
|
using System.Text.Json;
|
|
|
|
|
|
using JobsMedical.Web.Models;
|
|
|
|
|
|
|
|
|
|
|
|
namespace JobsMedical.Web.Services.Scraping;
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Structured fields read from the model's JSON verdict. Every member is nullable; the trailing
/// members are optional and default to null.
/// </summary>
public record AiStructured(
    string? Kind,
    string? Role,
    string? City,
    string? District,
    string? ShiftType,
    string? EmploymentType,
    long? PayAmount,
    int? SharePercent,
    string? Title,
    string? FacilityName,
    string? Phone = null,
    string? PersonName = null,
    int? YearsExperience = null,
    bool? IsLicensed = null,

    // Dynamic taxonomy: the model may name a role/category that is not in the seeded set
    // (ingestion resolves-or-creates it). Tags carry the post's skills/requirements
    // (ICU, MMT, licensed…).
    string? Category = null,
    IReadOnlyList<string>? Tags = null,

    // Approximate coordinates the model infers from a named neighborhood. They are used ONLY as
    // a geocoding fallback (validated against Tehran's bounding box) when neither the source ad
    // nor the local table provides any.
    double? Lat = null,
    double? Lng = null);
|
2026-06-03 17:41:02 +03:30
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// An AI verdict on a raw listing: the decision keyword, a confidence score, an optional reason
/// and the structured data extracted from the post (if any).
/// </summary>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> is "approve", compared case-insensitively.</summary>
    public bool Approve
    {
        get { return DecisionIs("approve"); }
    }

    /// <summary>True when <see cref="Decision"/> is "reject", compared case-insensitively.</summary>
    public bool Reject
    {
        get { return DecisionIs("reject"); }
    }

    // Ordinal, case-insensitive comparison of the decision keyword.
    private bool DecisionIs(string keyword)
    {
        return Decision.Equals(keyword, StringComparison.OrdinalIgnoreCase);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>Contract for the component that asks an AI service to audit raw posts.</summary>
public interface IAiAuditor
{
    /// <summary>Audit a raw post.</summary>
    /// <param name="rawText">The raw post text to audit.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Token used to cancel the call.</param>
    /// <returns>
    /// The verdict, or null when AI is off or the call fails (fail safe → manual handling).
    /// </returns>
    Task<AiAuditResult?> AuditAsync(string rawText, AppSetting settings, CancellationToken ct = default);

    /// <summary>
    /// Diagnostic: runs a real call and reports the outcome so the admin can see exactly why the
    /// AI service won't connect. Never throws.
    /// </summary>
    /// <param name="rawText">The sample post text to send.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Token used to cancel the call.</param>
    /// <returns>
    /// A detailed, human-readable Persian success/error string (HTTP status, response snippet,
    /// exception detail).
    /// </returns>
    Task<string> TestAsync(string rawText, AppSetting settings, CancellationToken ct = default);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian
/// provider — OpenAI/Anthropic are blocked from Iran). The system prompt is code-owned
/// (AppSetting.DefaultPrompt plus OutputSchema), not the admin-stored prompt; it is the
/// "framework" that tells the model how to approve/reject/structure. We ask for strict JSON and
/// parse it. Any failure returns null so ingestion falls back to the rule-based path.
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public class OpenAiCompatibleAuditor : IAiAuditor
|
|
|
|
|
|
{
|
2026-06-08 08:11:14 +03:30
|
|
|
|
// Authoritative output contract appended to the code-owned system prompt (AppSetting.DefaultPrompt)
// so tags/categories stay correct (including the «آماده به کار» "ready to work" type and the
// contact phone) regardless of the stored prompt text.
|
|
|
|
|
|
private const string OutputSchema = """
|
|
|
|
|
|
فقط یک شیء JSON با این کلیدها برگردان (هر فیلد نامشخص = null):
|
|
|
|
|
|
decision: approve|reject|review
|
|
|
|
|
|
confidence: عدد ۰ تا ۱۰۰
|
|
|
|
|
|
reason: توضیح کوتاه فارسی
|
|
|
|
|
|
kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
|
2026-06-20 14:24:20 +03:30
|
|
|
|
role: «حرفهٔ پایه»، نه با توصیفگر. گروه سنی/بخش/سطح را در tags بگذار («پرستار کودک»→role «پرستار»). فقط برای حرفهٔ پایهٔ متفاوت که در فهرست نیست نقش جدید بساز.
|
|
|
|
|
|
category: فقط یکی از این پنج: پزشک | پرستار | ماما | تکنسین | دندانپزشک. اگر نگنجید «سایر». هرگز گروه جدید نساز.
|
|
|
|
|
|
tags: آرایهٔ کلیدواژههای بالینی (مهارت/بخش/گواهی/گروه سنی/سطح) مثل "ICU"،"دیالیز"،"کودک"،"پروانهدار". بدون مبلغ/پرداخت/تماس/شهر یا جملهٔ ناقص. اگر نبود [].
|
2026-06-08 08:11:14 +03:30
|
|
|
|
city, district: نام شهر و محله/منطقه در صورت ذکر
|
2026-06-20 15:48:42 +03:30
|
|
|
|
lat, lng: اگر محله/منطقه را در تهران تشخیص دادی، مختصاتِ تقریبیِ مرکزِ همان محله را بهصورت عدد اعشاری برگردان (lat حدود ۳۵.x، lng حدود ۵۱.x)؛ در غیر این صورت null. حدس نزن.
|
2026-06-08 08:11:14 +03:30
|
|
|
|
shiftType: day|evening|night|oncall (فقط برای shift)
|
|
|
|
|
|
employmentType: fulltime|parttime|contract|plan
|
|
|
|
|
|
payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه»)
|
|
|
|
|
|
title: عنوان کوتاه ، facilityName: نام مرکز درمانی (فقط برای shift/job)
|
|
|
|
|
|
phone: شماره تماس (موبایل یا ثابت) بهصورت رقم لاتین، یا null
|
|
|
|
|
|
personName: نام فرد (فقط برای talent) ، yearsExperience: سال سابقه عدد یا null ، isLicensed: true/false (پروانهدار)
|
|
|
|
|
|
""";
|
|
|
|
|
|
|
2026-06-07 22:55:07 +03:30
|
|
|
|
// Supplies the HTTP client used for AI calls (see ForAi in SendAsync).
private readonly ScrapeHttpClients _clients;

// Receives the warnings written when an audit call fails.
private readonly ILogger<OpenAiCompatibleAuditor> _log;

/// <summary>Stores the injected HTTP client provider and logger.</summary>
/// <param name="clients">Provider of the HTTP client used for AI calls.</param>
/// <param name="log">Logger for failure diagnostics.</param>
public OpenAiCompatibleAuditor(ScrapeHttpClients clients, ILogger<OpenAiCompatibleAuditor> log)
    => (_clients, _log) = (clients, log);
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Sends the raw post to the configured AI endpoint and parses the returned verdict.
/// </summary>
/// <param name="rawText">The raw post text to audit.</param>
/// <param name="s">Settings providing AiEnabled, AiEndpoint and the other AI options.</param>
/// <param name="ct">Token used to cancel the call.</param>
/// <returns>
/// The parsed verdict, or null when AI is disabled, the endpoint is blank, the service answers
/// with a non-2xx status, the reply has no message content, the content cannot be parsed, or
/// any exception occurs. Each failure path logs a warning.
/// </returns>
public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
{
    var aiConfigured = s.AiEnabled && !string.IsNullOrWhiteSpace(s.AiEndpoint);
    if (!aiConfigured)
    {
        return null;
    }

    try
    {
        var response = await SendAsync(rawText, s, ct);
        var httpStatus = response.status;
        var responseBody = response.body;

        if (!IsSuccess(httpStatus))
        {
            // Keep the real status and response body in the log: the provider usually explains
            // the failure there (bad key, unknown model, quota), which EnsureSuccessStatusCode
            // would discard.
            _log.LogWarning("AI endpoint {Endpoint} returned HTTP {Status}: {Body}",
                s.AiEndpoint, (int)httpStatus, Truncate(responseBody, 600));
            return null;
        }

        var message = ExtractContent(responseBody);
        if (!string.IsNullOrWhiteSpace(message))
        {
            return ParseVerdict(message);
        }

        _log.LogWarning("AI endpoint {Endpoint} returned no message content (response shape not OpenAI-compatible?). Body: {Body}",
            s.AiEndpoint, Truncate(responseBody, 600));
        return null;
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Cancellation that did not come from the caller's token is treated as a timeout.
        _log.LogWarning("AI call to {Endpoint} timed out (proxy={Proxy}).", s.AiEndpoint, s.AiUseProxy);
        return null;
    }
    catch (Exception ex)
    {
        _log.LogWarning(ex, "AI audit failed for endpoint {Endpoint} (proxy={Proxy}) — falling back to rule-based decision.",
            s.AiEndpoint, s.AiUseProxy);
        return null;
    }
}
|
|
|
|
|
|
|
2026-06-09 18:30:12 +03:30
|
|
|
|
/// <summary>
/// Diagnostic twin of AuditAsync: performs the same call through SendAsync but, instead of
/// returning null on failure, describes the outcome in a Persian message for the admin.
/// </summary>
/// <param name="rawText">The sample post text to send.</param>
/// <param name="s">Settings providing AiEnabled, AiEndpoint, AiUseProxy and the other AI options.</param>
/// <param name="ct">Token used to cancel the call.</param>
/// <returns>
/// A success line with the parsed decision, or an error text containing the HTTP status,
/// a truncated response body, or the exception detail. Every exception is converted to text.
/// </returns>
public async Task<string> TestAsync(string rawText, AppSetting s, CancellationToken ct = default)
{
    // Persian on/off label for the proxy switch, evaluated where the message is built.
    string ProxyLabel() => s.AiUseProxy ? "روشن" : "خاموش";

    var aiConfigured = s.AiEnabled && !string.IsNullOrWhiteSpace(s.AiEndpoint);
    if (!aiConfigured)
    {
        return "هوش مصنوعی غیرفعال است یا آدرس سرویس خالی است. ابتدا آن را فعال و ذخیره کن.";
    }

    try
    {
        var response = await SendAsync(rawText, s, ct);
        var httpStatus = response.status;
        var responseBody = response.body;

        if (!IsSuccess(httpStatus))
        {
            return $"❌ سرویس کد HTTP {(int)httpStatus} ({httpStatus}) برگرداند.\nآدرس: {s.AiEndpoint}\nپروکسی: {ProxyLabel()}\nپاسخ سرویس:\n{Truncate(responseBody, 800)}";
        }

        var message = ExtractContent(responseBody);
        if (string.IsNullOrWhiteSpace(message))
        {
            return $"❌ پاسخ دریافت شد ولی محتوای پیام خالی بود — ساختار پاسخ با OpenAI سازگار نیست؟\nپاسخ خام:\n{Truncate(responseBody, 800)}";
        }

        var verdict = ParseVerdict(message);
        if (verdict is null)
        {
            return $"⚠️ مدل پاسخ داد ولی JSON قابلخواندن نبود. (response_format=json_object را پشتیبانی نمیکند؟)\nمحتوا:\n{Truncate(message, 800)}";
        }

        return $"✅ اتصال موفق — تصمیم: {verdict.Decision} | اطمینان: {verdict.Confidence}٪ | نقش: {verdict.Data?.Role} | شهر: {verdict.Data?.City} | شیفت: {verdict.Data?.ShiftType}";
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Cancellation that did not come from the caller's token is reported as a timeout.
        return "❌ مهلت پاسخگویی تمام شد (timeout ۱۰۰ ثانیه). اگر تیک «از طریق پروکسی» روشن است، صحت آدرس پروکسی را بررسی کن.";
    }
    catch (HttpRequestException ex)
    {
        // DNS failure, connection refused, TLS error, proxy unreachable — the common Iran cases.
        var innerDetail = ex.InnerException is null ? "" : $" — {ex.InnerException.Message}";
        return $"❌ خطای شبکه/پروکسی: {ex.Message}{innerDetail}\nآدرس: {s.AiEndpoint}\nپروکسی: {ProxyLabel()}";
    }
    catch (Exception ex)
    {
        return $"❌ خطا: {ex.GetType().Name}: {ex.Message}";
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// POSTs the chat-completions request to <c>s.AiEndpoint</c> and returns the raw status code and
/// body text, whatever the status. AuditAsync (fail-safe) and TestAsync (diagnostic) both go
/// through here so they exercise the identical call path.
/// </summary>
/// <param name="rawText">The raw post text placed in the user message.</param>
/// <param name="s">Settings providing AiModel, AiEndpoint, AiApiKey and the proxy choice.</param>
/// <param name="ct">Token used to cancel the request and the body read.</param>
/// <returns>The HTTP status code and the response body as a string.</returns>
private async Task<(HttpStatusCode status, string body)> SendAsync(string rawText, AppSetting s, CancellationToken ct)
{
    // Hardcoded, code-owned prompt (NOT the stored AiSystemPrompt) + the authoritative output
    // schema, so classification/tags can never be broken by an admin edit.
    var systemMessage = new { role = "system", content = AppSetting.DefaultPrompt + "\n\n" + OutputSchema };
    var userMessage = new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." };

    var payload = new
    {
        // Falls back to a default model name when none is configured.
        model = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel,
        temperature = 0,
        response_format = new { type = "json_object" },
        messages = new object[] { systemMessage, userMessage },
    };

    // Proxy-aware when AiUseProxy is on (e.g. OpenAI from Iran).
    var http = _clients.ForAi(s);

    using (var request = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint))
    {
        request.Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json");

        // The bearer token is attached only when a key is configured.
        if (!string.IsNullOrWhiteSpace(s.AiApiKey))
        {
            request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);
        }

        using (var response = await http.SendAsync(request, ct))
        {
            var text = await response.Content.ReadAsStringAsync(ct);
            return (response.StatusCode, text);
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>Returns true when the status code is in the 2xx range (200–299 inclusive).</summary>
private static bool IsSuccess(HttpStatusCode s)
{
    var code = (int)s;
    return code >= 200 && code <= 299;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Pulls <c>choices[0].message.content</c> out of an OpenAI-style response.
/// </summary>
/// <param name="body">The raw response body.</param>
/// <returns>
/// The content string, or null on any unexpected shape (blank body, invalid JSON, an error
/// object, a non-object root/choice/message, or non-string content) — never throws for those,
/// so the caller can show the body instead.
/// </returns>
private static string? ExtractContent(string body)
{
    if (string.IsNullOrWhiteSpace(body))
    {
        return null;
    }

    try
    {
        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        // JsonElement.TryGetProperty throws InvalidOperationException on anything that is not an
        // object, and GetString throws on non-string values, so every step checks ValueKind first.
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("choices", out var choices)
            || choices.ValueKind != JsonValueKind.Array
            || choices.GetArrayLength() == 0)
        {
            return null;
        }

        var first = choices[0];
        if (first.ValueKind != JsonValueKind.Object
            || !first.TryGetProperty("message", out var msg)
            || msg.ValueKind != JsonValueKind.Object
            || !msg.TryGetProperty("content", out var content))
        {
            return null;
        }

        // Content that is null or structured (e.g. an array of parts) is not usable text here.
        return content.ValueKind == JsonValueKind.String ? content.GetString() : null;
    }
    catch (JsonException)
    {
        // The body is not JSON at all (HTML error page, plain text, …).
        return null;
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Shortens text for logs and diagnostics: null/empty becomes a Persian "(empty)" placeholder,
/// and text longer than <paramref name="max"/> characters is cut and suffixed with " …".
/// </summary>
/// <param name="s">The text to shorten; may be null.</param>
/// <param name="max">Maximum number of characters kept from <paramref name="s"/>.</param>
/// <returns>The placeholder, the original text, or its first <paramref name="max"/> characters plus " …".</returns>
private static string Truncate(string? s, int max)
{
    if (string.IsNullOrEmpty(s))
    {
        return "(خالی)";
    }

    if (s.Length > max)
    {
        return s.Substring(0, max) + " …";
    }

    return s;
}
|
|
|
|
|
|
|
2026-06-03 17:41:02 +03:30
|
|
|
|
/// <summary>
/// Parses the model's message content into an <see cref="AiAuditResult"/>.
/// </summary>
/// <param name="json">
/// The message content; expected to be a JSON object, optionally wrapped in code fences or
/// surrounded by other text (only the span from the first '{' to the last '}' is parsed).
/// </param>
/// <returns>
/// The verdict, or null when no brace-delimited span exists or it is not valid JSON. A missing
/// decision becomes "review"; a missing or unusable confidence becomes 50 (then clamped to
/// 0–100); every other missing or wrongly-typed field becomes null.
/// </returns>
private static AiAuditResult? ParseVerdict(string json)
{
    // The content itself should be a JSON object; tolerate code fences.
    json = json.Trim().Trim('`');
    var start = json.IndexOf('{');
    var end = json.LastIndexOf('}');
    if (start < 0 || end <= start) return null;
    json = json.Substring(start, end - start + 1);

    JsonDocument doc;
    try { doc = JsonDocument.Parse(json); }
    catch (JsonException) { return null; } // model returned non-JSON content

    using (doc)
    {
        var r = doc.RootElement;

        // Reads a JSON number as a whole number. ValueKind is checked first because
        // TryGetInt32/64 THROW on null/string values (the model often returns
        // payAmount/sharePercent as null), which would fail the whole parse. Whole values written
        // in floating-point form (85.0, 1.5e7) are accepted too; fractional values are rejected.
        static long? Whole(JsonElement v)
        {
            if (v.ValueKind != JsonValueKind.Number) return null;
            if (v.TryGetInt64(out var n)) return n;

            const double maxExactInteger = 9007199254740992d; // 2^53: doubles are exact up to here
            if (v.TryGetDouble(out var d) && Math.Abs(d) <= maxExactInteger && d == Math.Truncate(d))
            {
                return (long)d;
            }

            return null;
        }

        string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;
        long? L(string k) => r.TryGetProperty(k, out var v) ? Whole(v) : null;
        double? D(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetDouble(out var n) ? n : null;
        bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null;

        // Whole number that also fits in an Int32; anything else is treated as absent.
        int? NI(string k)
        {
            if (L(k) is long n && n >= int.MinValue && n <= int.MaxValue) return (int)n;
            return null;
        }

        int I(string k, int d) => NI(k) ?? d;

        // Array-of-strings reader (tolerates the model returning a single string instead of an
        // array). The braces are required: without them the `else` binds to the inner `if`
        // inside the loop and the single-string case is never reached.
        IReadOnlyList<string>? SA(string k)
        {
            if (!r.TryGetProperty(k, out var v)) return null;

            var list = new List<string>();
            if (v.ValueKind == JsonValueKind.Array)
            {
                foreach (var el in v.EnumerateArray())
                {
                    if (el.ValueKind == JsonValueKind.String && el.GetString() is { Length: > 0 } item)
                    {
                        list.Add(item);
                    }
                }
            }
            else if (v.ValueKind == JsonValueKind.String && v.GetString() is { Length: > 0 } one)
            {
                list.Add(one);
            }

            return list.Count > 0 ? list : null;
        }

        var decision = (S("decision") ?? "review").ToLowerInvariant();
        var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
            S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"),
            Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"),
            Category: S("category"), Tags: SA("tags"), Lat: D("lat"), Lng: D("lng"));
        return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
    }
}
|
|
|
|
|
|
}
|