Files
hamkadr/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
T

243 lines
14 KiB
C#
Raw Normal View History

using System.Net;
using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Structured fields extracted by the model from a raw listing. Every member is optional:
/// <c>ParseVerdict</c> in this file leaves a member null when the matching JSON key is missing
/// or has an unexpected type.
/// </summary>
public record AiStructured(
    string? Kind,
    string? Role,
    string? City,
    string? District,
    string? ShiftType,
    string? EmploymentType,
    long? PayAmount,
    int? SharePercent,
    string? Title,
    string? FacilityName,
    string? Phone = null,
    string? PersonName = null,
    int? YearsExperience = null,
    bool? IsLicensed = null,
    // Dynamic taxonomy: the model may name a role/category outside the seeded set (per the
    // original note, ingestion resolves-or-creates it).
    string? Category = null,
    // Tags carry the post's skills/requirements (e.g. ICU, MMT, licensed).
    IReadOnlyList<string>? Tags = null);
/// <summary>
/// An AI verdict on a raw listing: the decision keyword, a confidence score, an optional
/// free-text reason and the structured fields extracted from the post.
/// </summary>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> is "approve" (case-insensitive).</summary>
    public bool Approve => DecisionIs("approve");

    /// <summary>True when <see cref="Decision"/> is "reject" (case-insensitive).</summary>
    public bool Reject => DecisionIs("reject");

    // Ordinal, case-insensitive match of the decision keyword against one expected verdict.
    private bool DecisionIs(string verdict)
    {
        return Decision.Equals(verdict, StringComparison.OrdinalIgnoreCase);
    }
}
/// <summary>Audits raw listings with an AI model and offers a connectivity self-test.</summary>
public interface IAiAuditor
{
    /// <summary>Audit a raw post.</summary>
    /// <param name="rawText">Raw text of the post to audit.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Cancellation token for the call.</param>
    /// <returns>
    /// The verdict, or null when AI is off or the call fails (fail safe: the listing then goes
    /// to the manual path).
    /// </returns>
    Task<AiAuditResult?> AuditAsync(string rawText, AppSetting settings, CancellationToken ct = default);

    /// <summary>
    /// Diagnostic: runs a real call so the admin can see exactly why the AI service won't
    /// connect. Never throws.
    /// </summary>
    /// <param name="rawText">Sample raw text to send to the model.</param>
    /// <param name="settings">Application settings carrying the AI configuration.</param>
    /// <param name="ct">Cancellation token for the call.</param>
    /// <returns>
    /// A detailed, human-readable Persian success/error string (HTTP status, response snippet,
    /// exception detail).
    /// </returns>
    Task<string> TestAsync(string rawText, AppSetting settings, CancellationToken ct = default);
}
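/// <summary>
/// Calls any OpenAI-compatible chat-completions endpoint (self-hosted vLLM/Ollama, or an Iranian
/// provider — OpenAI/Anthropic are blocked from Iran unless the optional proxy is used). The
/// system prompt is code-owned (<c>AppSetting.DefaultPrompt</c> plus <c>OutputSchema</c>), not the
/// admin-stored prompt; it is the "framework" that tells the model how to approve/reject/structure.
/// We ask for strict JSON and parse it. Any failure in <c>AuditAsync</c> returns null so ingestion
/// falls back to the rule-based path; <c>TestAsync</c> reports the failure as text instead.
/// </summary>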
public class OpenAiCompatibleAuditor : IAiAuditor
{
// Authoritative output contract, written in Persian for the model. SendAsync appends it to the
// code-owned system prompt (AppSetting.DefaultPrompt) so tags/categories stay correct (including
// the "ready to work" talent kind and the contact phone) regardless of the stored prompt text.
// It asks for a single JSON object with the keys decision, confidence, reason, kind, role,
// category, tags, city, district, shiftType, employmentType, payAmount, sharePercent, title,
// facilityName, phone, personName, yearsExperience and isLicensed (unknown fields = null).
// ParseVerdict reads these same keys, so keep the two in sync.
private const string OutputSchema = """
فقط یک شیء JSON با این کلیدها برگردان (هر فیلد نامشخص = null):
decision: approve|reject|review
confidence: عدد ۰ تا ۱۰۰
reason: توضیح کوتاه فارسی
kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
role: «حرفهٔ پایه»، نه با توصیفگر. گروه سنی/بخش/سطح را در tags بگذار («پرستار کودک»role «پرستار»). فقط برای حرفهٔ پایهٔ متفاوت که در فهرست نیست نقش جدید بساز.
category: فقط یکی از این پنج: پزشک | پرستار | ماما | تکنسین | دندانپزشک. اگر نگنجید «سایر». هرگز گروه جدید نساز.
tags: آرایهٔ کلیدواژههای بالینی (مهارت/بخش/گواهی/گروه سنی/سطح) مثل "ICU"،"دیالیز"،"کودک"،"پروانه‌دار". بدون مبلغ/پرداخت/تماس/شهر یا جملهٔ ناقص. اگر نبود [].
city, district: نام شهر و محله/منطقه در صورت ذکر
shiftType: day|evening|night|oncall (فقط برای shift)
employmentType: fulltime|parttime|contract|plan
payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه»)
title: عنوان کوتاه ، facilityName: نام مرکز درمانی (فقط برای shift/job)
phone: شماره تماس (موبایل یا ثابت) بهصورت رقم لاتین، یا null
personName: نام فرد (فقط برای talent) ، yearsExperience: سال سابقه عدد یا null ، isLicensed: true/false (پروانهدار)
""";
/// <summary>Provides the HttpClient used for AI calls (via <c>ForAi</c> in SendAsync).</summary>
private readonly ScrapeHttpClients _clients;

/// <summary>Receives the warnings AuditAsync writes when a call fails.</summary>
private readonly ILogger<OpenAiCompatibleAuditor> _log;

/// <summary>Creates the auditor from its HTTP client source and logger.</summary>
/// <param name="clients">Source of the HTTP client used to reach the AI endpoint.</param>
/// <param name="log">Logger for failure diagnostics.</param>
public OpenAiCompatibleAuditor(ScrapeHttpClients clients, ILogger<OpenAiCompatibleAuditor> log)
    => (_clients, _log) = (clients, log);
/// <summary>
/// Audits one raw post. Fail-safe: every failure (AI disabled, empty endpoint, non-2xx status,
/// missing message content, timeout, any exception) is logged where applicable and reported
/// as null so the caller can fall back to the rule-based decision.
/// </summary>
/// <param name="rawText">Raw text of the post.</param>
/// <param name="s">Settings carrying the AI switch, endpoint and proxy flag.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <returns>The parsed verdict, or null on any failure.</returns>
public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
{
    if (!s.AiEnabled)
    {
        return null;
    }

    if (string.IsNullOrWhiteSpace(s.AiEndpoint))
    {
        return null;
    }

    try
    {
        var (httpStatus, responseBody) = await SendAsync(rawText, s, ct);

        if (!IsSuccess(httpStatus))
        {
            // Keep the real status and response body in the log: the provider usually explains
            // the failure there (bad key, unknown model, quota).
            _log.LogWarning("AI endpoint {Endpoint} returned HTTP {Status}: {Body}",
                s.AiEndpoint, (int)httpStatus, Truncate(responseBody, 600));
            return null;
        }

        var message = ExtractContent(responseBody);
        if (!string.IsNullOrWhiteSpace(message))
        {
            return ParseVerdict(message);
        }

        _log.LogWarning("AI endpoint {Endpoint} returned no message content (response shape not OpenAI-compatible?). Body: {Body}",
            s.AiEndpoint, Truncate(responseBody, 600));
        return null;
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Cancellation that did not come from the caller's token is treated as a timeout.
        _log.LogWarning("AI call to {Endpoint} timed out (proxy={Proxy}).", s.AiEndpoint, s.AiUseProxy);
        return null;
    }
    catch (Exception ex)
    {
        _log.LogWarning(ex, "AI audit failed for endpoint {Endpoint} (proxy={Proxy}) — falling back to rule-based decision.",
            s.AiEndpoint, s.AiUseProxy);
        return null;
    }
}
/// <summary>
/// Diagnostic twin of AuditAsync: performs the same request but, instead of returning null on
/// failure, describes the outcome in a Persian message for the admin (HTTP status, response
/// snippet, network error detail). Every exception is converted to text; nothing is rethrown.
/// </summary>
/// <param name="rawText">Sample raw text to send.</param>
/// <param name="s">Settings carrying the AI switch, endpoint and proxy flag.</param>
/// <param name="ct">Caller's cancellation token.</param>
/// <returns>A human-readable success or error description.</returns>
public async Task<string> TestAsync(string rawText, AppSetting s, CancellationToken ct = default)
{
    const string notConfigured = "هوش مصنوعی غیرفعال است یا آدرس سرویس خالی است. ابتدا آن را فعال و ذخیره کن.";

    if (!s.AiEnabled)
    {
        return notConfigured;
    }

    if (string.IsNullOrWhiteSpace(s.AiEndpoint))
    {
        return notConfigured;
    }

    // Persian on/off label for the proxy switch, evaluated only where a message needs it.
    string ProxyLabel() => s.AiUseProxy ? "روشن" : "خاموش";

    try
    {
        var (httpStatus, responseBody) = await SendAsync(rawText, s, ct);

        if (!IsSuccess(httpStatus))
        {
            return $"❌ سرویس کد HTTP {(int)httpStatus} ({httpStatus}) برگرداند.\nآدرس: {s.AiEndpoint}\nپروکسی: {ProxyLabel()}\nپاسخ سرویس:\n{Truncate(responseBody, 800)}";
        }

        var message = ExtractContent(responseBody);
        if (string.IsNullOrWhiteSpace(message))
        {
            return $"❌ پاسخ دریافت شد ولی محتوای پیام خالی بود — ساختار پاسخ با OpenAI سازگار نیست؟\nپاسخ خام:\n{Truncate(responseBody, 800)}";
        }

        var verdict = ParseVerdict(message);
        if (verdict is null)
        {
            return $"⚠️ مدل پاسخ داد ولی JSON قابل‌خواندن نبود. (response_format=json_object را پشتیبانی نمی‌کند؟)\nمحتوا:\n{Truncate(message, 800)}";
        }

        return $"✅ اتصال موفق — تصمیم: {verdict.Decision} | اطمینان: {verdict.Confidence}٪ | نقش: {verdict.Data?.Role} | شهر: {verdict.Data?.City} | شیفت: {verdict.Data?.ShiftType}";
    }
    catch (OperationCanceledException) when (!ct.IsCancellationRequested)
    {
        // Cancellation that did not come from the caller's token is reported as a timeout.
        return "❌ مهلت پاسخ‌گویی تمام شد (timeout ۱۰۰ ثانیه). اگر تیک «از طریق پروکسی» روشن است، صحت آدرس پروکسی را بررسی کن.";
    }
    catch (HttpRequestException ex)
    {
        // DNS failure, connection refused, TLS error, proxy unreachable — the common Iran cases.
        var inner = ex.InnerException is null ? "" : $" — {ex.InnerException.Message}";
        return $"❌ خطای شبکه/پروکسی: {ex.Message}{inner}\nآدرس: {s.AiEndpoint}\nپروکسی: {ProxyLabel()}";
    }
    catch (Exception ex)
    {
        return $"❌ خطا: {ex.GetType().Name}: {ex.Message}";
    }
}
/// <summary>
/// POSTs the chat-completions request and hands back the raw HTTP status and response body
/// without interpreting them. AuditAsync (fail-safe) and TestAsync (diagnostic) both go through
/// here, so they exercise the identical call path.
/// </summary>
/// <param name="rawText">Raw post text, embedded in the user message.</param>
/// <param name="s">Settings supplying model, endpoint, API key and proxy choice.</param>
/// <param name="ct">Cancellation token passed to the HTTP send and the body read.</param>
/// <returns>The response status code and the body as a string.</returns>
private async Task<(HttpStatusCode status, string body)> SendAsync(string rawText, AppSetting s, CancellationToken ct)
{
    var modelName = string.IsNullOrWhiteSpace(s.AiModel) ? "gpt-4o-mini" : s.AiModel;

    // Hardcoded, code-owned prompt (NOT the stored AiSystemPrompt) + the authoritative output
    // schema, so classification/tags can never be broken by an admin edit.
    var systemPrompt = AppSetting.DefaultPrompt + "\n\n" + OutputSchema;
    var userPrompt = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده.";

    var payload = new
    {
        model = modelName,
        temperature = 0,
        response_format = new { type = "json_object" },
        messages = new object[]
        {
            new { role = "system", content = systemPrompt },
            new { role = "user", content = userPrompt },
        },
    };

    // Proxy-aware when AiUseProxy is on (e.g. OpenAI from Iran).
    var httpClient = _clients.ForAi(s);

    using var request = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint);
    request.Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json");

    // The bearer token is optional: it is only attached when an API key is configured.
    if (!string.IsNullOrWhiteSpace(s.AiApiKey))
    {
        request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", s.AiApiKey);
    }

    using var response = await httpClient.SendAsync(request, ct);
    var responseText = await response.Content.ReadAsStringAsync(ct);
    return (response.StatusCode, responseText);
}
/// <summary>True when the status code lies in the 2xx range (200 through 299).</summary>
private static bool IsSuccess(HttpStatusCode s)
{
    var code = (int)s;
    return code >= 200 && code < 300;
}
/// <summary>
/// Pulls <c>choices[0].message.content</c> out of an OpenAI-style response.
/// </summary>
/// <param name="body">Raw response body returned by the endpoint.</param>
/// <returns>
/// The message content, or null on any unexpected shape (empty or non-JSON body, a root that is
/// not an object, an error object without <c>choices</c>, a non-object choice or message,
/// non-string content) rather than throwing, so the caller can show the body.
/// </returns>
private static string? ExtractContent(string body)
{
    if (string.IsNullOrWhiteSpace(body))
    {
        return null;
    }

    try
    {
        using var doc = JsonDocument.Parse(body);
        var root = doc.RootElement;

        // JsonElement.TryGetProperty throws InvalidOperationException on anything that is not an
        // object, so the kind is checked at every step before descending.
        if (root.ValueKind != JsonValueKind.Object
            || !root.TryGetProperty("choices", out var choices)
            || choices.ValueKind != JsonValueKind.Array
            || choices.GetArrayLength() == 0)
        {
            return null;
        }

        var firstChoice = choices[0];
        if (firstChoice.ValueKind != JsonValueKind.Object
            || !firstChoice.TryGetProperty("message", out var message)
            || message.ValueKind != JsonValueKind.Object
            || !message.TryGetProperty("content", out var content))
        {
            return null;
        }

        // GetString throws on arrays/numbers/objects; only a JSON string counts as content.
        return content.ValueKind == JsonValueKind.String ? content.GetString() : null;
    }
    catch (JsonException)
    {
        // Body was not valid JSON at all.
        return null;
    }
}
/// <summary>
/// Shortens text for logs and diagnostics: a null or empty value becomes a Persian "(empty)"
/// placeholder, text of at most <paramref name="max"/> characters is returned unchanged, and
/// longer text is cut to <paramref name="max"/> characters followed by " …".
/// </summary>
private static string Truncate(string? s, int max)
{
    if (string.IsNullOrEmpty(s))
    {
        return "(خالی)";
    }

    if (s.Length <= max)
    {
        return s;
    }

    return s.Substring(0, max) + " …";
}
/// <summary>
/// Parses the model's message content into a verdict. Surrounding whitespace, backtick code
/// fences and any text outside the outermost <c>{ … }</c> are ignored.
/// </summary>
/// <param name="json">Message content returned by the model.</param>
/// <returns>
/// The verdict, or null when the content holds no parseable JSON object. A missing decision
/// defaults to "review", a missing or non-integer confidence defaults to 50, and the confidence
/// is clamped to 0..100. Fields with a missing key or an unexpected JSON type become null.
/// </returns>
private static AiAuditResult? ParseVerdict(string json)
{
    // The content itself should be a JSON object; tolerate code fences.
    json = json.Trim().Trim('`');
    var start = json.IndexOf('{');
    var end = json.LastIndexOf('}');
    if (start < 0 || end <= start) return null;
    json = json.Substring(start, end - start + 1);

    JsonDocument doc;
    try { doc = JsonDocument.Parse(json); }
    catch (JsonException) { return null; } // model returned non-JSON content

    using (doc)
    {
        var r = doc.RootElement;

        // Guard on ValueKind first — GetString/TryGetInt32/TryGetInt64 THROW on values of the
        // wrong kind (the model often returns payAmount/sharePercent as null), which would fail
        // the whole parse.
        string? S(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;
        int I(string k, int d) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : d;
        long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
        int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
        bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null;

        // Array-of-strings reader (tolerates the model returning a single string instead of an
        // array). Non-string and empty elements are skipped; an empty result is reported as null.
        IReadOnlyList<string>? SA(string k)
        {
            if (!r.TryGetProperty(k, out var v)) return null;
            var list = new List<string>();
            // Explicit braces are required here: without them the `else if` attaches to the
            // inner `if` inside the loop and the single-string case is never reached.
            if (v.ValueKind == JsonValueKind.Array)
            {
                foreach (var el in v.EnumerateArray())
                {
                    if (el.ValueKind == JsonValueKind.String && el.GetString() is { Length: > 0 } item)
                    {
                        list.Add(item);
                    }
                }
            }
            else if (v.ValueKind == JsonValueKind.String && v.GetString() is { Length: > 0 } one)
            {
                list.Add(one);
            }
            return list.Count > 0 ? list : null;
        }

        var decision = (S("decision") ?? "review").ToLowerInvariant();
        var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
            S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"),
            Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"),
            Category: S("category"), Tags: SA("tags"));
        return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
    }
}
}