// Source: hamkadr/src/JobsMedical.Web/Services/Scraping/AiAuditor.cs
// (Repository-viewer header captured with the file: C#, 133 lines, 7.2 KiB.)

using System.Net.Http.Headers;
using System.Text;
using System.Text.Json;
using JobsMedical.Web.Models;
namespace JobsMedical.Web.Services.Scraping;
/// <summary>
/// Structured fields extracted from a raw listing by the AI model. Every field is optional;
/// null means the model did not supply a usable value for it.
/// </summary>
/// <param name="Kind">Listing kind as returned by the model (the output schema asks for shift, job or talent).</param>
/// <param name="Role">Medical role named in the listing.</param>
/// <param name="City">City, when mentioned.</param>
/// <param name="District">District or neighbourhood, when mentioned.</param>
/// <param name="ShiftType">Shift type as returned by the model (the output schema asks for day, evening, night or oncall).</param>
/// <param name="EmploymentType">Employment type as returned by the model (the output schema asks for fulltime, parttime, contract or plan).</param>
/// <param name="PayAmount">Pay amount as a whole number (the output schema asks for toman).</param>
/// <param name="SharePercent">Revenue-share percentage (the output schema asks for 0–100).</param>
/// <param name="Title">Short title for the listing.</param>
/// <param name="FacilityName">Name of the medical facility.</param>
/// <param name="Phone">Contact phone number.</param>
/// <param name="PersonName">Name of the person (the output schema asks for it on talent listings).</param>
/// <param name="YearsExperience">Years of experience.</param>
/// <param name="IsLicensed">Whether the person holds a licence.</param>
public record AiStructured(
    string? Kind,
    string? Role,
    string? City,
    string? District,
    string? ShiftType,
    string? EmploymentType,
    long? PayAmount,
    int? SharePercent,
    string? Title,
    string? FacilityName,
    string? Phone = null,
    string? PersonName = null,
    int? YearsExperience = null,
    bool? IsLicensed = null);
/// <summary>An AI verdict on a raw listing.</summary>
/// <param name="Decision">Verdict keyword produced by the model, e.g. "approve", "reject" or "review".</param>
/// <param name="Confidence">Confidence score reported by the model.</param>
/// <param name="Reason">Short explanation given by the model, if any.</param>
/// <param name="Data">Structured fields extracted from the listing, if any.</param>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
{
    /// <summary>True when <see cref="Decision"/> is "approve", ignoring case.</summary>
    public bool Approve => HasDecision("approve");

    /// <summary>True when <see cref="Decision"/> is "reject", ignoring case.</summary>
    public bool Reject => HasDecision("reject");

    // Single place for the ordinal, case-insensitive keyword comparison.
    private bool HasDecision(string expected) =>
        Decision.Equals(expected, StringComparison.OrdinalIgnoreCase);
}
/// <summary>Audits a raw scraped post with an AI model and returns its verdict.</summary>
public interface IAiAuditor
{
/// <summary>
/// Audit a raw post. Returns null when AI is off or the call fails, so the caller can fall
/// back to its non-AI handling (fail safe → manual).
/// </summary>
/// <param name="rawText">Raw text of the post to audit.</param>
/// <param name="settings">Application settings that carry the AI configuration.</param>
/// <param name="ct">Token used to cancel the audit.</param>
/// <returns>The AI verdict, or null when no verdict is available.</returns>
Task<AiAuditResult?> AuditAsync(string rawText, AppSetting settings, CancellationToken ct = default);
}
/// <summary>
/// <see cref="IAiAuditor"/> implementation that calls any OpenAI-compatible chat-completions
/// endpoint (self-hosted vLLM/Ollama, or an Iranian provider — OpenAI/Anthropic are blocked
/// from Iran). The admin-set system prompt is the "framework" that tells the model how to
/// approve/reject/structure a listing. The model is asked for strict JSON, which is then parsed.
/// Any failure returns null so ingestion falls back to the rule-based path; only a cancellation
/// requested through the caller's own token is propagated.
/// </summary>
public class OpenAiCompatibleAuditor : IAiAuditor
{
    // Model name sent when the settings do not specify one.
    private const string DefaultModel = "gpt-4o-mini";

    // Authoritative output contract appended to the admin prompt so tags/categories stay correct
    // (including the "ready to work" talent type and contact phone) regardless of the stored
    // prompt text. The text is sent to the model verbatim and must not be edited casually.
    private const string OutputSchema = """
        فقط یک شیء JSON با این کلیدها برگردان (هر فیلد نامشخص = null):
        decision: approve|reject|review
        confidence: عدد ۰ تا ۱۰۰
        reason: توضیح کوتاه فارسی
        kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
        role: عنوان دقیق نقش درمانی (مثل پرستار، پزشک عمومی، دندانپزشک، تکنسین اتاق عمل، ماما، کارشناس آزمایشگاه)
        city, district: نام شهر و محله/منطقه در صورت ذکر
        shiftType: day|evening|night|oncall (فقط برای shift)
        employmentType: fulltime|parttime|contract|plan
        payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه»)
        title: عنوان کوتاه ، facilityName: نام مرکز درمانی (فقط برای shift/job)
        phone: شماره تماس (موبایل یا ثابت) بهصورت رقم لاتین، یا null
        personName: نام فرد (فقط برای talent) ، yearsExperience: سال سابقه عدد یا null ، isLicensed: true/false (پروانهدار)
        """;

    private readonly ScrapeHttpClients _clients;
    private readonly ILogger<OpenAiCompatibleAuditor> _log;

    /// <summary>Creates the auditor.</summary>
    /// <param name="clients">Provider of the HTTP client used for AI calls.</param>
    /// <param name="log">Logger that receives a warning whenever an audit fails.</param>
    public OpenAiCompatibleAuditor(ScrapeHttpClients clients, ILogger<OpenAiCompatibleAuditor> log)
    {
        _clients = clients;
        _log = log;
    }

    /// <summary>
    /// Sends the raw post to the configured chat-completions endpoint and parses the verdict.
    /// </summary>
    /// <param name="rawText">Raw text of the post to audit.</param>
    /// <param name="s">Settings holding the AI switch, endpoint, model, API key and system prompt.</param>
    /// <param name="ct">Token used to cancel the HTTP call.</param>
    /// <returns>
    /// The parsed verdict, or null when AI is disabled, no endpoint is configured, the reply is
    /// empty or contains no JSON object, or the call/parse fails for any reason.
    /// </returns>
    /// <exception cref="OperationCanceledException">
    /// <paramref name="ct"/> was cancelled by the caller while the audit was in flight.
    /// </exception>
    public async Task<AiAuditResult?> AuditAsync(string rawText, AppSetting s, CancellationToken ct = default)
    {
        if (!s.AiEnabled || string.IsNullOrWhiteSpace(s.AiEndpoint))
        {
            return null;
        }

        try
        {
            using var req = BuildRequest(rawText, s);

            // Proxy-aware when AiUseProxy is on (e.g. OpenAI from Iran).
            var client = _clients.ForAi(s);
            using var resp = await client.SendAsync(req, ct);
            resp.EnsureSuccessStatusCode();

            var body = await resp.Content.ReadAsStringAsync(ct);
            var content = ExtractMessageContent(body);
            return string.IsNullOrWhiteSpace(content) ? null : ParseVerdict(content);
        }
        catch (OperationCanceledException) when (ct.IsCancellationRequested)
        {
            // The caller asked to stop: this is not an AI failure, so do not log it as one and
            // do not trigger the rule-based fallback. HttpClient timeouts also surface as
            // OperationCanceledException but leave ct un-cancelled, so they still reach the
            // fail-safe handler below.
            throw;
        }
        catch (Exception ex)
        {
            _log.LogWarning(ex, "AI audit failed — falling back to rule-based decision.");
            return null;
        }
    }

    // Builds the chat-completions POST request (JSON payload plus optional bearer token).
    private static HttpRequestMessage BuildRequest(string rawText, AppSetting s)
    {
        var payload = new
        {
            model = string.IsNullOrWhiteSpace(s.AiModel) ? DefaultModel : s.AiModel,
            temperature = 0,
            response_format = new { type = "json_object" },
            messages = new object[]
            {
                // Admin prompt + an authoritative output schema, so classification/tags stay
                // correct even if the stored prompt predates the talent/phone fields.
                new { role = "system", content = s.AiSystemPrompt + "\n\n" + OutputSchema },
                new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
            },
        };

        // Created before the request so a malformed key cannot leave an undisposed message behind.
        var authorization = string.IsNullOrWhiteSpace(s.AiApiKey)
            ? null
            : new AuthenticationHeaderValue("Bearer", s.AiApiKey);

        var req = new HttpRequestMessage(HttpMethod.Post, s.AiEndpoint)
        {
            Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"),
        };
        req.Headers.Authorization = authorization;
        return req;
    }

    // Pulls choices[0].message.content out of a chat-completions response body.
    // Throws when the body does not have that shape; the caller treats that as a failed audit.
    private static string? ExtractMessageContent(string responseBody)
    {
        using var doc = JsonDocument.Parse(responseBody);
        return doc.RootElement
            .GetProperty("choices")[0]
            .GetProperty("message")
            .GetProperty("content")
            .GetString();
    }

    // Parses the model's reply into a verdict. Returns null when the reply contains no JSON
    // object; throws when the extracted span is not valid JSON (handled by the caller).
    private static AiAuditResult? ParseVerdict(string json)
    {
        // The content itself should be a JSON object; tolerate code fences and surrounding text
        // by keeping only the span from the first '{' to the last '}'.
        json = json.Trim().Trim('`');
        var start = json.IndexOf('{');
        var end = json.LastIndexOf('}');
        if (start < 0 || end <= start)
        {
            return null;
        }

        using var doc = JsonDocument.Parse(json.Substring(start, end - start + 1));
        var root = doc.RootElement;

        // Guard on ValueKind first — TryGetInt32/64 THROW on null/string values (the model often
        // returns payAmount/sharePercent as null), which would fail the whole parse.
        string? Text(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.String ? v.GetString() : null;
        int IntOr(string key, int fallback) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : fallback;
        long? Long(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
        int? Int(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
        bool? Flag(string key) =>
            root.TryGetProperty(key, out var v) && v.ValueKind is JsonValueKind.True or JsonValueKind.False ? v.GetBoolean() : null;

        // A missing or non-string decision is treated as "review".
        var decision = (Text("decision") ?? "review").ToLowerInvariant();

        var data = new AiStructured(
            Text("kind"), Text("role"), Text("city"), Text("district"), Text("shiftType"),
            Text("employmentType"), Long("payAmount"), Int("sharePercent"), Text("title"), Text("facilityName"),
            Phone: Text("phone"),
            PersonName: Text("personName"),
            YearsExperience: Int("yearsExperience"),
            IsLicensed: Flag("isLicensed"));

        // Confidence defaults to 50 when absent or not an integer, and is clamped to 0–100.
        return new AiAuditResult(decision, Math.Clamp(IntOr("confidence", 50), 0, 100), Text("reason"), data);
    }
}