AI tag/category assignment + phone extraction from web ads
AI (when enabled, now that the server proxy is up):
- AiStructured gains phone, personName, yearsExperience, isLicensed.
- The auditor appends an authoritative output schema to the admin prompt so classification stays correct even with an older stored prompt — it now classifies kind as shift|job|talent and extracts the contact phone and talent details.
- Ingestion publish prefers the AI's tags (kind/role/city/facility/phone + talent fields) over the heuristic parser when present.
- Default prompt updated to describe the three kinds + new fields.

Phone extraction from websites (Medjobs / generic sites), where the number sits behind a "تماس با این آگهی" reveal:
- HtmlUtil.HarvestPhones scans the full markup for tel: links, JSON-LD "telephone", data-*phone* attributes, and inline Iranian mobile/landline numbers (Persian digits folded), normalized (mobiles 09…, landlines 0…).
- Medjobs + Website sources append harvested numbers to the ad text so the parser/AI capture them; manual review then prefills the phone too.
- Parser phone extraction now also captures a landline as a fallback.

Note: if a site loads the number purely via XHR (not in HTML), a per-source reveal endpoint would be a follow-up.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,8 @@ namespace JobsMedical.Web.Services.Scraping;
|
||||
|
||||
public record AiStructured(
|
||||
string? Kind, string? Role, string? City, string? District, string? ShiftType,
|
||||
string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName);
|
||||
string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName,
|
||||
string? Phone = null, string? PersonName = null, int? YearsExperience = null, bool? IsLicensed = null);
|
||||
|
||||
/// <summary>An AI verdict on a raw listing.</summary>
|
||||
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
|
||||
@@ -30,6 +31,24 @@ public interface IAiAuditor
|
||||
/// </summary>
|
||||
public class OpenAiCompatibleAuditor : IAiAuditor
|
||||
{
|
||||
// Authoritative output contract appended to the admin prompt so tags/categories stay correct
|
||||
// (including the «آماده به کار» type and contact phone) regardless of the stored prompt text.
|
||||
private const string OutputSchema = """
|
||||
فقط یک شیء JSON با این کلیدها برگردان (هر فیلد نامشخص = null):
|
||||
decision: approve|reject|review
|
||||
confidence: عدد ۰ تا ۱۰۰
|
||||
reason: توضیح کوتاه فارسی
|
||||
kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
|
||||
role: عنوان دقیق نقش درمانی (مثل پرستار، پزشک عمومی، دندانپزشک، تکنسین اتاق عمل، ماما، کارشناس آزمایشگاه)
|
||||
city, district: نام شهر و محله/منطقه در صورت ذکر
|
||||
shiftType: day|evening|night|oncall (فقط برای shift)
|
||||
employmentType: fulltime|parttime|contract|plan
|
||||
payAmount: عدد تومان یا null ، sharePercent: عدد ۰ تا ۱۰۰ یا null (مثل «۵۰٪ تسویه»)
|
||||
title: عنوان کوتاه ، facilityName: نام مرکز درمانی (فقط برای shift/job)
|
||||
phone: شماره تماس (موبایل یا ثابت) بهصورت رقم لاتین، یا null
|
||||
personName: نام فرد (فقط برای talent) ، yearsExperience: سال سابقه عدد یا null ، isLicensed: true/false (پروانهدار)
|
||||
""";
|
||||
|
||||
private readonly ScrapeHttpClients _clients;
|
||||
private readonly ILogger<OpenAiCompatibleAuditor> _log;
|
||||
|
||||
@@ -52,7 +71,9 @@ public class OpenAiCompatibleAuditor : IAiAuditor
|
||||
response_format = new { type = "json_object" },
|
||||
messages = new object[]
|
||||
{
|
||||
new { role = "system", content = s.AiSystemPrompt },
|
||||
// Admin prompt + an authoritative output schema, so classification/tags stay
|
||||
// correct even if the stored prompt predates the talent/phone fields.
|
||||
new { role = "system", content = s.AiSystemPrompt + "\n\n" + OutputSchema },
|
||||
new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
|
||||
},
|
||||
};
|
||||
@@ -100,10 +121,12 @@ public class OpenAiCompatibleAuditor : IAiAuditor
|
||||
int I(string k, int d) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : d;
|
||||
long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
|
||||
int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
|
||||
bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null;
|
||||
|
||||
var decision = (S("decision") ?? "review").ToLowerInvariant();
|
||||
var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
|
||||
S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"));
|
||||
S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"),
|
||||
Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"));
|
||||
return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user