AI ingestion: dynamic role/category creation + tags, hardcoded read-only prompt
CI/CD / CI · dotnet build (push) Successful in 2m19s
CI/CD / Deploy · hamkadr (push) Successful in 2m12s

- Unknown roles from the AI are now resolved-or-CREATED (Persian-normalized dedupe) instead of dropped/fallback; new role gets the AI's category, assigned to the applicant.
- AI output gains category + tags; AI-detected skills/requirements (ICU, MMT, پروانه‌دار…) now fold into the applicant's searchable Tags.
- System prompt is hardcoded in AppSetting.DefaultPrompt and used directly by the auditor; admin sees it read-only (cannot edit/break it).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
soroush.asadi
2026-06-09 19:04:24 +03:30
parent 59fb30ac77
commit cf5e0011c4
6 changed files with 123 additions and 43 deletions
@@ -9,7 +9,10 @@ namespace JobsMedical.Web.Services.Scraping;
public record AiStructured(
string? Kind, string? Role, string? City, string? District, string? ShiftType,
string? EmploymentType, long? PayAmount, int? SharePercent, string? Title, string? FacilityName,
string? Phone = null, string? PersonName = null, int? YearsExperience = null, bool? IsLicensed = null);
string? Phone = null, string? PersonName = null, int? YearsExperience = null, bool? IsLicensed = null,
// Dynamic taxonomy: the model may name a role/category outside the seeded set (ingestion
// resolves-or-creates it). Tags carry the post's skills/requirements (ICU, MMT, پروانه‌دار…).
string? Category = null, IReadOnlyList<string>? Tags = null);
/// <summary>An AI verdict on a raw listing.</summary>
public record AiAuditResult(string Decision, int Confidence, string? Reason, AiStructured? Data)
@@ -45,7 +48,9 @@ public class OpenAiCompatibleAuditor : IAiAuditor
confidence: عدد ۰ تا ۱۰۰
reason: توضیح کوتاه فارسی
kind: shift (شیفت توسط مرکز) | job (استخدام توسط مرکز) | talent (کادر درمان که خودش «آماده به کار» است)
role: عنوان دقیق نقش درمانی (مثل پرستار، پزشک عمومی، دندانپزشک، تکنسین اتاق عمل، ماما، کارشناس آزمایشگاه)
role: عنوان دقیق نقش درمانی (مثل پرستار، پزشک عمومی، دندانپزشک، تکنسین اتاق عمل، ماما، کارشناس آزمایشگاه). اگر تخصص دقیق در فهرست نبود، همان عنوان دقیق را برگردان.
category: گروه نقش (پزشک | پرستار | ماما | تکنسین | دندانپزشک). اگر هیچکدام مناسب نبود، یک گروه کوتاه و مناسب پیشنهاد بده.
tags: آرایهای از کلیدواژههای مهارت/الزام مرتبط بهصورت رشته (مثل "ICU"، "MMT"، "CPR"، "پروانه‌دار"، "خانم") یا []
city, district: نام شهر و محله/منطقه در صورت ذکر
shiftType: day|evening|night|oncall (فقط برای shift)
employmentType: fulltime|parttime|contract|plan
@@ -150,9 +155,9 @@ public class OpenAiCompatibleAuditor : IAiAuditor
response_format = new { type = "json_object" },
messages = new object[]
{
// Admin prompt + an authoritative output schema, so classification/tags stay
// correct even if the stored prompt predates the talent/phone fields.
new { role = "system", content = s.AiSystemPrompt + "\n\n" + OutputSchema },
// Hardcoded, code-owned prompt (NOT the stored AiSystemPrompt) + the authoritative
// output schema, so classification/tags can never be broken by an admin edit.
new { role = "system", content = AppSetting.DefaultPrompt + "\n\n" + OutputSchema },
new { role = "user", content = "آگهی خام:\n" + rawText + "\n\nفقط با JSON پاسخ بده." },
},
};
@@ -214,11 +219,23 @@ public class OpenAiCompatibleAuditor : IAiAuditor
long? L(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt64(out var n) ? n : null;
int? NI(string k) => r.TryGetProperty(k, out var v) && v.ValueKind == JsonValueKind.Number && v.TryGetInt32(out var n) ? n : null;
bool? B(string k) => r.TryGetProperty(k, out var v) && (v.ValueKind == JsonValueKind.True || v.ValueKind == JsonValueKind.False) ? v.GetBoolean() : null;
// Array-of-strings reader for property `k` of the parsed response object `r`.
// Returns the non-empty string elements when the value is a JSON array, a one-element list when
// the model returned a single non-empty string instead of an array, and null when the property
// is missing or yields no usable strings.
IReadOnlyList<string>? SA(string k)
{
    if (!r.TryGetProperty(k, out var v)) return null;
    var list = new List<string>();
    // Braces are load-bearing here: without them the `else if` binds to the innermost `if`
    // (inside the foreach), so the single-string fallback would never run.
    if (v.ValueKind == JsonValueKind.Array)
    {
        foreach (var el in v.EnumerateArray())
        {
            // Non-string elements (numbers, objects, nulls) are skipped rather than stringified.
            if (el.ValueKind == JsonValueKind.String && el.GetString() is { Length: > 0 } s)
            {
                list.Add(s);
            }
        }
    }
    else if (v.ValueKind == JsonValueKind.String && v.GetString() is { Length: > 0 } one)
    {
        list.Add(one);
    }
    return list.Count > 0 ? list : null;
}
var decision = (S("decision") ?? "review").ToLowerInvariant();
var data = new AiStructured(S("kind"), S("role"), S("city"), S("district"), S("shiftType"),
S("employmentType"), L("payAmount"), NI("sharePercent"), S("title"), S("facilityName"),
Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"));
Phone: S("phone"), PersonName: S("personName"), YearsExperience: NI("yearsExperience"), IsLicensed: B("isLicensed"),
Category: S("category"), Tags: SA("tags"));
return new AiAuditResult(decision, Math.Clamp(I("confidence", 50), 0, 100), S("reason"), data);
}
}
@@ -166,14 +166,21 @@ public class IngestionService
// One ad can name several roles («پرستار سالمند و کودک و همراه بیمار») — resolve them all
// and publish one listing per role so each is browsable/filterable. Capped to avoid spam.
var roleNames = new List<string>();
if (!string.IsNullOrWhiteSpace(d?.Role)) roleNames.Add(d!.Role!.Trim());
roleNames.AddRange(parsed.RoleNames);
if (parsed.RoleName is not null) roleNames.Add(parsed.RoleName);
var pubRoles = roleNames
.Select(n => roles.FirstOrDefault(r => r.Name == n))
.Where(r => r is not null).Cast<Role>()
.Distinct().Take(4).ToList();
// The AI's role (+ its category) is the trusted, possibly-new one; parser names are already
// canonical matches. Unknown roles are CREATED (dynamic taxonomy), not dropped.
var candidates = new List<(string name, string? category)>();
if (!string.IsNullOrWhiteSpace(d?.Role)) candidates.Add((d!.Role!.Trim(), d.Category));
foreach (var n in parsed.RoleNames) candidates.Add((n, null));
if (parsed.RoleName is not null) candidates.Add((parsed.RoleName, null));
var pubRoles = new List<Role>();
foreach (var (name, category) in candidates)
{
if (string.IsNullOrWhiteSpace(name)) continue;
var role = ResolveOrCreateRole(roles, name, category);
if (!pubRoles.Contains(role)) pubRoles.Add(role);
if (pubRoles.Count >= 4) break;
}
if (pubRoles.Count == 0) pubRoles.Add(roles.First());
var city = cities.FirstOrDefault(c => c.Name == cityName)
@@ -205,7 +212,7 @@ public class IngestionService
Description = raw.RawText,
Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
Contacts = BuildContacts(d, parsed), // fresh instances per listing
Tags = BuildTags(parsed, role, city),
Tags = BuildTags(parsed, d, role, city),
});
raw.Status = RawListingStatus.Normalized;
return;
@@ -261,13 +268,52 @@ public class IngestionService
raw.Status = RawListingStatus.Normalized;
}
/// <summary>Space-separated searchable tags: parsed cert/skill tags + this listing's role + city.</summary>
private static string BuildTags(ParsedListing parsed, Role role, City city)
/// <summary>Space-separated searchable tags: parsed cert/skill tags + AI-detected skills/requirements
/// + this listing's role/category + city. Drives deep search and tag chips on the applicant card.</summary>
private static string BuildTags(ParsedListing parsed, AiStructured? d, Role role, City city)
{
var tags = new List<string>(parsed.Tags) { role.Name, city.Name };
var tags = new List<string>(parsed.Tags) { role.Name, role.Category, city.Name };
if (d?.Tags is not null)
tags.AddRange(d.Tags.Where(t => !string.IsNullOrWhiteSpace(t)).Select(t => t.Trim()));
return string.Join(" ", tags.Where(t => !string.IsNullOrWhiteSpace(t)).Distinct());
}
/// <summary>Finds an existing role by Persian-normalized name; if none exists, creates a new Role
/// (dynamic taxonomy) using the suggested category, reusing an existing category when one
/// normalizes to the same text. The new role is added to both the DbSet and the in-run list so
/// later items in the same run reuse it instead of creating a duplicate.</summary>
/// <param name="roles">In-run list of known roles; a newly created role is appended to it.</param>
/// <param name="name">Role name to resolve (callers pass a non-blank value).</param>
/// <param name="category">Suggested category; blank or null falls back to «سایر».</param>
/// <returns>The matching existing role, or the newly created one.</returns>
private Role ResolveOrCreateRole(List<Role> roles, string name, string? category)
{
    // Clamp BEFORE matching: the stored name is clamped, so matching on the unclamped input could
    // never find a previously created over-long role and would insert a duplicate every time.
    var roleName = Clamp(name.Trim(), 100); // respect Role.Name MaxLength(100)
    var norm = NormalizeFa(roleName);
    var match = roles.FirstOrDefault(r => NormalizeFa(r.Name) == norm);
    if (match is not null) return match;

    // Same reasoning for the category: compare what would actually be stored.
    var wantCat = Clamp(string.IsNullOrWhiteSpace(category) ? "سایر" : category!.Trim(), 50); // Role.Category MaxLength(50)
    var wantCatNorm = NormalizeFa(wantCat); // hoisted: invariant across the scan below

    // Collapse onto an existing category that normalizes the same, so spellings that differ only
    // by Arabic vs. Persian letter forms do not fork into two categories.
    var existingCat = roles.Select(r => r.Category)
        .FirstOrDefault(c => !string.IsNullOrWhiteSpace(c) && NormalizeFa(c) == wantCatNorm);

    var created = new Role
    {
        Name = roleName,
        Category = Clamp(existingCat ?? wantCat, 50),
        IsActive = true,
        // Append after the current highest sort position (or start at 1 when the list is empty).
        SortOrder = (roles.Count == 0 ? 0 : roles.Max(r => r.SortOrder)) + 1,
    };
    _db.Roles.Add(created);
    roles.Add(created); // reuse within this run (saved with the batch at end of source)
    _log.LogInformation("Ingestion introduced new role «{Role}» (category «{Category}») from AI.",
        created.Name, created.Category);
    return created;
}
/// <summary>Normalizes a Persian string for dedupe comparisons: maps Arabic yeh/kaf to their
/// Persian forms, turns each ZWNJ (U+200C) into a space, trims, collapses every whitespace run to
/// a single space, and lower-cases invariantly (so Latin tags like "ICU"/"icu" also match).
/// A null input is treated as the empty string.</summary>
/// <remarks>ZWNJ is replaced by a space rather than removed, so the ZWNJ-joined and the
/// space-separated spellings of the same word compare equal.</remarks>
private static string NormalizeFa(string? s)
{
    // Code points are written as escapes on purpose: the Arabic/Persian letter pairs look
    // identical in most fonts and ZWNJ is invisible, so raw literals are easy to corrupt unnoticed.
    var unified = (s ?? "")
        .Replace('\u064A', '\u06CC')   // Arabic yeh  -> Persian yeh
        .Replace('\u0643', '\u06A9')   // Arabic kaf  -> Persian kaf
        .Replace('\u200C', ' ')        // ZWNJ        -> space (collapsed below)
        .Trim();
    return Regex.Replace(unified, @"\s+", " ").ToLowerInvariant();
}
/// <summary>Returns <paramref name="s"/> unchanged when it is at most <paramref name="max"/>
/// UTF-16 code units long; otherwise returns its first <paramref name="max"/> code units with
/// surrounding whitespace trimmed. The cut never lands inside a surrogate pair.</summary>
private static string Clamp(string s, int max)
{
    if (s.Length <= max) return s;
    // If the last kept code unit is a high surrogate, its low half would be cut off and a lone
    // (invalid) surrogate would be stored — drop the whole pair instead.
    var cut = max > 0 && char.IsHighSurrogate(s[max - 1]) ? max - 1 : max;
    return s[..cut].Trim();
}
/// <summary>Fresh ContactMethod rows for one talent listing (parser contacts + AI phone).</summary>
private static List<ContactMethod> BuildContacts(AiStructured? d, ParsedListing parsed)
{
@@ -31,8 +31,7 @@ public class SettingsService
s.AiEndpoint = incoming.AiEndpoint?.Trim();
s.AiApiKey = incoming.AiApiKey?.Trim();
s.AiModel = incoming.AiModel?.Trim();
s.AiSystemPrompt = string.IsNullOrWhiteSpace(incoming.AiSystemPrompt)
? AppSetting.DefaultPrompt : incoming.AiSystemPrompt;
s.AiSystemPrompt = AppSetting.DefaultPrompt; // hardcoded & read-only — keep the column in sync
s.AiAutoApprove = incoming.AiAutoApprove;
s.AiUseProxy = incoming.AiUseProxy;
// Channel scraping sources