2026-06-03 08:18:19 +03:30
|
|
|
|
using System.Security.Cryptography;
|
|
|
|
|
|
using System.Text;
|
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
|
using JobsMedical.Web.Data;
|
|
|
|
|
|
using JobsMedical.Web.Models;
|
|
|
|
|
|
using Microsoft.EntityFrameworkCore;
|
|
|
|
|
|
|
|
|
|
|
|
namespace JobsMedical.Web.Services.Scraping;
|
|
|
|
|
|
|
2026-06-03 17:41:02 +03:30
|
|
|
|
/// <summary>Counters collected for a single listing source during one ingestion run.</summary>
/// <param name="Source">Name of the source the counters belong to.</param>
/// <param name="Fetched">Number of items the source returned.</param>
/// <param name="Queued">Items stored for manual review.</param>
/// <param name="Published">Items that were published automatically.</param>
/// <param name="Flagged">Items stored as flagged.</param>
/// <param name="Spam">Items that were discarded.</param>
/// <param name="Duplicates">Items skipped because their content hash was already known.</param>
public record SourceResult(
    string Source,
    int Fetched,
    int Queued,
    int Published,
    int Flagged,
    int Spam,
    int Duplicates);
|
2026-06-03 08:18:19 +03:30
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Result of one ingestion run: the per-source breakdown plus totals computed on demand
/// by summing the matching counter over every entry in <paramref name="Sources"/>.
/// </summary>
/// <param name="Sources">One entry per source that produced a result in this run.</param>
public record IngestionSummary(List<SourceResult> Sources)
{
    /// <summary>Sum of <see cref="SourceResult.Fetched"/> across all sources.</summary>
    public int TotalFetched => Sources.Select(r => r.Fetched).Sum();

    /// <summary>Sum of <see cref="SourceResult.Queued"/> across all sources.</summary>
    public int TotalQueued => Sources.Select(r => r.Queued).Sum();

    /// <summary>Sum of <see cref="SourceResult.Published"/> across all sources.</summary>
    public int TotalPublished => Sources.Select(r => r.Published).Sum();

    /// <summary>Sum of <see cref="SourceResult.Flagged"/> across all sources.</summary>
    public int TotalFlagged => Sources.Select(r => r.Flagged).Sum();

    /// <summary>Sum of <see cref="SourceResult.Spam"/> across all sources.</summary>
    public int TotalSpam => Sources.Select(r => r.Spam).Sum();

    /// <summary>Sum of <see cref="SourceResult.Duplicates"/> across all sources.</summary>
    public int TotalDuplicates => Sources.Select(r => r.Duplicates).Sum();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
2026-06-03 17:41:02 +03:30
|
|
|
|
/// The scrape engine. For every enabled source: dedupe by content hash → parse → rule-validate →
|
|
|
|
|
|
/// (optional) AI audit → decide. Decision depends on admin settings:
|
|
|
|
|
|
/// • spam → Discarded
|
|
|
|
|
|
/// • AI on: AI verdict drives approve/reject/review; approve + Automatic + AiAutoApprove → publish
|
|
|
|
|
|
/// • AI off: Automatic + confidence ≥ threshold → publish; else queue/flag
|
|
|
|
|
|
/// "Publish" resolves-or-creates an (unverified) facility and creates the Shift/JobOpening.
|
2026-06-03 08:18:19 +03:30
|
|
|
|
/// </summary>
|
|
|
|
|
|
public class IngestionService
|
|
|
|
|
|
{
|
|
|
|
|
|
// Database context: read for reference data and duplicate checks, written for every stored row.
private readonly AppDbContext _db;

// Every registered listing source; each one is fetched in turn by RunAsync.
private readonly IEnumerable<IListingSource> _sources;

// Heuristic parser that extracts structured fields from raw listing text.
private readonly IListingParser _parser;

// Rule-based validator (spam / validity / confidence) applied to each parsed listing.
private readonly ListingValidator _validator;

// AI auditor, consulted only when the settings enable it and the item is not rule-level spam.
private readonly IAiAuditor _ai;

// Provider of the admin settings loaded at the start of each run.
private readonly SettingsService _settings;

private readonly ILogger<IngestionService> _log;

/// <summary>Stores the injected collaborators; performs no other work.</summary>
public IngestionService(
    AppDbContext db,
    IEnumerable<IListingSource> sources,
    IListingParser parser,
    ListingValidator validator,
    IAiAuditor ai,
    SettingsService settings,
    ILogger<IngestionService> log)
{
    _db = db;
    _sources = sources;
    _parser = parser;
    _validator = validator;
    _ai = ai;
    _settings = settings;
    _log = log;
}
|
|
|
|
|
|
|
2026-06-04 00:44:11 +03:30
|
|
|
|
/// <summary>Names of all registered sources, collected into a new list on every access.</summary>
public IReadOnlyList<string> SourceNames
{
    get
    {
        var names = new List<string>();
        foreach (var source in _sources)
        {
            names.Add(source.Name);
        }
        return names;
    }
}
|
2026-06-03 08:18:19 +03:30
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Runs one ingestion pass over every registered source. Each fetched item is hashed and
/// skipped when the hash is already known; otherwise it is parsed, rule-validated, optionally
/// AI-audited, and stored as a <c>RawListing</c> with the status chosen by <c>Decide</c>.
/// Items decided as <c>Normalized</c> are published immediately; if publishing throws, the
/// item is queued (<c>New</c>) instead. Changes are saved once per source, and a single
/// <c>IngestionRun</c> history row is written when at least one source produced a result.
/// </summary>
/// <param name="ct">Cancellation token passed to the database, source and AI calls.</param>
/// <returns>Per-source counters for every source that returned at least one item.</returns>
public async Task<IngestionSummary> RunAsync(CancellationToken ct = default)
{
    var settings = await _settings.GetAsync();

    // Reference data is loaded once per run. The entity lists are handed to Publish, which may
    // append roles/facilities it creates so later items in the same run reuse them.
    var roles = await _db.Roles.ToListAsync(ct);
    var cities = await _db.Cities.ToListAsync(ct);
    var districts = await _db.Districts.ToListAsync(ct);
    var facilities = await _db.Facilities.ToListAsync(ct); // fuzzy-matched + grown as we create
    var roleNames = roles.Select(r => r.Name).ToList();
    var cityNames = cities.Select(c => c.Name).ToList();
    var districtNames = districts.Select(d => d.Name).ToList();

    var results = new List<SourceResult>();

    // Hashes already accepted during this run. The database check below cannot see rows that
    // were added but not yet saved (saving happens once per source), so without this set two
    // identical items in the same fetch would both be inserted.
    var seenHashes = new HashSet<string>();

    foreach (var source in _sources)
    {
        int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0, dupes = 0;

        IReadOnlyList<ScrapedItem> items;
        try
        {
            items = await source.FetchAsync(settings, ct);
        }
        // A failing source must not abort the run, but a cancellation requested by the caller
        // must propagate rather than be logged as a source failure and silently ignored.
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            _log.LogError(ex, "Source {Source} failed", source.Name);
            continue;
        }

        if (items.Count == 0) continue; // disabled/unconfigured source

        foreach (var item in items)
        {
            fetched++;

            var hash = Hash(item.RawText);
            if (!seenHashes.Add(hash) || await _db.RawListings.AnyAsync(r => r.ContentHash == hash, ct))
            {
                dupes++;
                continue;
            }

            var parsed = _parser.Parse(item.RawText, roleNames, cityNames, districtNames);
            var val = _validator.Validate(item.RawText, parsed);

            // The AI audit is skipped for rule-level spam.
            AiAuditResult? ai = null;
            if (settings.AiEnabled && !val.IsSpam)
            {
                ai = await _ai.AuditAsync(item.RawText, settings, ct);
            }

            var (status, reason, confidence) = Decide(settings, val, ai);

            var raw = new RawListing
            {
                SourceChannel = item.Source,
                SourceUrl = item.SourceUrl,
                RawText = item.RawText.Trim(),
                ContentHash = hash,
                Confidence = confidence,
                ValidationNotes = reason,
                Status = status,
            };
            _db.RawListings.Add(raw);

            if (status == RawListingStatus.Normalized)
            {
                try
                {
                    Publish(parsed, ai, raw, roles, cities, districts, facilities);
                    published++;
                }
                catch (Exception ex)
                {
                    // Best effort: keep the raw row and let an admin handle it from the queue.
                    _log.LogWarning(ex, "Auto-publish failed; queueing instead");
                    raw.Status = RawListingStatus.New;
                    queued++;
                }
            }
            else if (status == RawListingStatus.New)
            {
                queued++;
            }
            else if (status == RawListingStatus.Flagged)
            {
                flagged++;
            }
            else
            {
                // Every remaining status is counted as spam (this includes AI rejections).
                spam++;
            }
        }

        await _db.SaveChangesAsync(ct);
        results.Add(new SourceResult(source.Name, fetched, queued, published, flagged, spam, dupes));
        _log.LogInformation("Ingest {S}: fetched={F} queued={Q} published={P} flagged={Fl} spam={Sp} dupes={D}",
            source.Name, fetched, queued, published, flagged, spam, dupes);
    }

    var summary = new IngestionSummary(results);

    // Persist a run-log row so admins get a crawl history (with a per-source breakdown).
    if (results.Count > 0)
    {
        var detail = string.Join("؛ ", results.Select(r =>
            $"{r.Source}: یافت {r.Fetched}، صف {r.Queued}، منتشر {r.Published}، پرچم {r.Flagged}، اسپم {r.Spam}، تکراری {r.Duplicates}"));
        _db.IngestionRuns.Add(new IngestionRun
        {
            Fetched = summary.TotalFetched,
            Queued = summary.TotalQueued,
            Published = summary.TotalPublished,
            Flagged = summary.TotalFlagged,
            Spam = summary.TotalSpam,
            Duplicates = summary.TotalDuplicates,
            Detail = detail.Length > 2000 ? detail[..2000] : detail, // cap the stored breakdown at 2000 chars
        });
        await _db.SaveChangesAsync(ct);
    }

    return summary;
}
|
|
|
|
|
|
|
2026-06-03 17:41:02 +03:30
|
|
|
|
/// <summary>
/// Chooses the status, the human-readable note and the confidence to store for one item.
/// Order of precedence: rule-level spam is discarded; otherwise an AI verdict (when one
/// was obtained) decides; otherwise the rule validator and the admin mode decide.
/// </summary>
/// <param name="s">Admin settings (mode, AI auto-approve flag, auto-publish threshold).</param>
/// <param name="val">Outcome of the rule-based validation.</param>
/// <param name="ai">AI audit result, or <c>null</c> when no audit was run.</param>
/// <returns>The status to store, the note (may be <c>null</c>) and the confidence value.</returns>
private static (RawListingStatus status, string? reason, int confidence) Decide(
    AppSetting s, ValidationResult val, AiAuditResult? ai)
{
    // Validator issues joined into one note, or null when there are none.
    string? ruleNotes = null;
    if (val.Issues.Count > 0)
    {
        ruleNotes = string.Join("؛ ", val.Issues);
    }

    if (val.IsSpam)
    {
        return (RawListingStatus.Discarded, Join("اسپم", ruleNotes), val.Confidence);
    }

    if (ai is null)
    {
        // Rule-only path: invalid items are flagged; valid ones are auto-published only in
        // Automatic mode with enough confidence, and queued otherwise.
        if (!val.IsValid)
        {
            return (RawListingStatus.Flagged, ruleNotes, val.Confidence);
        }

        var autoPublish = s.Mode == IngestionMode.Automatic && val.Confidence >= s.AutoPublishMinConfidence;
        return (autoPublish ? RawListingStatus.Normalized : RawListingStatus.New, ruleNotes, val.Confidence);
    }

    // AI path: the note leads with the AI decision, followed by any rule issues.
    var headline = $"AI: {ai.Decision} ({ai.Confidence}٪)";
    if (ai.Reason is not null)
    {
        headline += $" — {ai.Reason}";
    }
    var aiNotes = Join(headline, ruleNotes);

    RawListingStatus verdict;
    if (ai.Reject)
    {
        verdict = RawListingStatus.Discarded;
    }
    else if (ai.Approve)
    {
        // An approval publishes only when Automatic mode and AI auto-approve are both on.
        verdict = s.Mode == IngestionMode.Automatic && s.AiAutoApprove
            ? RawListingStatus.Normalized
            : RawListingStatus.New;
    }
    else
    {
        verdict = RawListingStatus.Flagged; // review
    }

    return (verdict, aiNotes, ai.Confidence);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Creates the public entities for one accepted raw listing and marks it <c>Normalized</c>.
/// Depending on the listing kind this adds one <c>TalentListing</c>, <c>JobOpening</c> or
/// <c>Shift</c> per resolved role (at most four roles). Facility-backed kinds reuse a matching
/// facility or create a new unverified one. Nothing is saved here; the caller saves the batch.
/// </summary>
/// <param name="parsed">Fields extracted by the heuristic parser.</param>
/// <param name="ai">AI audit result; its structured data is preferred where the code reads it.</param>
/// <param name="raw">The raw listing row being published.</param>
/// <param name="roles">In-run role list; roles created here are appended to it.</param>
/// <param name="cities">Known cities.</param>
/// <param name="districts">Known districts.</param>
/// <param name="facilities">In-run facility list; a facility created here is appended to it.</param>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>First()</c> when a fallback is needed but <paramref name="roles"/> or
/// <paramref name="cities"/> is empty.
/// </exception>
private void Publish(ParsedListing parsed, AiAuditResult? ai, RawListing raw,
    List<Role> roles, List<City> cities, List<District> districts, List<Facility> facilities)
{
    var d = ai?.Data;
    var cityName = d?.City ?? parsed.CityName;
    var districtName = d?.District ?? parsed.DistrictName;

    // One ad can name several roles — resolve them all and publish one listing per role so
    // each is browsable/filterable. Capped to avoid spam.
    // The AI's role (+ its category) is the trusted, possibly-new one; parser names are already
    // canonical matches. Unknown roles are CREATED (dynamic taxonomy), not dropped.
    var candidates = new List<(string name, string? category)>();
    if (!string.IsNullOrWhiteSpace(d?.Role)) candidates.Add((d!.Role!.Trim(), d.Category));
    foreach (var n in parsed.RoleNames) candidates.Add((n, null));
    if (parsed.RoleName is not null) candidates.Add((parsed.RoleName, null));

    var pubRoles = new List<Role>();
    foreach (var (name, category) in candidates)
    {
        if (string.IsNullOrWhiteSpace(name)) continue;
        var role = ResolveOrCreateRole(roles, name, category);
        if (!pubRoles.Contains(role)) pubRoles.Add(role);
        if (pubRoles.Count >= 4) break;
    }
    if (pubRoles.Count == 0) pubRoles.Add(roles.First()); // no usable role: fall back to the first known one

    // City: exact name first, then a Persian-normalized match (the AI may spell the name with
    // Arabic-form letters or a ZWNJ), then the first active city, then the first city.
    var city = cities.FirstOrDefault(c => c.Name == cityName);
    if (city is null && !string.IsNullOrWhiteSpace(cityName))
    {
        var normCity = NormalizeFa(cityName);
        city = cities.FirstOrDefault(c => NormalizeFa(c.Name) == normCity);
    }
    city ??= cities.FirstOrDefault(c => c.IsActive) ?? cities.First();

    // District: same exact-then-normalized lookup, restricted to the chosen city. May stay null.
    var district = districts.FirstOrDefault(x => x.Name == districtName && x.CityId == city.Id);
    if (district is null && !string.IsNullOrWhiteSpace(districtName))
    {
        var normDistrict = NormalizeFa(districtName);
        district = districts.FirstOrDefault(x => x.CityId == city.Id && NormalizeFa(x.Name) == normDistrict);
    }

    var kindStr = (d?.Kind ?? parsed.Kind.ToString()).ToLowerInvariant();

    // AI phone when present, else the parser's.
    var phone = !string.IsNullOrWhiteSpace(d?.Phone) ? d!.Phone!.Trim() : parsed.Phone;

    // "Ready to work" listing — a worker offering themselves. No facility involved.
    if (parsed.Kind == ListingKind.Talent || kindStr.Contains("talent") || kindStr.Contains("آماده"))
    {
        // Prefer the AI's values when present, else the heuristic parser.
        var tPay = d?.PayAmount ?? parsed.PayAmount;
        var tShare = d?.SharePercent ?? parsed.SharePercent;
        foreach (var role in pubRoles)
        {
            _db.TalentListings.Add(new TalentListing
            {
                Role = role, City = city, DistrictId = district?.Id,
                PersonName = !string.IsNullOrWhiteSpace(d?.PersonName) ? d!.PersonName!.Trim() : parsed.PersonName,
                YearsExperience = d?.YearsExperience ?? parsed.YearsExperience,
                IsLicensed = d?.IsLicensed ?? parsed.IsLicensed,
                AreaNote = parsed.AreaNote,
                Availability = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
                Gender = parsed.Gender,
                // Share without a fixed amount => Percentage; neither => Negotiable; amount => PerShift.
                PayType = tShare is not null && tPay is null ? PayType.Percentage
                    : tPay is null ? PayType.Negotiable : PayType.PerShift,
                PayAmount = tPay, SharePercent = tShare,
                Phone = phone,
                Description = raw.RawText,
                Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
                Contacts = BuildContacts(d, parsed), // fresh instances per listing
                Tags = BuildTags(parsed, d, role, city),
            });
        }
        raw.Status = RawListingStatus.Normalized;
        return;
    }

    // Never surface the crawl source in a public facility name: use the AI's name, then the
    // parser's, then a generic placeholder.
    var facilityName = !string.IsNullOrWhiteSpace(d?.FacilityName) ? d!.FacilityName!.Trim()
        : !string.IsNullOrWhiteSpace(parsed.FacilityName) ? parsed.FacilityName!.Trim()
        : "مرکز درمانی (نامشخص)";

    // Reuse an existing facility (exact or Persian-aware fuzzy match) before creating a new one.
    var facility = FacilityMatcher.FindBest(facilities, facilityName, city.Id);
    if (facility is null)
    {
        facility = new Facility
        {
            Name = facilityName, Type = FacilityType.Clinic, City = city, DistrictId = district?.Id,
            Phone = phone, IsVerified = false,
        };
        _db.Facilities.Add(facility);
        facilities.Add(facility); // so later listings in this run match it too
    }

    // NOTE(review): the job and shift branches below read pay only from the parser, while the
    // talent branch above prefers the AI's values — confirm whether that difference is intended.
    if (kindStr.Contains("job") || kindStr.Contains("استخدام"))
    {
        foreach (var role in pubRoles)
        {
            _db.JobOpenings.Add(new JobOpening
            {
                Facility = facility, Role = role,
                // The AI title is used only for single-role ads; multi-role ads get a per-role title.
                Title = !string.IsNullOrWhiteSpace(d?.Title) && pubRoles.Count == 1 ? d!.Title!.Trim() : $"استخدام {role.Name}",
                EmploymentType = MapEmployment(d?.EmploymentType, parsed.EmploymentType),
                SalaryMin = parsed.PayAmount,
                Description = raw.RawText, Status = ShiftStatus.Open, Source = ShiftSource.Aggregated,
                SourceUrl = raw.SourceUrl,
            });
        }
    }
    else
    {
        var st = MapShiftType(d?.ShiftType, parsed.ShiftType);
        var (start, end) = DefaultTimes(st);
        // Every shift is dated one day after the current UTC date.
        var shiftDate = DateOnly.FromDateTime(DateTime.UtcNow).AddDays(1);
        foreach (var role in pubRoles)
        {
            _db.Shifts.Add(new Shift
            {
                Facility = facility, Role = role,
                Date = shiftDate,
                StartTime = start, EndTime = end, ShiftType = st,
                SpecialtyRequired = role.Name, Description = raw.RawText,
                PayType = parsed.SharePercent is not null && parsed.PayAmount is null ? PayType.Percentage
                    : parsed.PayAmount is null ? PayType.Negotiable : PayType.PerShift,
                PayAmount = parsed.PayAmount, SharePercent = parsed.SharePercent,
                Status = ShiftStatus.Open, Source = ShiftSource.Aggregated, SourceUrl = raw.SourceUrl,
            });
        }
    }

    raw.Status = RawListingStatus.Normalized;
}
|
|
|
|
|
|
|
2026-06-09 19:04:24 +03:30
|
|
|
|
/// <summary>
/// Builds the space-separated tag string for one listing. Tags are taken, in order, from the
/// parser's tags, the role name, the role category, the city name and finally the AI's tags
/// (trimmed). Blank entries are removed and exact duplicates are collapsed to their first
/// occurrence.
/// </summary>
/// <param name="parsed">Parser output supplying the base tags.</param>
/// <param name="d">AI structured data; its tags are appended when present.</param>
/// <param name="role">Role of the listing being tagged.</param>
/// <param name="city">City of the listing being tagged.</param>
/// <returns>The tags joined with single spaces.</returns>
private static string BuildTags(ParsedListing parsed, AiStructured? d, Role role, City city)
{
    var collected = new List<string>(parsed.Tags);
    collected.Add(role.Name);
    collected.Add(role.Category);
    collected.Add(city.Name);

    if (d?.Tags is not null)
    {
        foreach (var aiTag in d.Tags)
        {
            if (!string.IsNullOrWhiteSpace(aiTag))
            {
                collected.Add(aiTag.Trim());
            }
        }
    }

    var unique = collected.Where(tag => !string.IsNullOrWhiteSpace(tag)).Distinct();
    return string.Join(" ", unique);
}
|
|
|
|
|
|
|
2026-06-09 19:04:24 +03:30
|
|
|
|
/// <summary>
/// Finds an existing role by Persian-normalized name; if none exists, creates a new active
/// <c>Role</c> (dynamic taxonomy), adds it to the context and to <paramref name="roles"/> so
/// later items in the same run reuse it, and logs the addition.
/// </summary>
/// <param name="roles">In-run role list that is searched and, on creation, appended to.</param>
/// <param name="name">Role name as found in the listing.</param>
/// <param name="category">
/// Suggested category. Blank falls back to the "other" category, and an existing category
/// that normalizes to the same text is reused instead of a new spelling.
/// </param>
/// <returns>The matching or newly created role.</returns>
private Role ResolveOrCreateRole(List<Role> roles, string name, string? category)
{
    // The stored name is clamped, so the lookup must also recognise the clamped form.
    // Otherwise an over-long name never matches the role it created and is created again
    // for every listing that carries it.
    var storedName = Clamp(name.Trim(), 100); // respect Role.Name MaxLength(100)
    var norm = NormalizeFa(name);
    var normStored = NormalizeFa(storedName);

    var match = roles.FirstOrDefault(r => NormalizeFa(r.Name) == norm);
    if (match is null && normStored != norm)
    {
        match = roles.FirstOrDefault(r => NormalizeFa(r.Name) == normStored);
    }
    if (match is not null) return match;

    var wantCat = string.IsNullOrWhiteSpace(category) ? "سایر" : category!.Trim();
    var wantCatNorm = NormalizeFa(wantCat); // computed once instead of once per role

    // Collapse onto an existing category that normalizes the same, so two spellings of one
    // category (Persian vs Arabic letter forms) don't fork.
    var existingCat = roles.Select(r => r.Category)
        .FirstOrDefault(c => !string.IsNullOrWhiteSpace(c) && NormalizeFa(c) == wantCatNorm);

    var created = new Role
    {
        Name = storedName,
        Category = Clamp(existingCat ?? wantCat, 50), // respect Role.Category MaxLength(50)
        IsActive = true,
        // Place the new role after every role currently known in this run.
        SortOrder = (roles.Count == 0 ? 0 : roles.Max(r => r.SortOrder)) + 1,
    };
    _db.Roles.Add(created);
    roles.Add(created); // reuse within this run (saved with the batch at end of source)
    _log.LogInformation("Ingestion introduced new role «{Role}» (category «{Category}») from AI.",
        created.Name, created.Category);
    return created;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Normalizes a Persian string for comparison: maps Arabic yeh/kaf to their Persian forms,
/// turns each zero-width non-joiner (ZWNJ) into a space, trims, collapses whitespace runs to
/// a single space and lowercases (so Latin text like "ICU"/"icu" also matches).
/// </summary>
/// <param name="s">Text to normalize; <c>null</c> is treated as empty.</param>
/// <returns>The normalized text, never <c>null</c>.</returns>
private static string NormalizeFa(string? s)
{
    // Code points are written as escapes: the ZWNJ is invisible in source and easy to lose.
    var unified = (s ?? "")
        .Replace('\u064A', '\u06CC') // Arabic yeh -> Persian yeh
        .Replace('\u0643', '\u06A9') // Arabic kaf -> Persian kaf (keheh)
        .Replace('\u200C', ' ')      // ZWNJ -> space (collapsed with neighbouring whitespace below)
        .Trim();

    return Regex.Replace(unified, @"\s+", " ").ToLowerInvariant();
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Limits <paramref name="s"/> to at most <paramref name="max"/> UTF-16 code units. Strings
/// that already fit are returned unchanged; longer ones are cut and then trimmed.
/// </summary>
/// <param name="s">Text to limit.</param>
/// <param name="max">Maximum length in UTF-16 code units.</param>
/// <returns>The original string, or its trimmed prefix.</returns>
private static string Clamp(string s, int max)
{
    if (s.Length <= max) return s;

    // Never end on the first half of a surrogate pair: a lone high surrogate is not valid
    // text, so back off by one code unit and drop the whole character instead.
    var cut = max;
    if (cut > 0 && char.IsHighSurrogate(s[cut - 1])) cut--;

    return s[..cut].Trim();
}
|
|
|
|
|
|
|
2026-06-08 11:10:19 +03:30
|
|
|
|
/// <summary>
/// Creates new <c>ContactMethod</c> rows for one talent listing: one per parser contact, with
/// <c>SortOrder</c> set to its position. When the AI supplied a phone and none of those
/// contacts is a mobile or phone entry, the AI phone is inserted first as a mobile contact
/// with <c>SortOrder</c> -1.
/// </summary>
/// <param name="d">AI structured data; only its phone is read.</param>
/// <param name="parsed">Parser output supplying the base contacts.</param>
/// <returns>A new list of new contact instances.</returns>
private static List<ContactMethod> BuildContacts(AiStructured? d, ParsedListing parsed)
{
    var result = new List<ContactMethod>();
    var position = 0;
    foreach (var source in parsed.Contacts)
    {
        result.Add(new ContactMethod { Type = source.Type, Value = source.Value, SortOrder = position });
        position++;
    }

    if (string.IsNullOrWhiteSpace(d?.Phone))
    {
        return result;
    }

    var hasPhoneLikeContact = result.Any(c => c.Type is ContactType.Mobile or ContactType.Phone);
    if (!hasPhoneLikeContact)
    {
        var aiPhone = new ContactMethod { Type = ContactType.Mobile, Value = d!.Phone!.Trim(), SortOrder = -1 };
        result.Insert(0, aiPhone);
    }

    return result;
}
|
|
|
|
|
|
|
2026-06-03 17:41:02 +03:30
|
|
|
|
/// <summary>
/// Maps the AI's shift-type string to a <c>ShiftType</c>. Matching ignores case, surrounding
/// whitespace and the separators '-', '_' and ' ', so "On-Call" and " night " are accepted.
/// Unrecognised or missing values fall back to the parser's value, then to <c>Day</c>.
/// </summary>
/// <param name="ai">Shift type reported by the AI, if any.</param>
/// <param name="parsed">Shift type found by the parser, if any.</param>
/// <returns>The resolved shift type.</returns>
private static ShiftType MapShiftType(string? ai, ShiftType? parsed)
{
    // Other AI strings in this file are trimmed before use; do the same here so a stray space
    // or hyphen does not silently discard the AI's answer.
    var key = ai?.Trim().ToLowerInvariant().Replace("-", "").Replace("_", "").Replace(" ", "");

    return key switch
    {
        "day" => ShiftType.Day,
        "evening" => ShiftType.Evening,
        "night" => ShiftType.Night,
        "oncall" => ShiftType.OnCall,
        _ => parsed ?? ShiftType.Day,
    };
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Maps the AI's employment-type string to an <c>EmploymentType</c>. Matching ignores case,
/// surrounding whitespace and the separators '-', '_' and ' ', so "Part-Time" and
/// " fulltime " are accepted. Unrecognised or missing values fall back to the parser's value,
/// then to <c>FullTime</c>.
/// </summary>
/// <param name="ai">Employment type reported by the AI, if any.</param>
/// <param name="parsed">Employment type found by the parser, if any.</param>
/// <returns>The resolved employment type.</returns>
private static EmploymentType MapEmployment(string? ai, EmploymentType? parsed)
{
    // Other AI strings in this file are trimmed before use; do the same here so a stray space
    // or hyphen does not silently discard the AI's answer.
    var key = ai?.Trim().ToLowerInvariant().Replace("-", "").Replace("_", "").Replace(" ", "");

    return key switch
    {
        "parttime" => EmploymentType.PartTime,
        "contract" => EmploymentType.Contract,
        "plan" => EmploymentType.Plan,
        "fulltime" => EmploymentType.FullTime,
        _ => parsed ?? EmploymentType.FullTime,
    };
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Default start and end times for a shift type: Day 08:00–14:00, Evening 14:00–20:00,
/// Night 20:00–08:00, and 08:00–08:00 for every other type.
/// </summary>
/// <param name="t">Shift type to look up.</param>
/// <returns>The (start, end) pair.</returns>
private static (TimeOnly, TimeOnly) DefaultTimes(ShiftType t)
{
    var morning = new TimeOnly(8, 0);
    var afternoon = new TimeOnly(14, 0);
    var evening = new TimeOnly(20, 0);

    switch (t)
    {
        case ShiftType.Day:
            return (morning, afternoon);
        case ShiftType.Evening:
            return (afternoon, evening);
        case ShiftType.Night:
            return (evening, morning);
        default:
            return (morning, morning);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Appends <paramref name="b"/> to <paramref name="a"/> separated by " | ", or returns
/// <paramref name="a"/> alone when <paramref name="b"/> is null or empty.
/// </summary>
/// <param name="a">Leading text.</param>
/// <param name="b">Optional trailing text.</param>
/// <returns>The combined note.</returns>
private static string? Join(string a, string? b)
{
    if (string.IsNullOrEmpty(b))
    {
        return a;
    }

    return $"{a} | {b}";
}
|
|
|
|
|
|
|
2026-06-03 08:18:19 +03:30
|
|
|
|
/// <summary>
/// Content fingerprint used for duplicate detection: the text is trimmed, whitespace runs are
/// collapsed to one space, and the UTF-8 bytes are hashed with SHA-256.
/// </summary>
/// <param name="text">Raw listing text; <c>null</c> is treated as empty.</param>
/// <returns>The digest as a lowercase hexadecimal string.</returns>
private static string Hash(string text)
{
    var trimmed = (text ?? "").Trim();
    var collapsed = Regex.Replace(trimmed, @"\s+", " ");
    var bytes = Encoding.UTF8.GetBytes(collapsed);
    var digest = SHA256.HashData(bytes);
    return Convert.ToHexString(digest).ToLowerInvariant();
}
|
|
|
|
|
|
}
|