Divar geo-coords to facility map + medical gate + RawListing FK/geo migrations
CI/CD / CI · dotnet build (push) Successful in 2m6s
CI/CD / Deploy · hamkadr (push) Successful in 2m3s

This commit is contained in:
soroush.asadi
2026-06-09 21:38:55 +03:30
parent cf5e0011c4
commit 380243b669
14 changed files with 3567 additions and 36 deletions
@@ -40,6 +40,14 @@
<p class="muted" style="font-size:11px; margin:8px 0 0;">
موتور: واکشی ← حذف تکراری ← تجزیه ← اعتبارسنجی ← صف بررسی.
</p>
@* Destructive admin action. The submit button targets the PurgeAndReingest page handler
   (asp-page-handler). The inline confirm() in onsubmit must be accepted first: when the
   user cancels the dialog, the handler returns false and the form is not submitted. *@
<form method="post" onsubmit="return confirm('⚠ همه‌ی آیتم‌های جمع‌آوری‌شده (کش) و همه‌ی آگهی‌های منتشرشده از جمع‌آوری حذف می‌شوند (آگهی‌های ثبت‌شده توسط مراکز دست‌نخورده می‌مانند)، سپس همه‌چیز با هوش مصنوعی دوباره جمع‌آوری و افزوده می‌شود. این کار بازگشت‌ناپذیر است. ادامه می‌دهی؟');">
<button type="submit" asp-page-handler="PurgeAndReingest" class="btn btn-outline btn-block" style="margin-top:8px; color:var(--danger); border-color:var(--danger);">
🔄 پاک‌سازی کش و جمع‌آوری مجدد با هوش مصنوعی
</button>
</form>
<p class="muted" style="font-size:11px; margin:6px 0 0;">
کش حذف تکراری و آگهی‌های جمع‌آوری‌شده پاک و از نو با AI پردازش می‌شوند. (آگهی‌های مراکز حذف نمی‌شوند.)
</p>
<hr style="border:none; border-top:1px solid var(--line); margin:16px 0;" />
@@ -65,6 +65,35 @@ public class IndexModel : PageModel
return RedirectToPage();
}
/// <summary>
/// POST handler for the "purge and re-ingest" admin action. This is DESTRUCTIVE and not reversible.
/// <para>
/// Inside a single database transaction it performs two kinds of delete, in this order:
/// </para>
/// <list type="number">
///   <item><description>
///     Every <c>RawListing</c> row (the crawl/staging rows that act as the dedupe cache),
///     including rows that were added manually. Removing them lets the same ads be fetched
///     and judged again instead of being rejected as already-seen content.
///   </description></item>
///   <item><description>
///     Only the listings produced by ingestion: <c>Shifts</c>, <c>JobOpenings</c> and
///     <c>TalentListings</c> whose <c>Source</c> is <c>ShiftSource.Aggregated</c>.
///     Listings with any other source (employer/admin-posted) are not touched.
///   </description></item>
/// </list>
/// <para>
/// The raw rows go first so that their links to shifts/talent listings are gone before the
/// posts themselves are removed; dependent rows of the deleted posts are expected to be
/// cleaned up by database-level cascade rules (schema not visible here).
/// </para>
/// <para>
/// After the transaction commits, the ingestion pipeline is run once and a summary of both
/// the purge and the re-ingest is stored in <c>IngestMessage</c>.
/// </para>
/// </summary>
/// <returns>A redirect back to this page.</returns>
public async Task<IActionResult> OnPostPurgeAndReingestAsync()
{
    int purgedRaw;
    int purgedShifts;
    int purgedJobs;
    int purgedTalent;

    // All four bulk deletes commit together or not at all. The transaction is scoped to this
    // block so it is disposed before the (potentially long) re-ingest starts.
    // NOTE(review): if a retrying execution strategy is configured on the DbContext, a
    // user-initiated transaction may need to be wrapped in CreateExecutionStrategy() — confirm.
    await using (var transaction = await _db.Database.BeginTransactionAsync())
    {
        // Step 1: wipe the dedupe cache (all raw/staging rows, no filter).
        purgedRaw = await _db.RawListings.ExecuteDeleteAsync();

        // Step 2: remove only ingestion-produced posts, one table at a time.
        var aggregatedShifts = _db.Shifts.Where(shift => shift.Source == ShiftSource.Aggregated);
        purgedShifts = await aggregatedShifts.ExecuteDeleteAsync();

        var aggregatedJobs = _db.JobOpenings.Where(job => job.Source == ShiftSource.Aggregated);
        purgedJobs = await aggregatedJobs.ExecuteDeleteAsync();

        var aggregatedTalent = _db.TalentListings.Where(listing => listing.Source == ShiftSource.Aggregated);
        purgedTalent = await aggregatedTalent.ExecuteDeleteAsync();

        await transaction.CommitAsync();
    }

    // The purge is already committed at this point; now run a full ingestion pass.
    var summary = await _ingest.RunAsync();

    // Status text shown to the admin: what was deleted, then what the re-ingest produced.
    var purgeReport = $"پاک‌سازی شد (حذف: {purgedRaw} آیتم کش، {purgedShifts} شیفت، {purgedJobs} استخدام، {purgedTalent} آماده‌به‌کارِ جمع‌آوری‌شده). ";
    var ingestReport = $"جمع‌آوری مجدد: {summary.TotalPublished} منتشر، {summary.TotalQueued} در صف، {summary.TotalFlagged} پرچم، {summary.TotalSpam} اسپم، {summary.TotalDuplicates} تکراری.";
    IngestMessage = purgeReport + ingestReport;

    return RedirectToPage();
}
private async Task LoadAsync()
{
Queue = await _db.RawListings
@@ -282,13 +282,26 @@ public class ReviewModel : PageModel
if (cityId is null) return null; // no cities seeded — cannot create a facility
// No facility named in the ad → use/create the shared placeholder.
var name = string.IsNullOrWhiteSpace(NewFacilityName) ? UnknownFacilityName : NewFacilityName.Trim();
var isPlaceholder = string.IsNullOrWhiteSpace(NewFacilityName);
var name = isPlaceholder ? UnknownFacilityName : NewFacilityName.Trim();
// Approximate coords carried from the crawl (e.g. Divar). NEVER apply them to the shared
// «نامشخص» placeholder — it's reused across many ads, so a single ad's point would mislead.
bool HasGeo() => !isPlaceholder && Raw?.Lat is not null;
// Reuse an existing facility that's exactly or closely the same (Persian-aware fuzzy
// match), so we don't create duplicates like «بیمارستان میلاد» vs «میلاد».
var all = await _db.Facilities.ToListAsync();
var match = FacilityMatcher.FindBest(all, name, cityId);
if (match is not null) return match.Id;
if (match is not null)
{
if (HasGeo() && match.Lat is null && match.Lng is null) // backfill only, never overwrite
{
match.Lat = Raw!.Lat; match.Lng = Raw.Lng;
await _db.SaveChangesAsync();
}
return match.Id;
}
var facility = new Facility
{
@@ -297,6 +310,8 @@ public class ReviewModel : PageModel
Type = FacilityType.Hospital,
Verification = VerificationStatus.Unverified,
IsVerified = false,
Lat = HasGeo() ? Raw!.Lat : null,
Lng = HasGeo() ? Raw!.Lng : null,
};
_db.Facilities.Add(facility);
await _db.SaveChangesAsync();