Divar geo-coords to facility map + medical gate + RawListing FK/geo migrations
This commit is contained in:
@@ -40,6 +40,14 @@
|
||||
<p class="muted" style="font-size:11px; margin:8px 0 0;">
|
||||
موتور: واکشی ← حذف تکراری ← تجزیه ← اعتبارسنجی ← صف بررسی.
|
||||
</p>
|
||||
<form method="post" onsubmit="return confirm('⚠ همهی آیتمهای جمعآوریشده (کش) و همهی آگهیهای منتشرشده از جمعآوری حذف میشوند (آگهیهای ثبتشده توسط مراکز دستنخورده میمانند)، سپس همهچیز با هوش مصنوعی دوباره جمعآوری و افزوده میشود. این کار بازگشتناپذیر است. ادامه میدهی؟');">
|
||||
<button type="submit" asp-page-handler="PurgeAndReingest" class="btn btn-outline btn-block" style="margin-top:8px; color:var(--danger); border-color:var(--danger);">
|
||||
🔄 پاکسازی کش و جمعآوری مجدد با هوش مصنوعی
|
||||
</button>
|
||||
</form>
|
||||
<p class="muted" style="font-size:11px; margin:6px 0 0;">
|
||||
کش حذف تکراری و آگهیهای جمعآوریشده پاک و از نو با AI پردازش میشوند. (آگهیهای مراکز حذف نمیشوند.)
|
||||
</p>
|
||||
|
||||
<hr style="border:none; border-top:1px solid var(--line); margin:16px 0;" />
|
||||
|
||||
|
||||
@@ -65,6 +65,35 @@ public class IndexModel : PageModel
|
||||
return RedirectToPage();
|
||||
}
|
||||
|
||||
/// <summary>
/// Destructive "purge and rebuild" page handler. Inside a single database transaction it
/// performs two separate deletions, then triggers a fresh ingestion run:
/// <list type="number">
///   <item>
///     <description>
///     Dedupe cache: every RawListing row is removed, including rows created through the
///     «افزودن دستی» (manual add) action. These are crawl/staging rows rather than published
///     content; their ContentHash is what prevents the same ad from being ingested twice, so
///     clearing them allows everything to be fetched and judged by the AI again.
///     </description>
///   </item>
///   <item>
///     <description>
///     Aggregated posts only: Shifts, JobOpenings and TalentListings whose Source is
///     <c>ShiftSource.Aggregated</c> (i.e. produced by ingestion). Listings posted directly by
///     employers/admins (Source == Direct) are never touched.
///     </description>
///   </item>
/// </list>
/// Ordering matters: RawListings go first so that their LinkedShift/LinkedTalent foreign keys
/// (configured as SetNull) never point at rows that are about to disappear. Dependent
/// ContactMethods / Applications / InterestEvents are expected to be removed by database-level
/// cascade when the posts are deleted (the cascade configuration is not visible in this method).
/// </summary>
/// <returns>A redirect back to this page once the purge and the re-ingestion have finished.</returns>
public async Task<IActionResult> OnPostPurgeAndReingestAsync()
{
    int purgedRaw;
    int purgedShifts;
    int purgedJobs;
    int purgedTalent;

    await using (var transaction = await _db.Database.BeginTransactionAsync())
    {
        // Step 1: wipe the dedupe cache (all staging rows, no filter).
        purgedRaw = await _db.RawListings.ExecuteDeleteAsync();

        // Step 2: drop only what ingestion itself produced, one table at a time.
        var aggregatedShifts = _db.Shifts.Where(shift => shift.Source == ShiftSource.Aggregated);
        purgedShifts = await aggregatedShifts.ExecuteDeleteAsync();

        var aggregatedJobs = _db.JobOpenings.Where(job => job.Source == ShiftSource.Aggregated);
        purgedJobs = await aggregatedJobs.ExecuteDeleteAsync();

        var aggregatedTalent = _db.TalentListings.Where(listing => listing.Source == ShiftSource.Aggregated);
        purgedTalent = await aggregatedTalent.ExecuteDeleteAsync();

        await transaction.CommitAsync();
    }

    // The purge is committed at this point; now run a fresh fetch -> AI audit -> publish/queue.
    var summary = await _ingest.RunAsync();

    var purgeReport = $"پاکسازی شد (حذف: {purgedRaw} آیتم کش، {purgedShifts} شیفت، {purgedJobs} استخدام، {purgedTalent} آمادهبهکارِ جمعآوریشده). ";
    var ingestReport = $"جمعآوری مجدد: {summary.TotalPublished} منتشر، {summary.TotalQueued} در صف، {summary.TotalFlagged} پرچم، {summary.TotalSpam} اسپم، {summary.TotalDuplicates} تکراری.";
    IngestMessage = purgeReport + ingestReport;

    return RedirectToPage();
}
|
||||
|
||||
private async Task LoadAsync()
|
||||
{
|
||||
Queue = await _db.RawListings
|
||||
|
||||
@@ -282,13 +282,26 @@ public class ReviewModel : PageModel
|
||||
if (cityId is null) return null; // no cities seeded — cannot create a facility
|
||||
|
||||
// No facility named in the ad → use/create the shared placeholder.
|
||||
var name = string.IsNullOrWhiteSpace(NewFacilityName) ? UnknownFacilityName : NewFacilityName.Trim();
|
||||
var isPlaceholder = string.IsNullOrWhiteSpace(NewFacilityName);
|
||||
var name = isPlaceholder ? UnknownFacilityName : NewFacilityName.Trim();
|
||||
|
||||
// Approximate coords carried from the crawl (e.g. Divar). NEVER apply them to the shared
|
||||
// «نامشخص» placeholder — it's reused across many ads, so a single ad's point would mislead.
|
||||
bool HasGeo() => !isPlaceholder && Raw?.Lat is not null;
|
||||
|
||||
// Reuse an existing facility that's exactly or closely the same (Persian-aware fuzzy
|
||||
// match), so we don't create duplicates like «بیمارستان میلاد» vs «میلاد».
|
||||
var all = await _db.Facilities.ToListAsync();
|
||||
var match = FacilityMatcher.FindBest(all, name, cityId);
|
||||
if (match is not null) return match.Id;
|
||||
if (match is not null)
|
||||
{
|
||||
if (HasGeo() && match.Lat is null && match.Lng is null) // backfill only, never overwrite
|
||||
{
|
||||
match.Lat = Raw!.Lat; match.Lng = Raw.Lng;
|
||||
await _db.SaveChangesAsync();
|
||||
}
|
||||
return match.Id;
|
||||
}
|
||||
|
||||
var facility = new Facility
|
||||
{
|
||||
@@ -297,6 +310,8 @@ public class ReviewModel : PageModel
|
||||
Type = FacilityType.Hospital,
|
||||
Verification = VerificationStatus.Unverified,
|
||||
IsVerified = false,
|
||||
Lat = HasGeo() ? Raw!.Lat : null,
|
||||
Lng = HasGeo() ? Raw!.Lng : null,
|
||||
};
|
||||
_db.Facilities.Add(facility);
|
||||
await _db.SaveChangesAsync();
|
||||
|
||||
Reference in New Issue
Block a user