2026-06-03 01:43:55 +03:30
using JobsMedical.Web.Data ;
using JobsMedical.Web.Models ;
2026-06-03 08:18:19 +03:30
using JobsMedical.Web.Services.Scraping ;
2026-06-03 01:43:55 +03:30
using Microsoft.AspNetCore.Authorization ;
using Microsoft.AspNetCore.Mvc ;
using Microsoft.AspNetCore.Mvc.RazorPages ;
using Microsoft.EntityFrameworkCore ;
namespace JobsMedical.Web.Pages.Admin ;
2026-06-03 08:18:19 +03:30
/// <summary>
/// Admin dashboard page model. On GET it loads the review queue, the flagged items, the
/// configured source names, published counters and the most recent ingestion runs. Its POST
/// handlers add a raw listing by hand, discard a queued item, run ingestion, and perform
/// maintenance operations on aggregated content. Access is restricted to the "Admin" role.
/// </summary>
[Authorize(Roles = "Admin")]
public class IndexModel : PageModel
{
    private readonly AppDbContext _db;
    private readonly IngestionService _ingest;
    private readonly IServiceScopeFactory _scopes;
    private readonly ILogger<IndexModel> _log;

    /// <summary>Stores the injected database context, ingestion service, scope factory and logger.</summary>
    public IndexModel(AppDbContext db, IngestionService ingest, IServiceScopeFactory scopes, ILogger<IndexModel> log)
    {
        _db = db;
        _ingest = ingest;
        _scopes = scopes;
        _log = log;
    }

    /// <summary>Raw listings with status <c>New</c>, highest confidence first, then most recently fetched.</summary>
    public List<RawListing> Queue { get; private set; } = new();

    /// <summary>Raw listings with status <c>Flagged</c>, most recently fetched first.</summary>
    public List<RawListing> Flagged { get; private set; } = new();

    /// <summary>Source names as reported by the ingestion service.</summary>
    public IReadOnlyList<string> SourceNames { get; private set; } = new List<string>();

    /// <summary>Number of shifts whose source is anything other than <c>Direct</c>.</summary>
    public int PublishedShifts { get; private set; }

    /// <summary>Number of job openings (all sources).</summary>
    public int PublishedJobs { get; private set; }

    /// <summary>The 15 most recent ingestion runs, newest first.</summary>
    public List<IngestionRun> Runs { get; private set; } = new();

    /// <summary>Optional source/channel label posted with a manually added listing.</summary>
    [BindProperty] public string? SourceChannel { get; set; }

    /// <summary>Raw ad text posted with a manually added listing.</summary>
    [BindProperty] public string? RawText { get; set; }

    /// <summary>Status message set by a POST handler; kept in TempData so it survives the redirect.</summary>
    [TempData] public string? IngestMessage { get; set; }

    /// <summary>GET handler: loads everything the dashboard displays.</summary>
    public async Task OnGetAsync() => await LoadAsync();

    /// <summary>
    /// Adds a manually entered raw listing with status <c>New</c>. Blank text is ignored. A blank
    /// source channel falls back to the default manual-entry label. Always redirects back to the page.
    /// </summary>
    public async Task<IActionResult> OnPostAddAsync()
    {
        if (string.IsNullOrWhiteSpace(RawText))
        {
            return RedirectToPage();
        }

        _db.RawListings.Add(new RawListing
        {
            SourceChannel = string.IsNullOrWhiteSpace(SourceChannel) ? "ورود دستی" : SourceChannel.Trim(),
            RawText = RawText.Trim(),
            Status = RawListingStatus.New,
        });
        await _db.SaveChangesAsync();

        return RedirectToPage();
    }

    /// <summary>
    /// Fast triage: rejects (discards) a queued or flagged item without opening the review page.
    /// An unknown <paramref name="id"/> is silently ignored.
    /// </summary>
    /// <param name="id">Primary key of the raw listing to discard.</param>
    public async Task<IActionResult> OnPostQuickDiscardAsync(int id)
    {
        var raw = await _db.RawListings.FirstOrDefaultAsync(r => r.Id == id);
        if (raw is not null)
        {
            raw.Status = RawListingStatus.Discarded;
            await _db.SaveChangesAsync();
        }

        return RedirectToPage();
    }

    /// <summary>Runs ingestion inline and reports the resulting counters in <see cref="IngestMessage"/>.</summary>
    public async Task<IActionResult> OnPostRunIngestionAsync()
    {
        var stats = await _ingest.RunAsync();
        IngestMessage = $"جمعآوری انجام شد — {stats.TotalQueued} در صف، {stats.TotalFlagged} پرچمخورده، " +
                        $"{stats.TotalSpam} اسپم، {stats.TotalDuplicates} تکراری.";
        return RedirectToPage();
    }

    /// <summary>
    /// DESTRUCTIVE rebuild, in two distinct deletes:
    ///   1. The DEDUPE CACHE — ALL RawListings, including any added through the manual-add form.
    ///      These are not published content; they are the crawl/staging rows whose ContentHash
    ///      blocks re-ingesting the same ad. Wiping them lets everything be re-fetched and
    ///      re-judged by the AI.
    ///   2. AGGREGATED listings only — Shifts/JobOpenings/TalentListings with Source==Aggregated,
    ///      i.e. produced by ingestion. Employer/admin-posted listings (Source==Direct) are left
    ///      untouched.
    /// Then everything is re-fetched and re-run through the (now AI-enabled) pipeline.
    /// RawListings are deleted first so their LinkedShift/LinkedTalent FKs (SetNull) don't dangle;
    /// DB cascade clears ContactMethods / Applications / InterestEvents when the posts are deleted.
    /// </summary>
    public async Task<IActionResult> OnPostPurgeAndReingestAsync()
    {
        int rawCount, shifts, jobs, talent;

        // All four deletes commit together; the transaction is closed before the re-ingest starts.
        await using (var tx = await _db.Database.BeginTransactionAsync())
        {
            rawCount = await _db.RawListings.ExecuteDeleteAsync(); // clear dedupe cache
            shifts = await _db.Shifts.Where(x => x.Source == ShiftSource.Aggregated).ExecuteDeleteAsync();
            jobs = await _db.JobOpenings.Where(x => x.Source == ShiftSource.Aggregated).ExecuteDeleteAsync();
            talent = await _db.TalentListings.Where(x => x.Source == ShiftSource.Aggregated).ExecuteDeleteAsync();
            await tx.CommitAsync();
        }

        var stats = await _ingest.RunAsync(); // fresh fetch → AI audit → publish/queue
        IngestMessage = $"پاکسازی شد (حذف: {rawCount} آیتم کش، {shifts} شیفت، {jobs} استخدام، {talent} آمادهبهکارِ جمعآوریشده). " +
                        $"جمعآوری مجدد: {stats.TotalPublished} منتشر، {stats.TotalQueued} در صف، {stats.TotalFlagged} پرچم، {stats.TotalSpam} اسپم، {stats.TotalDuplicates} تکراری.";
        return RedirectToPage();
    }

    /// <summary>
    /// Cleans up EXISTING aggregated content by re-running the current pipeline over the stored raw
    /// text — no re-fetch, so nothing is lost to sources that only expose recent posts. Long-running
    /// (one AI call per item), so it runs on a background scope and returns immediately; the result
    /// shows up as a new row in the run-history log when it finishes.
    /// </summary>
    public IActionResult OnPostReprocessStored()
    {
        // Copy the dependencies to locals so the background lambda does not capture this
        // request-bound page model. The work continues after the request has completed.
        var scopes = _scopes;
        var log = _log;

        _ = Task.Run(async () =>
        {
            // Scope creation and service resolution sit inside the try: the task is discarded,
            // so any exception escaping the lambda would otherwise go unobserved and unlogged.
            try
            {
                await using var scope = scopes.CreateAsyncScope();
                var svc = scope.ServiceProvider.GetRequiredService<IngestionService>();

                // talentOnly: the "ready to work" (talent) section is NoIndex/Disallow, so rebuilding
                // it doesn't churn any indexed URL. Shift/Job detail pages ARE indexed, so they're
                // left to self-clean via turnover.
                await svc.ReprocessAsync(talentOnly: true);
            }
            catch (Exception ex)
            {
                log.LogError(ex, "Background reprocess failed");
            }
        });

        IngestMessage = "پردازش مجدد آیتمهای ذخیرهشده در پسزمینه آغاز شد. نتیجه پس از اتمام در «تاریخچهٔ اجرا» نمایش داده میشود (بسته به تعداد آیتمها و سرعت هوش مصنوعی، چند دقیقه طول میکشد).";
        return RedirectToPage();
    }

    /// <summary>
    /// Fills missing map coordinates on existing aggregated Tehran listings from their stored ad text
    /// (TehranGeo). In place — no AI calls, no re-fetch, and crucially no delete/recreate, so indexed
    /// shift/job URLs keep their IDs. Fast (pure DB + string matching), so it runs inline.
    /// </summary>
    public async Task<IActionResult> OnPostBackfillCoordsAsync()
    {
        var n = await _ingest.BackfillCoordsAsync();
        IngestMessage = $"مختصات تقریبی برای {n} آگهی جمعآوریشده از روی متن آگهی تکمیل شد (بدون تغییر شناسه یا آدرس صفحه).";
        return RedirectToPage();
    }

    /// <summary>
    /// In-place cleanup of existing aggregated jobs/shifts: ARCHIVE (hide, keep the row) only the
    /// out-of-scope ones (domestic-helper / promotional / spam) per the current validator, plus
    /// near-duplicate job reposts. Archived pages drop from lists + sitemap and return 410 Gone.
    /// Valid listings keep their IDs/URLs. Reversible, no re-fetch, no AI — runs inline.
    /// </summary>
    public async Task<IActionResult> OnPostPurgeInvalidAsync()
    {
        var (archived, deduped) = await _ingest.PurgeInvalidAggregatedAsync();
        IngestMessage = $"بایگانیِ درجا: {archived} آگهیِ خارج از حوزه (خدمات منزل/تبلیغاتی/اسپم) و {deduped} استخدامِ تکراری از سایت پنهان شد (وضعیت «بایگانی»؛ ردیف نگه داشته شد و قابل بازگشت است؛ صفحهشان ۴۱۰ Gone میدهد). آگهیهای معتبر و شناسه/آدرسشان دستنخورده ماند.";
        return RedirectToPage();
    }

    /// <summary>
    /// Populates every display property of the dashboard. All queries are read-only, so entities
    /// are loaded without change tracking.
    /// </summary>
    private async Task LoadAsync()
    {
        Queue = await _db.RawListings
            .AsNoTracking()
            .Where(r => r.Status == RawListingStatus.New)
            .OrderByDescending(r => r.Confidence)
            .ThenByDescending(r => r.FetchedAt)
            .ToListAsync();

        Flagged = await _db.RawListings
            .AsNoTracking()
            .Where(r => r.Status == RawListingStatus.Flagged)
            .OrderByDescending(r => r.FetchedAt)
            .ToListAsync();

        SourceNames = _ingest.SourceNames;

        PublishedShifts = await _db.Shifts.CountAsync(s => s.Source != ShiftSource.Direct);

        // NOTE(review): this counts every job opening, whereas PublishedShifts excludes Direct
        // ones — confirm the asymmetry is intended.
        PublishedJobs = await _db.JobOpenings.CountAsync();

        Runs = await _db.IngestionRuns
            .AsNoTracking()
            .OrderByDescending(r => r.RunAt)
            .Take(15)
            .ToListAsync();
    }
}