Reprocess: SEO-safe applicants-only default (don't churn indexed shift/job URLs)
Reprocess deletes and rebuilds aggregated listings, which changes their IDs. Shift/Job detail pages are indexed and in the sitemap, so churning them would 404 ranked URLs. «آماده به کار» pages are NoIndex + Disallow, so rebuilding them has zero SEO impact — and that's where all the duplicate/sprawl problems were.

ReprocessAsync(talentOnly: true), which is the new default, now only deletes/rebuilds TalentListings and skips non-talent raws (leaving shift/job listings + their RawListing links untouched). The admin button is relabelled «پردازش مجددِ آماده به کارها (امن برای SEO)». Shifts/jobs self-clean via normal ingestion turnover.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -176,7 +176,11 @@ public class IngestionService
|
||||
/// Deletes the old aggregated posts, then republishes from the stored raw text. Long-running
|
||||
/// (one AI call per item) — call it on a background scope, not inside a request.
|
||||
/// </summary>
|
||||
public async Task<IngestionSummary> ReprocessAsync(CancellationToken ct = default)
|
||||
/// <param name="talentOnly">SEO-safe default: only «آماده به کار» (which is NoIndex/Disallow) is
|
||||
/// deleted and rebuilt, so no INDEXED URL changes. Shift/Job detail pages are indexed + in the
|
||||
/// sitemap, so churning their IDs would 404 ranked pages — instead they self-clean via turnover.
|
||||
/// Pass false only when you accept that SEO hit.</param>
|
||||
public async Task<IngestionSummary> ReprocessAsync(bool talentOnly = true, CancellationToken ct = default)
|
||||
{
|
||||
var settings = await _settings.GetAsync();
|
||||
var roles = await _db.Roles.ToListAsync(ct);
|
||||
@@ -189,19 +193,28 @@ public class IngestionService
|
||||
|
||||
// Drop previously-published aggregated content; it's regenerated below from the raw text.
|
||||
// DB cascade clears their ContactMethods/Applications/InterestEvents; RawListing back-refs SetNull.
|
||||
await _db.Shifts.Where(s => s.Source == ShiftSource.Aggregated).ExecuteDeleteAsync(ct);
|
||||
await _db.JobOpenings.Where(j => j.Source == ShiftSource.Aggregated).ExecuteDeleteAsync(ct);
|
||||
await _db.TalentListings.Where(t => t.Source == ShiftSource.Aggregated).ExecuteDeleteAsync(ct);
|
||||
if (!talentOnly)
|
||||
{
|
||||
await _db.Shifts.Where(s => s.Source == ShiftSource.Aggregated).ExecuteDeleteAsync(ct);
|
||||
await _db.JobOpenings.Where(j => j.Source == ShiftSource.Aggregated).ExecuteDeleteAsync(ct);
|
||||
}
|
||||
|
||||
int fetched = 0, queued = 0, published = 0, flagged = 0, spam = 0;
|
||||
var raws = await _db.RawListings.OrderBy(r => r.Id).ToListAsync(ct);
|
||||
foreach (var raw in raws)
|
||||
{
|
||||
ct.ThrowIfCancellationRequested();
|
||||
fetched++;
|
||||
raw.LinkedShiftId = null; raw.LinkedTalentId = null; // old links were just deleted
|
||||
|
||||
var parsed = _parser.Parse(raw.RawText, roleNames, cityNames, districtNames);
|
||||
|
||||
// SEO-safe scope: in talent-only mode, leave indexed shift/job listings (and their
|
||||
// RawListing links/status) completely untouched — only applicants are rebuilt.
|
||||
if (talentOnly && parsed.Kind != ListingKind.Talent) continue;
|
||||
|
||||
fetched++;
|
||||
raw.LinkedTalentId = null; // talent rows were just deleted
|
||||
if (!talentOnly) raw.LinkedShiftId = null;
|
||||
|
||||
var val = _validator.Validate(raw.RawText, parsed);
|
||||
|
||||
// Stale-applicant filter — age from the Persian "time ago" phrase in the text (Divar).
|
||||
|
||||
Reference in New Issue
Block a user