Add scrape/ingestion engine + validation, and 24h shift hour-range visualization
Scrape engine (Services/Scraping/): pluggable IListingSource (working sample + Telegram/Divar credential-ready stubs) → IngestionService (content-hash dedupe → parse → validate → review queue) → ListingValidator (completeness score + spam screen) → IngestionWorker (config-gated hosted service). RawListing gains ContentHash/Confidence/ValidationNotes; RawListingStatus gains Flagged. The /Admin page gets run-now, a source list, confidence display, and a flagged queue. Hour-range viz: _HourBar, a 24h timeline bar (colored by type, with overnight wrap), on shift cards, recommendation cards, and the detail view.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -55,9 +55,10 @@ public enum ApplicationStatus
|
||||
|
||||
public enum RawListingStatus
|
||||
{
|
||||
New = 0, // جدید
|
||||
Normalized = 1, // تبدیل شده به شیفت
|
||||
Discarded = 2 // کنار گذاشته شده
|
||||
New = 0, // جدید (آماده بررسی)
|
||||
Normalized = 1, // تبدیل شده به شیفت/استخدام
|
||||
Discarded = 2, // کنار گذاشته شده (یا اسپم)
|
||||
Flagged = 3 // ناقص/مشکوک — نیازمند بررسی دستی بیشتر
|
||||
}
|
||||
|
||||
public enum EmploymentType
|
||||
|
||||
@@ -27,5 +27,16 @@ public class RawListing
|
||||
[MaxLength(500)]
|
||||
public string? SourceUrl { get; set; }
|
||||
|
||||
/// <summary>SHA-256 of the normalized text — used to dedupe across ingestion runs.</summary>
|
||||
[MaxLength(64)]
|
||||
public string? ContentHash { get; set; }
|
||||
|
||||
/// <summary>Parser+validator confidence 0–100 (how complete/usable the listing looks).</summary>
|
||||
public int Confidence { get; set; }
|
||||
|
||||
/// <summary>Human-readable validation findings (missing fields, spam flags, etc.).</summary>
|
||||
[MaxLength(1000)]
|
||||
public string? ValidationNotes { get; set; }
|
||||
|
||||
public DateTime FetchedAt { get; set; } = DateTime.UtcNow;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user