Reject filler/verb words as applicant names
The person-name extractor was grabbing the word after a title even when it was a verb, filler, availability, or role word, producing garbage headings like «خانم هستم»، «دکتر ام»، «دکتر داروساز آماده». Stop collecting at a NameNoise word (هستم/ام/آماده/جویای/role words…), so a real name («دکتر سپیده علیزاده») still works, while these cases fall back to the role heading. This applies to new ingests only; existing records can be rebuilt via the talent reprocess button.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -236,6 +236,16 @@ public class HeuristicListingParser : IListingParser
|
||||
// Titles that introduce a person's name in «آماده به کار» posts.
|
||||
private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" };
|
||||
|
||||
// Words that are NOT a person's name — verbs/fillers/availability/role words the extractor was
|
||||
// grabbing after a title («خانم هستم»، «دکتر ام»، «دکتر داروساز آماده»). Name collection stops at the first such word.
|
||||
private static readonly string[] NameNoise =
|
||||
{
|
||||
"هستم", "هستیم", "هستش", "ام", "بودم", "میباشم", "میباشد", "باشم", "آماده", "آمادهام",
|
||||
"جویای", "بکار", "بهکار", "کار", "همکاری", "نیازمند", "استخدام", "جذب", "عزیز", "محترم",
|
||||
"گرامی", "خانم", "آقا", "اقا", "دکتر", "پزشک", "پرستار", "بهیار", "ماما", "دندانپزشک",
|
||||
"داروساز", "تکنسین", "کارشناس", "متخصص", "عمومی", "مراقب", "کمک",
|
||||
};
|
||||
|
||||
/// <summary>Best-effort person name: a title (دکتر/خانم/…) plus up to two following words.</summary>
|
||||
private static string? ExtractPersonName(string text)
|
||||
{
|
||||
@@ -251,6 +261,7 @@ public class HeuristicListingParser : IListingParser
|
||||
foreach (var w in words)
|
||||
{
|
||||
if (NameStops.Contains(w)) break;
|
||||
if (NameNoise.Any(n => Normalize(n) == Normalize(w))) break; // «خانم هستم»/«دکتر ام»…
|
||||
if (Regex.IsMatch(w, @"[\d]")) break;
|
||||
if (w.Length == 1) break;
|
||||
picked.Add(w);
|
||||
|
||||
Reference in New Issue
Block a user