diff --git a/src/JobsMedical.Web/Services/ListingParser.cs b/src/JobsMedical.Web/Services/ListingParser.cs
index ea067e2..cd73128 100644
--- a/src/JobsMedical.Web/Services/ListingParser.cs
+++ b/src/JobsMedical.Web/Services/ListingParser.cs
@@ -236,6 +236,17 @@ public class HeuristicListingParser : IListingParser
     // Titles that introduce a person's name in «آماده به کار» posts.
     private static readonly string[] PersonTitles = { "دکتر", "خانم دکتر", "آقای دکتر", "مهندس", "سرکار خانم", "جناب آقای", "خانم", "آقای" };
 
+    // Words that are NOT part of a person's name — verbs, fillers, availability and role words that
+    // the extractor used to pick up after a title («خانم هستم»، «دکتر ام»، «دکتر داروساز آماده»).
+    // Name collection stops at the first word that equals one of these after Normalize().
+    private static readonly string[] NameNoise =
+    {
+        "هستم", "هستیم", "هستش", "ام", "بودم", "میباشم", "میباشد", "باشم", "آماده", "آماده‌ام",
+        "جویای", "بکار", "به‌کار", "کار", "همکاری", "نیازمند", "استخدام", "جذب", "عزیز", "محترم",
+        "گرامی", "خانم", "آقا", "اقا", "دکتر", "پزشک", "پرستار", "بهیار", "ماما", "دندانپزشک",
+        "داروساز", "تکنسین", "کارشناس", "متخصص", "عمومی", "مراقب", "کمک",
+    };
+
     /// Best-effort person name: a title (دکتر/خانم/…) plus up to two following words.
     private static string? ExtractPersonName(string text)
     {
@@ -251,6 +262,8 @@ public class HeuristicListingParser : IListingParser
             foreach (var w in words)
             {
                 if (NameStops.Contains(w)) break;
+                var normalizedWord = Normalize(w); // normalize the candidate once, not once per noise entry
+                if (NameNoise.Any(n => Normalize(n) == normalizedWord)) break; // «خانم هستم»/«دکتر ام»…
                 if (Regex.IsMatch(w, @"[\d]")) break;
                 if (w.Length == 1) break;
                 picked.Add(w);