ignore email parts

This commit is contained in:
jetsparrow 2019-08-19 21:58:36 +03:00
parent ed5be399da
commit a66d5d733c
3 changed files with 12 additions and 0 deletions

View File

@@ -78,5 +78,13 @@ namespace AntiAntiSwearingBot.Tests
[InlineData("#BalıkBurcuKızıylaEvlenmek")] [InlineData("#BalıkBurcuKızıylaEvlenmek")]
public void IgnoreHashtags(string text) => Assert.Null(ubl.UnbleepSwears(text)); public void IgnoreHashtags(string text) => Assert.Null(ubl.UnbleepSwears(text));
// Each sample is an e-mail-like string (Latin and Cyrillic local parts and
// hosts, with and without a dotted domain). The theory asserts that
// UnbleepSwears returns null for every one of them.
[Theory]
[InlineData("ipetrov@mail.ru")]
[InlineData("ipetrov@русская.mail.ru")]
[InlineData("ипетров@почта.рф")]
[InlineData("admin@local")]
[InlineData("админ@local")]
public void IgnoreEmails(string text)
{
    var unbleeped = ubl.UnbleepSwears(text);

    Assert.Null(unbleeped);
}
} }
} }

View File

@@ -57,11 +57,14 @@ namespace AntiAntiSwearingBot
=> a == b || !char.IsLetterOrDigit(a) || !char.IsLetterOrDigit(b); => a == b || !char.IsLetterOrDigit(a) || !char.IsLetterOrDigit(b);
/// <summary>
/// Matches "@" followed by one or more ASCII letters, digits or underscores,
/// anchored at both ends of the input.
/// </summary>
static readonly Regex MentionRegex = new Regex("^@[a-zA-Z0-9_]+$", RegexOptions.Compiled);

/// <summary>
/// Matches one or more word characters, a single "@", then one or more word
/// characters, anchored at both ends. A "." is not a word character, so a
/// dotted address such as "user@mail.ru" does not match as a whole.
/// </summary>
static readonly Regex EmailPartRegex = new Regex("^\\w+@\\w+$", RegexOptions.Compiled);

/// <summary>
/// Matches "#" followed by one or more word characters, anchored at both ends.
/// </summary>
static readonly Regex HashTagRegex = new Regex("^#\\w+$", RegexOptions.Compiled);

/// <summary>Returns true when <paramref name="word"/> matches <see cref="MentionRegex"/>.</summary>
/// <param name="word">The word to test.</param>
public static bool IsTelegramMention(string word) => MentionRegex.IsMatch(word);

/// <summary>Returns true when <paramref name="word"/> matches <see cref="EmailPartRegex"/>.</summary>
/// <param name="word">The word to test.</param>
public static bool IsEmailPart(string word) => EmailPartRegex.IsMatch(word);

/// <summary>Returns true when <paramref name="word"/> matches <see cref="HashTagRegex"/>.</summary>
/// <param name="word">The word to test.</param>
public static bool IsHashTag(string word) => HashTagRegex.IsMatch(word);

/// <summary>
/// Returns true when at least one character of <paramref name="arg"/> is
/// neither a letter nor a digit according to <see cref="char.IsLetterOrDigit(char)"/>.
/// </summary>
/// <param name="arg">The string to inspect.</param>
public static bool HasNonWordChars(string arg) => arg.Any(c => !char.IsLetterOrDigit(c));

View File

@@ -36,6 +36,7 @@ namespace AntiAntiSwearingBot
var candidates = words var candidates = words
.Where(w => .Where(w =>
!Language.IsTelegramMention(w) !Language.IsTelegramMention(w)
&& !Language.IsEmailPart(w)
&& Language.HasNonWordChars(w) && Language.HasNonWordChars(w)
&& !Language.IsHashTag(w) && !Language.IsHashTag(w)
&& (Language.HasWordChars(w) || w.Length >= Cfg.MinAmbiguousWordLength) && (Language.HasWordChars(w) || w.Length >= Cfg.MinAmbiguousWordLength)