diff --git a/AntiAntiSwearingBot.Tests/AntiAntiSwearingBot.Tests.csproj b/AntiAntiSwearingBot.Tests/AntiAntiSwearingBot.Tests.csproj
new file mode 100644
index 0000000..d6f5994
--- /dev/null
+++ b/AntiAntiSwearingBot.Tests/AntiAntiSwearingBot.Tests.csproj
@@ -0,0 +1,19 @@
+
+
+
+ netcoreapp2.1
+
+ false
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/AntiAntiSwearingBot.Tests/FilterTests.cs b/AntiAntiSwearingBot.Tests/FilterTests.cs
new file mode 100644
index 0000000..cb95e09
--- /dev/null
+++ b/AntiAntiSwearingBot.Tests/FilterTests.cs
@@ -0,0 +1,106 @@
+using System;
+using Xunit;
+
+namespace AntiAntiSwearingBot.Tests
+{
+    /// <summary>
+    /// Exercises <see cref="Unbleeper.UnbleepSwears"/> with messages it should answer and messages it should ignore.
+    /// </summary>
+    public class FilterTests
+    {
+        Unbleeper ubl { get; }
+
+        public FilterTests()
+        {
+            var config = Config.Load("aasb.cfg.json", "aasb.cfg.secret.json");
+            ubl = new Unbleeper(new SearchDictionary(config), config.Unbleeper);
+        }
+
+        // Shared assertion: the unbleeper produces no reply (null) for this text.
+        void AssertIgnored(string text)
+        {
+            var reply = ubl.UnbleepSwears(text);
+            Assert.Null(reply);
+        }
+
+        /// <summary>A bleeped word produces the expected reply once trailing newline characters are trimmed.</summary>
+        [Theory]
+        [InlineData("бл**ь", "*блядь")]
+        [InlineData("ж**а", "*жопа")]
+        public void UnbleepSimpleSwears(string word, string expected)
+        {
+            var newlineChars = Environment.NewLine.ToCharArray();
+            var actual = ubl.UnbleepSwears(word).TrimEnd(newlineChars);
+
+            Assert.Equal(expected, actual);
+        }
+
+        /// <summary>Short symbol-only strings get no reply.</summary>
+        [Theory]
+        [InlineData("*")]
+        [InlineData("**#")]
+        [InlineData("@**#")]
+        public void IgnoreShortGrawlixes(string text)
+        {
+            AssertIgnored(text);
+        }
+
+        /// <summary>Messages containing @mentions get no reply.</summary>
+        [Theory]
+        [InlineData("@pvkuznetsov https://github.com/jacksondunstan/UnityNativeScripting")]
+        [InlineData("@JohnnyMnemonic")]
+        [InlineData("@Artyom по поводу")]
+        [InlineData("@Laima прошу блины!")]
+        [InlineData("эй админ @harry0xfefecaca верни бота")]
+        public void IgnoreMentions(string text)
+        {
+            AssertIgnored(text);
+        }
+
+        /// <summary>Formula-like text with operators and punctuation gets no reply.</summary>
+        [Theory]
+        [InlineData("x - floor(abs(x)) * sign(x) -- вроде такая формула для frac(x)")]
+        public void IgnoresWeirdShit(string text)
+        {
+            AssertIgnored(text);
+        }
+
+        /// <summary>Messages starting with a slash command get no reply, even if they contain a bleeped word.</summary>
+        [Theory]
+        [InlineData("/poll")]
+        [InlineData("/roll 2d6")]
+        [InlineData("/award medal")]
+        [InlineData("/status@MinecraftServerBot")]
+        [InlineData("/broadcast@MinecraftServerBot пи#*ец вы понастроили тут")]
+        [InlineData("/ban@MinecraftServerBot @dirty_johnny86")]
+        public void IgnoreCommands(string text)
+        {
+            AssertIgnored(text);
+        }
+
+        /// <summary>Hashtags (and hashtag-shaped tokens such as preprocessor directives) get no reply.</summary>
+        [Theory]
+        [InlineData("#UEeğitimKarazin")]
+        [InlineData("#KöksalBabaCafeTrabzonda")]
+        [InlineData("#ZehraHanımSüresizeKadro")]
+        [InlineData("#define")]
+        [InlineData("#ifndef")]
+        [InlineData("#trashtag")]
+        [InlineData("#MeToo")]
+        [InlineData("#инстаграм")]
+        [InlineData("#битваБлогеров")]
+        [InlineData("#зенитахмат")]
+        [InlineData("#HappyKWONJIYONGDay")]
+        [InlineData("#MCITOT")]
+        [InlineData("#ТамбовКраснодар")]
+        [InlineData("#JRockконвент2019")]
+        [InlineData("#DonaldTrumpAgain")]
+        [InlineData("#ZodiacKillerStrikesAgain")]
+        [InlineData("#ThanksObama")]
+        [InlineData("#BalıkBurcuKızıylaEvlenmek")]
+        public void IgnoreHashtags(string text)
+        {
+            AssertIgnored(text);
+        }
+    }
+}
diff --git a/AntiAntiSwearingBot.sln b/AntiAntiSwearingBot.sln
index 7ea08cc..886cca0 100644
--- a/AntiAntiSwearingBot.sln
+++ b/AntiAntiSwearingBot.sln
@@ -3,7 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28010.2036
MinimumVisualStudioVersion = 10.0.40219.1
-Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AntiAntiSwearingBot", "AntiAntiSwearingBot\AntiAntiSwearingBot.csproj", "{66AFFD7B-5B2D-4C85-8523-770702255511}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AntiAntiSwearingBot", "AntiAntiSwearingBot\AntiAntiSwearingBot.csproj", "{66AFFD7B-5B2D-4C85-8523-770702255511}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AntiAntiSwearingBot.Tests", "AntiAntiSwearingBot.Tests\AntiAntiSwearingBot.Tests.csproj", "{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@@ -15,6 +17,10 @@ Global
{66AFFD7B-5B2D-4C85-8523-770702255511}.Debug|Any CPU.Build.0 = Debug|Any CPU
{66AFFD7B-5B2D-4C85-8523-770702255511}.Release|Any CPU.ActiveCfg = Release|Any CPU
{66AFFD7B-5B2D-4C85-8523-770702255511}.Release|Any CPU.Build.0 = Release|Any CPU
+ {AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/AntiAntiSwearingBot/AntiAntiSwearingBot.cs b/AntiAntiSwearingBot/AntiAntiSwearingBot.cs
index 5dbfe7b..89d98d6 100644
--- a/AntiAntiSwearingBot/AntiAntiSwearingBot.cs
+++ b/AntiAntiSwearingBot/AntiAntiSwearingBot.cs
@@ -16,14 +16,13 @@ namespace AntiAntiSwearingBot
{
Config Config { get; }
SearchDictionary Dict { get; }
+ Unbleeper Unbleeper { get; }
public AntiAntiSwearingBot(Config cfg, SearchDictionary dict)
{
Config = cfg;
Dict = dict;
- BleepedSwearsRegex = new Regex(cfg.BleepedSwearsRegex, RegexOptions.Compiled);
- NonWordRegex = new Regex("\\W", RegexOptions.Compiled);
- MentionRegex = new Regex("@[a-zA-Z0-9_]+", RegexOptions.Compiled);
+ Unbleeper = new Unbleeper(dict, cfg.Unbleeper);
}
TelegramBotClient Client { get; set; }
@@ -49,41 +48,11 @@ namespace AntiAntiSwearingBot
public async Task Stop()
{
- Dict.Save();
Dispose();
}
#region service
- Regex BleepedSwearsRegex { get; }
- Regex NonWordRegex { get; }
- Regex MentionRegex { get; }
-
- string UnbleepSwears(string text)
- {
- if (string.IsNullOrWhiteSpace(text))
- return null;
-
- var words = BleepedSwearsRegex.Matches(text)
- .Select(m => m.Value)
- .Where(m => NonWordRegex.IsMatch(m))
- .Where(m => !MentionRegex.IsMatch(m))
- .ToArray();
-
- if (words.Any())
- {
- var response = new StringBuilder();
- for (int i = 0; i < words.Length; ++i)
- {
- var m = Dict.Match(words[i]);
- response.AppendLine(new string('*', i + 1) + m.Word + new string('?', m.Distance));
- }
- return response.ToString();
- }
- else
- return null;
- }
-
void BotOnMessageReceived(object sender, MessageEventArgs args)
{
var msg = args.Message;
@@ -104,7 +73,7 @@ namespace AntiAntiSwearingBot
}
else
{
- var unbleepResponse = UnbleepSwears(msg.Text);
+ var unbleepResponse = Unbleeper.UnbleepSwears(msg.Text);
if (unbleepResponse != null)
Client.SendTextMessageAsync(
args.Message.Chat.Id,
diff --git a/AntiAntiSwearingBot/CommandRouter.cs b/AntiAntiSwearingBot/CommandRouter.cs
index eae4a84..800a8de 100644
--- a/AntiAntiSwearingBot/CommandRouter.cs
+++ b/AntiAntiSwearingBot/CommandRouter.cs
@@ -29,10 +29,7 @@ namespace AntiAntiSwearingBot
if (cmd.UserName != null && cmd.UserName != Username)
return null;
if (Commands.ContainsKey(cmd.Command))
- {
- try { return Commands[cmd.Command].Execute(cmd, args); }
- catch { }
- }
+ return Commands[cmd.Command].Execute(cmd, args);
}
return null;
}
diff --git a/AntiAntiSwearingBot/Config.cs b/AntiAntiSwearingBot/Config.cs
index bb86862..c8f4f03 100644
--- a/AntiAntiSwearingBot/Config.cs
+++ b/AntiAntiSwearingBot/Config.cs
@@ -3,30 +3,33 @@
public class Config : ConfigBase
{
public string ApiKey { get; private set; }
-
- public string BleepedSwearsRegex { get; private set; }
-
- public struct ProxySettings
- {
- public string Url { get; private set; }
- public int Port { get; private set; }
- public string Login { get; private set; }
- public string Password { get; private set; }
- }
-
public ProxySettings Proxy { get; private set; }
-
- public struct SearchDictionarySettings
- {
- public string DictionaryPath { get; private set; }
-
- public double LearnNudgeFactor { get; private set; }
- public double LearnInitialRating { get; private set; }
- public int MinUnlearnNudge { get; private set; }
- public double UnlearnNudgeFactor { get; private set; }
- }
-
public SearchDictionarySettings SearchDictionary { get; private set; }
+ public UnbleeperSettings Unbleeper { get; private set; }
}
+
+ public struct UnbleeperSettings
+ {
+ public string BleepedSwearsRegex { get; private set; }
+ }
+
+ public struct SearchDictionarySettings
+ {
+ public string DictionaryPath { get; private set; }
+
+ public double LearnNudgeFactor { get; private set; }
+ public double LearnInitialRating { get; private set; }
+ public int MinUnlearnNudge { get; private set; }
+ public double UnlearnNudgeFactor { get; private set; }
+ }
+
+ public struct ProxySettings
+ {
+ public string Url { get; private set; }
+ public int Port { get; private set; }
+ public string Login { get; private set; }
+ public string Password { get; private set; }
+ }
+
}
diff --git a/AntiAntiSwearingBot/Language.cs b/AntiAntiSwearingBot/Language.cs
index 76e6b59..b4cf7fe 100644
--- a/AntiAntiSwearingBot/Language.cs
+++ b/AntiAntiSwearingBot/Language.cs
@@ -2,11 +2,14 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
+using System.Text.RegularExpressions;
namespace AntiAntiSwearingBot
{
- static class Language
+ public static class Language
{
+ static int min(int a, int b, int c) { return Math.Min(Math.Min(a, b), c); }
+
public static int HammingDistance(string a, string b)
{
if (string.IsNullOrEmpty(a))
@@ -26,8 +29,6 @@ namespace AntiAntiSwearingBot
return leftover + dist;
}
- static int min(int a, int b, int c) { return Math.Min(Math.Min(a, b), c); }
-
public static int LevenshteinDistance(string a, string b)
{
int[] prevRow = new int[b.Length + 1];
@@ -55,46 +56,18 @@ namespace AntiAntiSwearingBot
public static bool CharMatch(char a, char b)
=> a == b || !char.IsLetterOrDigit(a) || !char.IsLetterOrDigit(b);
- ///
- /// Compute the distance between two strings.
- ///
- public static int Compute(string s, string t)
- {
- int n = s.Length;
- int m = t.Length;
- int[,] d = new int[n + 1, m + 1];
+ static readonly Regex MentionRegex = new Regex("^@[a-zA-Z0-9_]+$", RegexOptions.Compiled);
- if (n == 0)
- return m;
- if (m == 0)
- return n;
+ static readonly Regex HashTagRegex = new Regex("^#\\w+$", RegexOptions.Compiled);
- // Step 2
- for (int i = 0; i <= n; d[i, 0] = i++)
- {
- }
+ public static bool IsTelegramMention(string word) => MentionRegex.IsMatch(word);
- for (int j = 0; j <= m; d[0, j] = j++)
- {
- }
+ public static bool IsHashTag(string word) => HashTagRegex.IsMatch(word);
+
+ public static bool HasNonWordChars(string arg) => arg.Any(c => !char.IsLetterOrDigit(c));
+
+ public static bool HasWordChars(string arg) => arg.Any(char.IsLetter);
- // Step 3
- for (int i = 1; i <= n; i++)
- {
- //Step 4
- for (int j = 1; j <= m; j++)
- {
- // Step 5
- int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
- // Step 6
- d[i, j] = Math.Min(
- Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
- d[i - 1, j - 1] + cost);
- }
- }
- // Step 7
- return d[n, m];
- }
}
}
diff --git a/AntiAntiSwearingBot/Program.cs b/AntiAntiSwearingBot/Program.cs
index 622d6d0..dc86087 100644
--- a/AntiAntiSwearingBot/Program.cs
+++ b/AntiAntiSwearingBot/Program.cs
@@ -26,6 +26,7 @@ namespace AntiAntiSwearingBot
Console.ReadKey();
Console.WriteLine("Waiting for exit...");
bot.Stop().Wait();
+ dict.Save();
return (int)ExitCode.Ok;
}
catch (Exception ex)
diff --git a/AntiAntiSwearingBot/SearchDictionary.cs b/AntiAntiSwearingBot/SearchDictionary.cs
index 76eaca8..b4be9a1 100644
--- a/AntiAntiSwearingBot/SearchDictionary.cs
+++ b/AntiAntiSwearingBot/SearchDictionary.cs
@@ -11,6 +11,8 @@ namespace AntiAntiSwearingBot
{
var s = cfg.SearchDictionary;
path = s.DictionaryPath;
+ tmppath = path + ".tmp";
+
learnInitialRating = Math.Clamp(s.LearnInitialRating, 0,1);
learnNudgeFactor = Math.Clamp(s.LearnNudgeFactor, 0, 1);
unlearnNudgeFactor = Math.Clamp(s.UnlearnNudgeFactor, 0, 1);
@@ -21,8 +23,12 @@ namespace AntiAntiSwearingBot
public void Save()
{
- File.WriteAllLines(path + ".tmp", words);
- File.Move(path + ".tmp", path);
+            // WriteAllLines overwrites any stale temp file; Replace swaps it in without deleting the live dictionary first.
+            File.WriteAllLines(tmppath, words);
+            if (File.Exists(path))
+                File.Replace(tmppath, path, null);
+            else
+                File.Move(tmppath, path);
}
public struct WordMatch
@@ -93,7 +99,7 @@ namespace AntiAntiSwearingBot
#region service
- string path;
+ readonly string path, tmppath;
double learnInitialRating = 0.75;
double learnNudgeFactor = 0.5;
diff --git a/AntiAntiSwearingBot/Unbleeper.cs b/AntiAntiSwearingBot/Unbleeper.cs
new file mode 100644
index 0000000..239e53e
--- /dev/null
+++ b/AntiAntiSwearingBot/Unbleeper.cs
@@ -0,0 +1,78 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace AntiAntiSwearingBot
+{
+    /// <summary>
+    /// Finds bleeped-out words in a chat message and builds a reply that lists the
+    /// dictionary match for each of them.
+    /// </summary>
+    public class Unbleeper
+    {
+        SearchDictionary Dict { get; }
+        UnbleeperSettings Cfg { get; }
+
+        /// <summary>Anchored matcher built from <see cref="UnbleeperSettings.BleepedSwearsRegex"/>.</summary>
+        Regex BleepedSwearsRegex { get; }
+
+        /// <summary>Creates an unbleeper backed by the given dictionary and settings.</summary>
+        /// <param name="dict">Dictionary queried for a match of each candidate word.</param>
+        /// <param name="cfg">Settings that provide the bleeped-word pattern.</param>
+        public Unbleeper(SearchDictionary dict, UnbleeperSettings cfg)
+        {
+            Dict = dict;
+            Cfg = cfg;
+            // Wrap the configured pattern in a non-capturing group so the anchors bind to
+            // the whole pattern even when it contains a top-level '|' alternation.
+            BleepedSwearsRegex = new Regex("^(?:" + Cfg.BleepedSwearsRegex + ")$", RegexOptions.Compiled);
+        }
+
+        /// <summary>
+        /// Builds the reply for a message that contains bleeped words.
+        /// </summary>
+        /// <param name="text">Message text; may be null.</param>
+        /// <returns>
+        /// One line per candidate word: (index + 1) asterisks, the matched dictionary word,
+        /// then one '?' per unit of match distance. Null when the text is null or blank,
+        /// starts with '/' (chat command), or contains no candidate words.
+        /// </returns>
+        public string UnbleepSwears(string text)
+        {
+            if (string.IsNullOrWhiteSpace(text))
+                return null;
+
+            text = text.Trim();
+
+            if (text.StartsWith('/')) // is chat command
+                return null;
+
+            // An empty separator array makes Split break on white-space.
+            var words = text.Split(new char[0], StringSplitOptions.RemoveEmptyEntries);
+            var candidates = words.Where(IsBleepedWord).ToArray();
+
+            if (candidates.Length == 0)
+                return null;
+
+            var response = new StringBuilder();
+            for (int i = 0; i < candidates.Length; ++i)
+            {
+                var m = Dict.Match(candidates[i]);
+                response.AppendLine(new string('*', i + 1) + m.Word + new string('?', m.Distance));
+            }
+            return response.ToString();
+        }
+
+        /// <summary>Decides whether a single white-space-delimited word should be unbleeped.</summary>
+        bool IsBleepedWord(string w)
+            => !Language.IsTelegramMention(w)
+            && Language.HasNonWordChars(w)
+            && !Language.IsHashTag(w)
+            // a word containing no letters must be longer than 5 characters to count
+            && (Language.HasWordChars(w) || w.Length > 5)
+            && w.Length > 2
+            && BleepedSwearsRegex.IsMatch(w);
+    }
+}
diff --git a/AntiAntiSwearingBot/aasb.cfg.json b/AntiAntiSwearingBot/aasb.cfg.json
index 4898bb5..d07d182 100644
--- a/AntiAntiSwearingBot/aasb.cfg.json
+++ b/AntiAntiSwearingBot/aasb.cfg.json
@@ -1,5 +1,7 @@
{
- "BleepedSwearsRegex": "[а-яА-Я@\\*#]+",
+ "Unbleeper": {
+ "BleepedSwearsRegex": "[а-яА-Я@\\*#]+"
+ },
"SearchDictionary": {
"DictionaryPath": "dict/ObsceneDictionaryRu.txt",
"LearnNudgeFactor": 0.5,
diff --git a/AntiAntiSwearingBot/dict/ObsceneDictionaryRu.txt b/AntiAntiSwearingBot/dict/ObsceneDictionaryRu.txt
index 0847a2f..ff62eac 100644
--- a/AntiAntiSwearingBot/dict/ObsceneDictionaryRu.txt
+++ b/AntiAntiSwearingBot/dict/ObsceneDictionaryRu.txt
@@ -1,4 +1,3 @@
-еб
бля
хуй
блядь