Make unbleeper testable

- separate unbleeper as dependency of aasb
- fix algorithm
- tests
This commit is contained in:
jetsparrow 2019-08-17 22:23:13 +03:00
parent 2b8fd15e8e
commit a4e7b836b8
12 changed files with 213 additions and 105 deletions

View File

@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project for AntiAntiSwearingBot: built against .NET Core 2.1 and run with xUnit. -->
<PropertyGroup>
<TargetFramework>netcoreapp2.1</TargetFramework>
<!-- This project only hosts tests, so it is excluded from packing. -->
<IsPackable>false</IsPackable>
</PropertyGroup>
<!-- Test SDK, the xUnit framework and the xUnit runner adapter for Visual Studio. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.9.0" />
<PackageReference Include="xunit" Version="2.4.0" />
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.0" />
</ItemGroup>
<!-- The project under test. -->
<ItemGroup>
<ProjectReference Include="..\AntiAntiSwearingBot\AntiAntiSwearingBot.csproj" />
</ItemGroup>
</Project>

View File

@ -0,0 +1,74 @@
using System;
using Xunit;
namespace AntiAntiSwearingBot.Tests
{
/// <summary>
/// Exercises <see cref="Unbleeper.UnbleepSwears"/>: bleeped words must be answered with
/// a dictionary match, while mentions, commands, hashtags and other ordinary text must
/// produce no reply at all (<c>null</c>).
/// </summary>
public class FilterTests
{
    // Built once per test instance from the on-disk configuration files.
    readonly Unbleeper _unbleeper;

    /// <summary>
    /// Loads the configuration, builds the search dictionary from it and wires both
    /// into the unbleeper used by every test.
    /// </summary>
    public FilterTests()
    {
        var config = Config.Load<Config>("aasb.cfg.json", "aasb.cfg.secret.json");
        _unbleeper = new Unbleeper(new SearchDictionary(config), config.Unbleeper);
    }

    /// <summary>Asserts that the unbleeper produces no reply for <paramref name="text"/>.</summary>
    void AssertIgnored(string text)
    {
        string reply = _unbleeper.UnbleepSwears(text);
        Assert.Null(reply);
    }

    /// <summary>A single bleeped word is answered with the expected dictionary entry.</summary>
    [Theory]
    [InlineData("бл**ь", "*блядь")]
    [InlineData("ж**а", "*жопа")]
    public void UnbleepSimpleSwears(string word, string expected)
    {
        string reply = _unbleeper.UnbleepSwears(word);

        // The reply ends with a line break; strip it before comparing.
        char[] lineBreakChars = Environment.NewLine.ToCharArray();
        string actual = reply.TrimEnd(lineBreakChars);

        Assert.Equal(expected, actual);
    }

    /// <summary>Very short runs of bleep symbols are not answered.</summary>
    [Theory]
    [InlineData("*")]
    [InlineData("**#")]
    [InlineData("@**#")]
    public void IgnoreShortGrawlixes(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Messages containing @mentions are not answered.</summary>
    [Theory]
    [InlineData("@pvkuznetsov https://github.com/jacksondunstan/UnityNativeScripting")]
    [InlineData("@JohnnyMnemonic")]
    [InlineData("@Artyom по поводу")]
    [InlineData("@Laima прошу блины!")]
    [InlineData("эй админ @harry0xfefecaca верни бота")]
    public void IgnoreMentions(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Formula-like text full of punctuation is not answered.</summary>
    [Theory]
    [InlineData("x - floor(abs(x)) * sign(x) -- вроде такая формула для frac(x)")]
    public void IgnoresWeirdShit(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Chat commands are not answered, even when their arguments contain bleeped words.</summary>
    [Theory]
    [InlineData("/poll")]
    [InlineData("/roll 2d6")]
    [InlineData("/award medal")]
    [InlineData("/status@MinecraftServerBot")]
    [InlineData("/broadcast@MinecraftServerBot пи#*ец вы понастроили тут")]
    [InlineData("/ban@MinecraftServerBot @dirty_johnny86")]
    public void IgnoreCommands(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Hashtags (Latin, Cyrillic, Turkish, mixed) are not answered.</summary>
    [Theory]
    [InlineData("#UEeğitimKarazin")]
    [InlineData("#KöksalBabaCafeTrabzonda")]
    [InlineData("#ZehraHanımSüresizeKadro")]
    [InlineData("#define")]
    [InlineData("#ifndef")]
    [InlineData("#trashtag")]
    [InlineData("#MeToo")]
    [InlineData("#инстаграм")]
    [InlineData("#битваБлогеров")]
    [InlineData("#зенитахмат")]
    [InlineData("#HappyKWONJIYONGDay")]
    [InlineData("#MCITOT")]
    [InlineData("#ТамбовКраснодар")]
    [InlineData("#JRockконвент2019")]
    [InlineData("#DonaldTrumpAgain")]
    [InlineData("#ZodiacKillerStrikesAgain")]
    [InlineData("#ThanksObama")]
    [InlineData("#BalıkBurcuKızıylaEvlenmek")]
    public void IgnoreHashtags(string text)
    {
        AssertIgnored(text);
    }
}
}

View File

@ -3,7 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28010.2036
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AntiAntiSwearingBot", "AntiAntiSwearingBot\AntiAntiSwearingBot.csproj", "{66AFFD7B-5B2D-4C85-8523-770702255511}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AntiAntiSwearingBot", "AntiAntiSwearingBot\AntiAntiSwearingBot.csproj", "{66AFFD7B-5B2D-4C85-8523-770702255511}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AntiAntiSwearingBot.Tests", "AntiAntiSwearingBot.Tests\AntiAntiSwearingBot.Tests.csproj", "{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
@ -15,6 +17,10 @@ Global
{66AFFD7B-5B2D-4C85-8523-770702255511}.Debug|Any CPU.Build.0 = Debug|Any CPU
{66AFFD7B-5B2D-4C85-8523-770702255511}.Release|Any CPU.ActiveCfg = Release|Any CPU
{66AFFD7B-5B2D-4C85-8523-770702255511}.Release|Any CPU.Build.0 = Release|Any CPU
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -16,14 +16,13 @@ namespace AntiAntiSwearingBot
{
Config Config { get; }
SearchDictionary Dict { get; }
Unbleeper Unbleeper { get; }
public AntiAntiSwearingBot(Config cfg, SearchDictionary dict)
{
Config = cfg;
Dict = dict;
BleepedSwearsRegex = new Regex(cfg.BleepedSwearsRegex, RegexOptions.Compiled);
NonWordRegex = new Regex("\\W", RegexOptions.Compiled);
MentionRegex = new Regex("@[a-zA-Z0-9_]+", RegexOptions.Compiled);
Unbleeper = new Unbleeper(dict, cfg.Unbleeper);
}
TelegramBotClient Client { get; set; }
@ -49,41 +48,11 @@ namespace AntiAntiSwearingBot
public async Task Stop()
{
Dict.Save();
Dispose();
}
#region service
Regex BleepedSwearsRegex { get; }
Regex NonWordRegex { get; }
Regex MentionRegex { get; }
string UnbleepSwears(string text)
{
if (string.IsNullOrWhiteSpace(text))
return null;
var words = BleepedSwearsRegex.Matches(text)
.Select(m => m.Value)
.Where(m => NonWordRegex.IsMatch(m))
.Where(m => !MentionRegex.IsMatch(m))
.ToArray();
if (words.Any())
{
var response = new StringBuilder();
for (int i = 0; i < words.Length; ++i)
{
var m = Dict.Match(words[i]);
response.AppendLine(new string('*', i + 1) + m.Word + new string('?', m.Distance));
}
return response.ToString();
}
else
return null;
}
void BotOnMessageReceived(object sender, MessageEventArgs args)
{
var msg = args.Message;
@ -104,7 +73,7 @@ namespace AntiAntiSwearingBot
}
else
{
var unbleepResponse = UnbleepSwears(msg.Text);
var unbleepResponse = Unbleeper.UnbleepSwears(msg.Text);
if (unbleepResponse != null)
Client.SendTextMessageAsync(
args.Message.Chat.Id,

View File

@ -29,10 +29,7 @@ namespace AntiAntiSwearingBot
if (cmd.UserName != null && cmd.UserName != Username)
return null;
if (Commands.ContainsKey(cmd.Command))
{
try { return Commands[cmd.Command].Execute(cmd, args); }
catch { }
}
return Commands[cmd.Command].Execute(cmd, args);
}
return null;
}

View File

@ -3,18 +3,15 @@
public class Config : ConfigBase
{
public string ApiKey { get; private set; }
public string BleepedSwearsRegex { get; private set; }
public struct ProxySettings
{
public string Url { get; private set; }
public int Port { get; private set; }
public string Login { get; private set; }
public string Password { get; private set; }
public ProxySettings Proxy { get; private set; }
public SearchDictionarySettings SearchDictionary { get; private set; }
public UnbleeperSettings Unbleeper { get; private set; }
}
public ProxySettings Proxy { get; private set; }
public struct UnbleeperSettings
{
public string BleepedSwearsRegex { get; private set; }
}
public struct SearchDictionarySettings
{
@ -26,7 +23,13 @@
public double UnlearnNudgeFactor { get; private set; }
}
public SearchDictionarySettings SearchDictionary { get; private set; }
public struct ProxySettings
{
public string Url { get; private set; }
public int Port { get; private set; }
public string Login { get; private set; }
public string Password { get; private set; }
}
}

View File

@ -2,11 +2,14 @@
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace AntiAntiSwearingBot
{
static class Language
public static class Language
{
static int min(int a, int b, int c) { return Math.Min(Math.Min(a, b), c); }
public static int HammingDistance(string a, string b)
{
if (string.IsNullOrEmpty(a))
@ -26,8 +29,6 @@ namespace AntiAntiSwearingBot
return leftover + dist;
}
static int min(int a, int b, int c) { return Math.Min(Math.Min(a, b), c); }
public static int LevenshteinDistance(string a, string b)
{
int[] prevRow = new int[b.Length + 1];
@ -55,46 +56,18 @@ namespace AntiAntiSwearingBot
public static bool CharMatch(char a, char b)
=> a == b || !char.IsLetterOrDigit(a) || !char.IsLetterOrDigit(b);
/// <summary>
/// Compute the distance between two strings.
/// </summary>
public static int Compute(string s, string t)
{
int n = s.Length;
int m = t.Length;
int[,] d = new int[n + 1, m + 1];
static readonly Regex MentionRegex = new Regex("^@[a-zA-Z0-9_]+$", RegexOptions.Compiled);
if (n == 0)
return m;
if (m == 0)
return n;
static readonly Regex HashTagRegex = new Regex("^#\\w+$", RegexOptions.Compiled);
// Step 2
for (int i = 0; i <= n; d[i, 0] = i++)
{
}
public static bool IsTelegramMention(string word) => MentionRegex.IsMatch(word);
for (int j = 0; j <= m; d[0, j] = j++)
{
}
public static bool IsHashTag(string word) => HashTagRegex.IsMatch(word);
public static bool HasNonWordChars(string arg) => arg.Any(c => !char.IsLetterOrDigit(c));
public static bool HasWordChars(string arg) => arg.Any(char.IsLetter);
// Step 3
for (int i = 1; i <= n; i++)
{
//Step 4
for (int j = 1; j <= m; j++)
{
// Step 5
int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
// Step 6
d[i, j] = Math.Min(
Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
d[i - 1, j - 1] + cost);
}
}
// Step 7
return d[n, m];
}
}
}

View File

@ -26,6 +26,7 @@ namespace AntiAntiSwearingBot
Console.ReadKey();
Console.WriteLine("Waiting for exit...");
bot.Stop().Wait();
dict.Save();
return (int)ExitCode.Ok;
}
catch (Exception ex)

View File

@ -11,6 +11,8 @@ namespace AntiAntiSwearingBot
{
var s = cfg.SearchDictionary;
path = s.DictionaryPath;
tmppath = path + ".tmp";
learnInitialRating = Math.Clamp(s.LearnInitialRating, 0,1);
learnNudgeFactor = Math.Clamp(s.LearnNudgeFactor, 0, 1);
unlearnNudgeFactor = Math.Clamp(s.UnlearnNudgeFactor, 0, 1);
@ -21,8 +23,12 @@ namespace AntiAntiSwearingBot
public void Save()
{
File.WriteAllLines(path + ".tmp", words);
File.Move(path + ".tmp", path);
if (File.Exists(tmppath))
File.Delete(tmppath);
File.WriteAllLines(tmppath, words);
if (File.Exists(path))
File.Delete(path);
File.Move(tmppath, path);
}
public struct WordMatch
@ -93,7 +99,7 @@ namespace AntiAntiSwearingBot
#region service
string path;
readonly string path, tmppath;
double learnInitialRating = 0.75;
double learnNudgeFactor = 0.5;

View File

@ -0,0 +1,59 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
namespace AntiAntiSwearingBot
{
/// <summary>
/// Finds bleeped-out words in a chat message and builds a reply that lists, for each
/// of them, the closest entry from the search dictionary.
/// </summary>
public class Unbleeper
{
    SearchDictionary Dict { get; }
    UnbleeperSettings Cfg { get; }

    // Configured bleep pattern, anchored so that it has to match a whole word.
    Regex BleepedSwearsRegex { get; }

    /// <summary>
    /// Creates an unbleeper that looks words up in <paramref name="dict"/> and detects
    /// bleeped words with the pattern from <paramref name="cfg"/>.
    /// </summary>
    /// <param name="dict">Dictionary used to find the closest match for a bleeped word.</param>
    /// <param name="cfg">Settings providing the bleeped-word regular expression.</param>
    public Unbleeper(SearchDictionary dict, UnbleeperSettings cfg)
    {
        Dict = dict;
        Cfg = cfg;

        // The configured pattern is wrapped in a non-capturing group before anchoring:
        // without it a pattern with top-level alternation ("a|b") would become "^a|b$",
        // where each anchor binds to only one alternative and partial matches slip through.
        BleepedSwearsRegex = new Regex("^(?:" + Cfg.BleepedSwearsRegex + ")$", RegexOptions.Compiled);
    }

    /// <summary>
    /// Builds the reply for a message.
    /// </summary>
    /// <param name="text">Raw message text; may be null or blank.</param>
    /// <returns>
    /// One line per bleeped word found, in message order. Line <c>i</c> (1-based) consists
    /// of <c>i</c> asterisks, the matched dictionary word, and one '?' per unit of match
    /// distance. Returns <c>null</c> when the text is null or blank, is a chat command
    /// (starts with '/'), or contains no bleeped words.
    /// </returns>
    public string UnbleepSwears(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
            return null;

        text = text.Trim();

        if (text.StartsWith('/')) // is chat command
            return null;

        // An empty separator array makes Split break on any whitespace.
        var words = text.Split(Array.Empty<char>(), StringSplitOptions.RemoveEmptyEntries);
        var candidates = words.Where(IsBleepCandidate).ToArray();

        if (candidates.Length == 0)
            return null;

        var response = new StringBuilder();
        for (int i = 0; i < candidates.Length; ++i)
        {
            var m = Dict.Match(candidates[i]);
            response.AppendLine(new string('*', i + 1) + m.Word + new string('?', m.Distance));
        }
        return response.ToString();
    }

    /// <summary>
    /// Decides whether a single whitespace-delimited word looks like a bleeped swear:
    /// longer than two characters, not a mention or hashtag, containing at least one
    /// non-alphanumeric character, containing a letter unless it is longer than five
    /// characters, and matching the configured pattern as a whole.
    /// </summary>
    bool IsBleepCandidate(string w)
    {
        // Cheapest check first; the regex runs only on words that passed everything else.
        return w.Length > 2
            && !Language.IsTelegramMention(w)
            && !Language.IsHashTag(w)
            && Language.HasNonWordChars(w)
            && (Language.HasWordChars(w) || w.Length > 5)
            && BleepedSwearsRegex.IsMatch(w);
    }
}
}

View File

@ -1,5 +1,7 @@
{
"BleepedSwearsRegex": "[а-яА-Я@\\*#]+",
"Unbleeper": {
"BleepedSwearsRegex": "[а-яА-Я@\\*#]+"
},
"SearchDictionary": {
"DictionaryPath": "dict/ObsceneDictionaryRu.txt",
"LearnNudgeFactor": 0.5,

View File

@ -1,4 +1,3 @@
еб
бля
хуй
блядь