mirror of
https://github.com/Jetsparrow/antiantiswearingbot.git
synced 2026-01-20 23:16:08 +03:00
Make unbleeper testable
- separate unbleeper as dependency of aasb - fix algorithm - tests
This commit is contained in:
parent
2b8fd15e8e
commit
a4e7b836b8
19
AntiAntiSwearingBot.Tests/AntiAntiSwearingBot.Tests.csproj
Normal file
19
AntiAntiSwearingBot.Tests/AntiAntiSwearingBot.Tests.csproj
Normal file
@ -0,0 +1,19 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netcoreapp2.1</TargetFramework>
|
||||
|
||||
<IsPackable>false</IsPackable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="15.9.0" />
|
||||
<PackageReference Include="xunit" Version="2.4.0" />
|
||||
<PackageReference Include="xunit.runner.visualstudio" Version="2.4.0" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\AntiAntiSwearingBot\AntiAntiSwearingBot.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
74
AntiAntiSwearingBot.Tests/FilterTests.cs
Normal file
74
AntiAntiSwearingBot.Tests/FilterTests.cs
Normal file
@ -0,0 +1,74 @@
|
||||
using System;
|
||||
using Xunit;
|
||||
|
||||
namespace AntiAntiSwearingBot.Tests
|
||||
{
|
||||
/// <summary>
/// Behavioural tests for <see cref="Unbleeper"/>: which words it expands into a reply
/// and which kinds of messages it must leave alone (a <c>null</c> reply).
/// </summary>
public class FilterTests
{
    /// <summary>Filter under test, rebuilt from the on-disk configuration for every test case.</summary>
    Unbleeper Filter { get; }

    /// <summary>
    /// Loads the bot configuration files and wires a dictionary-backed <see cref="Unbleeper"/>.
    /// </summary>
    public FilterTests()
    {
        var config = Config.Load<Config>("aasb.cfg.json", "aasb.cfg.secret.json");
        Filter = new Unbleeper(new SearchDictionary(config), config.Unbleeper);
    }

    /// <summary>Asserts that the filter produces no reply at all for <paramref name="text"/>.</summary>
    void AssertIgnored(string text)
    {
        var reply = Filter.UnbleepSwears(text);
        Assert.Null(reply);
    }

    /// <summary>A single bleeped word is answered with its dictionary match, prefixed by one asterisk.</summary>
    [Theory]
    [InlineData("бл**ь", "*блядь")]
    [InlineData("ж**а", "*жопа")]
    public void UnbleepSimpleSwears(string word, string expected)
    {
        // The reply ends with a line terminator; strip it before comparing.
        var lineBreakChars = Environment.NewLine.ToCharArray();
        var actual = Filter.UnbleepSwears(word).TrimEnd(lineBreakChars);

        Assert.Equal(expected, actual);
    }

    /// <summary>Short runs of censoring symbols without letters are not treated as swears.</summary>
    [Theory]
    [InlineData("*")]
    [InlineData("**#")]
    [InlineData("@**#")]
    public void IgnoreShortGrawlixes(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Messages containing @-mentions (and links) produce no reply.</summary>
    [Theory]
    [InlineData("@pvkuznetsov https://github.com/jacksondunstan/UnityNativeScripting")]
    [InlineData("@JohnnyMnemonic")]
    [InlineData("@Artyom по поводу")]
    [InlineData("@Laima прошу блины!")]
    [InlineData("эй админ @harry0xfefecaca верни бота")]
    public void IgnoreMentions(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Formula-like text full of punctuation produces no reply.</summary>
    [Theory]
    [InlineData("x - floor(abs(x)) * sign(x) -- вроде такая формула для frac(x)")]
    public void IgnoresWeirdShit(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Chat commands (messages starting with a slash) are skipped even if they contain bleeped words.</summary>
    [Theory]
    [InlineData("/poll")]
    [InlineData("/roll 2d6")]
    [InlineData("/award medal")]
    [InlineData("/status@MinecraftServerBot")]
    [InlineData("/broadcast@MinecraftServerBot пи#*ец вы понастроили тут")]
    [InlineData("/ban@MinecraftServerBot @dirty_johnny86")]
    public void IgnoreCommands(string text)
    {
        AssertIgnored(text);
    }

    /// <summary>Hashtags and preprocessor-style tokens are not mistaken for bleeped words.</summary>
    [Theory]
    [InlineData("#UEeğitimKarazin")]
    [InlineData("#KöksalBabaCafeTrabzonda")]
    [InlineData("#ZehraHanımSüresizeKadro")]
    [InlineData("#define")]
    [InlineData("#ifndef")]
    [InlineData("#trashtag")]
    [InlineData("#MeToo")]
    [InlineData("#инстаграм")]
    [InlineData("#битваБлогеров")]
    [InlineData("#зенитахмат")]
    [InlineData("#HappyKWONJIYONGDay")]
    [InlineData("#MCITOT")]
    [InlineData("#ТамбовКраснодар")]
    [InlineData("#JRockконвент2019")]
    [InlineData("#DonaldTrumpAgain")]
    [InlineData("#ZodiacKillerStrikesAgain")]
    [InlineData("#ThanksObama")]
    [InlineData("#BalıkBurcuKızıylaEvlenmek")]
    public void IgnoreHashtags(string text)
    {
        AssertIgnored(text);
    }
}
|
||||
}
|
||||
@ -3,7 +3,9 @@ Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio 15
|
||||
VisualStudioVersion = 15.0.28010.2036
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AntiAntiSwearingBot", "AntiAntiSwearingBot\AntiAntiSwearingBot.csproj", "{66AFFD7B-5B2D-4C85-8523-770702255511}"
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "AntiAntiSwearingBot", "AntiAntiSwearingBot\AntiAntiSwearingBot.csproj", "{66AFFD7B-5B2D-4C85-8523-770702255511}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "AntiAntiSwearingBot.Tests", "AntiAntiSwearingBot.Tests\AntiAntiSwearingBot.Tests.csproj", "{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
@ -15,6 +17,10 @@ Global
|
||||
{66AFFD7B-5B2D-4C85-8523-770702255511}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{66AFFD7B-5B2D-4C85-8523-770702255511}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{66AFFD7B-5B2D-4C85-8523-770702255511}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{AA3CB2CB-05F1-46C4-8710-2702BD663A8B}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
@ -16,14 +16,13 @@ namespace AntiAntiSwearingBot
|
||||
{
|
||||
Config Config { get; }
|
||||
SearchDictionary Dict { get; }
|
||||
Unbleeper Unbleeper { get; }
|
||||
|
||||
public AntiAntiSwearingBot(Config cfg, SearchDictionary dict)
|
||||
{
|
||||
Config = cfg;
|
||||
Dict = dict;
|
||||
BleepedSwearsRegex = new Regex(cfg.BleepedSwearsRegex, RegexOptions.Compiled);
|
||||
NonWordRegex = new Regex("\\W", RegexOptions.Compiled);
|
||||
MentionRegex = new Regex("@[a-zA-Z0-9_]+", RegexOptions.Compiled);
|
||||
Unbleeper = new Unbleeper(dict, cfg.Unbleeper);
|
||||
}
|
||||
|
||||
TelegramBotClient Client { get; set; }
|
||||
@ -49,41 +48,11 @@ namespace AntiAntiSwearingBot
|
||||
|
||||
public async Task Stop()
|
||||
{
|
||||
Dict.Save();
|
||||
Dispose();
|
||||
}
|
||||
|
||||
#region service
|
||||
|
||||
Regex BleepedSwearsRegex { get; }
|
||||
Regex NonWordRegex { get; }
|
||||
Regex MentionRegex { get; }
|
||||
|
||||
string UnbleepSwears(string text)
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(text))
|
||||
return null;
|
||||
|
||||
var words = BleepedSwearsRegex.Matches(text)
|
||||
.Select(m => m.Value)
|
||||
.Where(m => NonWordRegex.IsMatch(m))
|
||||
.Where(m => !MentionRegex.IsMatch(m))
|
||||
.ToArray();
|
||||
|
||||
if (words.Any())
|
||||
{
|
||||
var response = new StringBuilder();
|
||||
for (int i = 0; i < words.Length; ++i)
|
||||
{
|
||||
var m = Dict.Match(words[i]);
|
||||
response.AppendLine(new string('*', i + 1) + m.Word + new string('?', m.Distance));
|
||||
}
|
||||
return response.ToString();
|
||||
}
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
||||
void BotOnMessageReceived(object sender, MessageEventArgs args)
|
||||
{
|
||||
var msg = args.Message;
|
||||
@ -104,7 +73,7 @@ namespace AntiAntiSwearingBot
|
||||
}
|
||||
else
|
||||
{
|
||||
var unbleepResponse = UnbleepSwears(msg.Text);
|
||||
var unbleepResponse = Unbleeper.UnbleepSwears(msg.Text);
|
||||
if (unbleepResponse != null)
|
||||
Client.SendTextMessageAsync(
|
||||
args.Message.Chat.Id,
|
||||
|
||||
@ -29,10 +29,7 @@ namespace AntiAntiSwearingBot
|
||||
if (cmd.UserName != null && cmd.UserName != Username)
|
||||
return null;
|
||||
if (Commands.ContainsKey(cmd.Command))
|
||||
{
|
||||
try { return Commands[cmd.Command].Execute(cmd, args); }
|
||||
catch { }
|
||||
}
|
||||
return Commands[cmd.Command].Execute(cmd, args);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@ -3,18 +3,15 @@
|
||||
public class Config : ConfigBase
|
||||
{
|
||||
public string ApiKey { get; private set; }
|
||||
|
||||
public string BleepedSwearsRegex { get; private set; }
|
||||
|
||||
public struct ProxySettings
|
||||
{
|
||||
public string Url { get; private set; }
|
||||
public int Port { get; private set; }
|
||||
public string Login { get; private set; }
|
||||
public string Password { get; private set; }
|
||||
public ProxySettings Proxy { get; private set; }
|
||||
public SearchDictionarySettings SearchDictionary { get; private set; }
|
||||
public UnbleeperSettings Unbleeper { get; private set; }
|
||||
}
|
||||
|
||||
public ProxySettings Proxy { get; private set; }
|
||||
public struct UnbleeperSettings
|
||||
{
|
||||
public string BleepedSwearsRegex { get; private set; }
|
||||
}
|
||||
|
||||
public struct SearchDictionarySettings
|
||||
{
|
||||
@ -26,7 +23,13 @@
|
||||
public double UnlearnNudgeFactor { get; private set; }
|
||||
}
|
||||
|
||||
public SearchDictionarySettings SearchDictionary { get; private set; }
|
||||
}
|
||||
public struct ProxySettings
|
||||
{
|
||||
public string Url { get; private set; }
|
||||
public int Port { get; private set; }
|
||||
public string Login { get; private set; }
|
||||
public string Password { get; private set; }
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@ -2,11 +2,14 @@
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AntiAntiSwearingBot
|
||||
{
|
||||
static class Language
|
||||
public static class Language
|
||||
{
|
||||
static int min(int a, int b, int c) { return Math.Min(Math.Min(a, b), c); }
|
||||
|
||||
public static int HammingDistance(string a, string b)
|
||||
{
|
||||
if (string.IsNullOrEmpty(a))
|
||||
@ -26,8 +29,6 @@ namespace AntiAntiSwearingBot
|
||||
return leftover + dist;
|
||||
}
|
||||
|
||||
static int min(int a, int b, int c) { return Math.Min(Math.Min(a, b), c); }
|
||||
|
||||
public static int LevenshteinDistance(string a, string b)
|
||||
{
|
||||
int[] prevRow = new int[b.Length + 1];
|
||||
@ -55,46 +56,18 @@ namespace AntiAntiSwearingBot
|
||||
public static bool CharMatch(char a, char b)
|
||||
=> a == b || !char.IsLetterOrDigit(a) || !char.IsLetterOrDigit(b);
|
||||
|
||||
/// <summary>
|
||||
/// Compute the distance between two strings.
|
||||
/// </summary>
|
||||
public static int Compute(string s, string t)
|
||||
{
|
||||
int n = s.Length;
|
||||
int m = t.Length;
|
||||
int[,] d = new int[n + 1, m + 1];
|
||||
static readonly Regex MentionRegex = new Regex("^@[a-zA-Z0-9_]+$", RegexOptions.Compiled);
|
||||
|
||||
if (n == 0)
|
||||
return m;
|
||||
if (m == 0)
|
||||
return n;
|
||||
static readonly Regex HashTagRegex = new Regex("^#\\w+$", RegexOptions.Compiled);
|
||||
|
||||
// Step 2
|
||||
for (int i = 0; i <= n; d[i, 0] = i++)
|
||||
{
|
||||
}
|
||||
public static bool IsTelegramMention(string word) => MentionRegex.IsMatch(word);
|
||||
|
||||
for (int j = 0; j <= m; d[0, j] = j++)
|
||||
{
|
||||
}
|
||||
public static bool IsHashTag(string word) => HashTagRegex.IsMatch(word);
|
||||
|
||||
public static bool HasNonWordChars(string arg) => arg.Any(c => !char.IsLetterOrDigit(c));
|
||||
|
||||
public static bool HasWordChars(string arg) => arg.Any(char.IsLetter);
|
||||
|
||||
// Step 3
|
||||
for (int i = 1; i <= n; i++)
|
||||
{
|
||||
//Step 4
|
||||
for (int j = 1; j <= m; j++)
|
||||
{
|
||||
// Step 5
|
||||
int cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
|
||||
|
||||
// Step 6
|
||||
d[i, j] = Math.Min(
|
||||
Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
|
||||
d[i - 1, j - 1] + cost);
|
||||
}
|
||||
}
|
||||
// Step 7
|
||||
return d[n, m];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -26,6 +26,7 @@ namespace AntiAntiSwearingBot
|
||||
Console.ReadKey();
|
||||
Console.WriteLine("Waiting for exit...");
|
||||
bot.Stop().Wait();
|
||||
dict.Save();
|
||||
return (int)ExitCode.Ok;
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
||||
@ -11,6 +11,8 @@ namespace AntiAntiSwearingBot
|
||||
{
|
||||
var s = cfg.SearchDictionary;
|
||||
path = s.DictionaryPath;
|
||||
tmppath = path + ".tmp";
|
||||
|
||||
learnInitialRating = Math.Clamp(s.LearnInitialRating, 0,1);
|
||||
learnNudgeFactor = Math.Clamp(s.LearnNudgeFactor, 0, 1);
|
||||
unlearnNudgeFactor = Math.Clamp(s.UnlearnNudgeFactor, 0, 1);
|
||||
@ -21,8 +23,12 @@ namespace AntiAntiSwearingBot
|
||||
|
||||
public void Save()
|
||||
{
|
||||
File.WriteAllLines(path + ".tmp", words);
|
||||
File.Move(path + ".tmp", path);
|
||||
if (File.Exists(tmppath))
|
||||
File.Delete(tmppath);
|
||||
File.WriteAllLines(tmppath, words);
|
||||
if (File.Exists(path))
|
||||
File.Delete(path);
|
||||
File.Move(tmppath, path);
|
||||
}
|
||||
|
||||
public struct WordMatch
|
||||
@ -93,7 +99,7 @@ namespace AntiAntiSwearingBot
|
||||
|
||||
#region service
|
||||
|
||||
string path;
|
||||
readonly string path, tmppath;
|
||||
|
||||
double learnInitialRating = 0.75;
|
||||
double learnNudgeFactor = 0.5;
|
||||
|
||||
59
AntiAntiSwearingBot/Unbleeper.cs
Normal file
59
AntiAntiSwearingBot/Unbleeper.cs
Normal file
@ -0,0 +1,59 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AntiAntiSwearingBot
|
||||
{
|
||||
/// <summary>
/// Finds censored ("bleeped") words in a chat message and builds a reply that lists
/// the closest dictionary match for each of them.
/// </summary>
public class Unbleeper
{
    SearchDictionary Dict { get; }
    UnbleeperSettings Cfg { get; }

    /// <summary>
    /// Creates an unbleeper backed by the given dictionary.
    /// </summary>
    /// <param name="dict">Dictionary used to look up the best match for each candidate word.</param>
    /// <param name="cfg">Settings providing the pattern a whole word must match to be a candidate.</param>
    public Unbleeper(SearchDictionary dict, UnbleeperSettings cfg)
    {
        Dict = dict;
        Cfg = cfg;

        // The configured pattern must match the WHOLE word. Group it before anchoring:
        // without the group, a pattern with a top-level alternation such as "a|b" would
        // become "^a|b$", which anchors each alternative on one side only.
        BleepedSwearsRegex = new Regex("^(?:" + Cfg.BleepedSwearsRegex + ")$", RegexOptions.Compiled);
    }

    Regex BleepedSwearsRegex { get; }

    /// <summary>
    /// Builds the reply for a message.
    /// </summary>
    /// <param name="text">Raw message text; may be <c>null</c>.</param>
    /// <returns>
    /// One line per candidate word, in message order: the N-th line starts with N asterisks,
    /// followed by the matched dictionary word and one '?' per unit of match distance.
    /// Returns <c>null</c> when the text is empty or whitespace, is a chat command
    /// (starts with '/'), or contains no candidate words.
    /// </returns>
    public string UnbleepSwears(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
            return null;

        text = text.Trim();

        if (text.StartsWith('/')) // is chat command
            return null;

        // An empty separator array makes Split break on any whitespace.
        var candidates = text
            .Split(new char[0], StringSplitOptions.RemoveEmptyEntries)
            .Where(IsCandidate)
            .ToArray();

        if (candidates.Length == 0)
            return null;

        var response = new StringBuilder();
        for (int i = 0; i < candidates.Length; ++i)
        {
            var m = Dict.Match(candidates[i]);
            response.AppendLine(new string('*', i + 1) + m.Word + new string('?', m.Distance));
        }
        return response.ToString();
    }

    /// <summary>
    /// Decides whether a single whitespace-delimited word should be looked up in the dictionary.
    /// </summary>
    bool IsCandidate(string w)
        => !Language.IsTelegramMention(w)
        && Language.HasNonWordChars(w)                  // must contain at least one masking symbol
        && !Language.IsHashTag(w)
        && (Language.HasWordChars(w) || w.Length > 5)   // symbol-only runs count only when long
        && w.Length > 2
        && BleepedSwearsRegex.IsMatch(w);
}
|
||||
}
|
||||
@ -1,5 +1,7 @@
|
||||
{
|
||||
"BleepedSwearsRegex": "[а-яА-Я@\\*#]+",
|
||||
"Unbleeper": {
|
||||
"BleepedSwearsRegex": "[а-яА-Я@\\*#]+"
|
||||
},
|
||||
"SearchDictionary": {
|
||||
"DictionaryPath": "dict/ObsceneDictionaryRu.txt",
|
||||
"LearnNudgeFactor": 0.5,
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
еб
|
||||
бля
|
||||
хуй
|
||||
блядь
|
||||
|
||||
Loading…
Reference in New Issue
Block a user