using System.Collections.Generic;
/// <summary>
/// Console entry point: runs the Dice calculator self-tests, prints the
/// trigram similarity of a few sample word pairs, then prints a closing message.
/// </summary>
public static void Main()
{
    // NOTE(review): this instance is constructed but no member of it is
    // invoked in this method - confirm whether its tests should run here.
    NGramTester ngramSuite = new NGramTester();

    DiceTester diceSuite = new DiceTester();
    diceSuite.TestDiceCalculatorWith3Grams();
    diceSuite.TestDiceCalculatorWith4Grams();

    // Sample comparisons use trigrams (n = 3).
    DiceCalculator trigramDice = new DiceCalculator(new NGramCalculator(3));
    string[][] samplePairs =
    {
        new[] { "Scotland", "Schottland" },
        new[] { "Luxemburg", "Luxembourg" },
        new[] { "Hamburg", "Hambourgho" },
    };

    foreach (string[] pair in samplePairs)
    {
        double score = trigramDice.Match(pair[0], pair[1]);
        Console.WriteLine(score);
    }

    Console.WriteLine("Alle Tests erfolgreich");
}
// Checks for n-gram extraction with n = 3.
// NOTE(review): the header of the enclosing test method is not visible in this view.
NGramCalculator nGramCalculator = new NGramCalculator(3);
// Input of exactly three characters: expect the word itself as the only n-gram.
var testData1 = nGramCalculator.GetNGrams("Tst");
Assert.AreEqual(1, testData1.Count());
Assert.AreEqual("Tst", testData1.First());
// Four characters: expect two overlapping n-grams, in order of position.
var testData2 = nGramCalculator.GetNGrams("Test");
Assert.AreEqual(2, testData2.Count());
Assert.AreEqual("Tes", testData2.First());
Assert.AreEqual("est", testData2.Last());
// 19-character sentence: expect 17 n-grams; the expected values show that
// spaces are kept and that the element at index 6 starts at character 6.
var testData3 = nGramCalculator.GetNGrams("The quick brown Fox");
Assert.AreEqual(17, testData3.Count());
Assert.AreEqual("The", testData3.First());
Assert.AreEqual("ick", testData3.ElementAt(6));
Assert.AreEqual("Fox", testData3.Last());
// Inputs shorter than three characters (including the empty string): expect no n-grams.
var testData4 = nGramCalculator.GetNGrams("TT");
Assert.AreEqual(0, testData4.Count());
var testData5 = nGramCalculator.GetNGrams("Z");
Assert.AreEqual(0, testData5.Count());
var testData6 = nGramCalculator.GetNGrams("");
Assert.AreEqual(0, testData6.Count());
// Checks for n-gram extraction with n = 4.
// NOTE(review): the header of the enclosing test method is not visible in this view.
NGramCalculator nGramCalculator = new NGramCalculator(4);
// Inputs of zero to three characters: expect no n-grams.
var testData1 = nGramCalculator.GetNGrams("");
Assert.AreEqual(0, testData1.Count());
var testData2 = nGramCalculator.GetNGrams("T");
Assert.AreEqual(0, testData2.Count());
var testData3 = nGramCalculator.GetNGrams("Th");
Assert.AreEqual(0, testData3.Count());
var testData4 = nGramCalculator.GetNGrams("The");
Assert.AreEqual(0, testData4.Count());
// Exactly four characters (trailing space included): expect the input itself.
var testData5 = nGramCalculator.GetNGrams("The ");
Assert.AreEqual(1, testData5.Count());
Assert.AreEqual("The ", testData5.First());
// Five characters: expect two overlapping n-grams.
var testData6 = nGramCalculator.GetNGrams("The q");
Assert.AreEqual(2, testData6.Count());
Assert.AreEqual("The ", testData6.First());
Assert.AreEqual("he q", testData6.Last());
// 19-character sentence: expect 16 n-grams, with spaces kept inside them.
var testData7 = nGramCalculator.GetNGrams("The quick brown Fox");
Assert.AreEqual(16, testData7.Count());
Assert.AreEqual("The ", testData7.First());
Assert.AreEqual("ick ", testData7.ElementAt(6));
Assert.AreEqual(" Fox", testData7.Last());
/// <summary>
/// Verifies <c>DiceCalculator.Match</c> when words are split into trigrams.
/// Every comparison uses the same tolerance so that no assertion relies on
/// exact floating-point equality.
/// </summary>
public void TestDiceCalculatorWith3Grams()
{
    const double Tolerance = 1e-6;
    DiceCalculator calculator = new DiceCalculator(new NGramCalculator(3));

    // Identical words: expected score 1.
    Assert.AreEqual(1.0, calculator.Match("Test", "Test"), Tolerance);

    // One shared trigram out of 2 + 1 in total: 2 * 1 / 3.
    Assert.AreEqual(.6666666, calculator.Match("Test", "Tes"), Tolerance);

    // One shared trigram out of 2 + 2 in total: 2 * 1 / 4.
    Assert.AreEqual(.5, calculator.Match("Test", "TesX"), Tolerance);

    // One shared trigram out of 3 + 3 in total: 2 * 1 / 6.
    Assert.AreEqual(.3333333, calculator.Match("Teste", "TesXX"), Tolerance);

    // A word shorter than three characters contributes no trigrams,
    // so nothing can be shared and the expected score is 0.
    Assert.AreEqual(.0, calculator.Match("TTT", "TT"), Tolerance);
    Assert.AreEqual(.0, calculator.Match("TT", "TT"), Tolerance);
}
/// <summary>
/// Verifies <c>DiceCalculator.Match</c> when words are split into 4-grams.
/// Every comparison uses the same tolerance so that no assertion relies on
/// exact floating-point equality.
/// </summary>
public void TestDiceCalculatorWith4Grams()
{
    const double Tolerance = 1e-6;
    DiceCalculator calculator = new DiceCalculator(new NGramCalculator(4));

    // Identical four-letter words: expected score 1.
    Assert.AreEqual(1.0, calculator.Match("FooB", "FooB"), Tolerance);

    // One shared 4-gram out of 1 + 2 in total: 2 * 1 / 3.
    Assert.AreEqual(.66666666, calculator.Match("FooB", "FooBa"), Tolerance);

    // One shared 4-gram out of 2 + 2 in total: 2 * 1 / 4.
    Assert.AreEqual(.5, calculator.Match("FooBa", "FooBX"), Tolerance);
}
/// <summary>
/// Scores the similarity of two words with the Sorensen-Dice coefficient,
/// computed over the n-grams produced by an <see cref="NGramCalculator"/>.
/// </summary>
public class DiceCalculator
{
    private readonly NGramCalculator nGramCalculator;

    /// <summary>Creates a calculator that splits words with the given n-gram generator.</summary>
    /// <param name="nGramCalculator">Generator used to split both words; must not be null.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="nGramCalculator"/> is null.</exception>
    public DiceCalculator(NGramCalculator nGramCalculator)
    {
        if (nGramCalculator == null)
        {
            throw new System.ArgumentNullException("nGramCalculator");
        }

        this.nGramCalculator = nGramCalculator;
    }

    /// <summary>
    /// Computes <c>2 * shared / (count1 + count2)</c>, where the counts are the
    /// numbers of n-grams of each word and <c>shared</c> is the number of n-gram
    /// occurrences the two words have in common.
    /// </summary>
    /// <param name="word1">First word to compare.</param>
    /// <param name="word2">Second word to compare.</param>
    /// <returns>
    /// A value between 0 and 1. Returns 0 when neither word yields any n-gram,
    /// which avoids a division by zero.
    /// </returns>
    public double Match(string word1, string word2)
    {
        // Materialize once: the generator returns a lazy sequence, and it is
        // needed both for counting and for the overlap computation.
        List<string> word1Ngrams = nGramCalculator.GetNGrams(word1).ToList();
        List<string> word2Ngrams = nGramCalculator.GetNGrams(word2).ToList();

        int totalNGrams = word1Ngrams.Count + word2Ngrams.Count;
        if (totalNGrams == 0)
        {
            return 0.0;
        }

        return 2.0 * CountSharedNGrams(word1Ngrams, word2Ngrams) / totalNGrams;
    }

    /// <summary>
    /// Counts n-gram occurrences common to both lists. Repeated n-grams are
    /// matched one-to-one, so the numerator uses the same counting rule as the
    /// denominator and a word compared with itself always scores 1.
    /// </summary>
    private static int CountSharedNGrams(List<string> first, List<string> second)
    {
        // Occurrences of each n-gram in the first word that are still unmatched.
        Dictionary<string, int> unmatched = new Dictionary<string, int>();
        foreach (string ngram in first)
        {
            int seen;
            unmatched.TryGetValue(ngram, out seen);
            unmatched[ngram] = seen + 1;
        }

        int shared = 0;
        foreach (string ngram in second)
        {
            int remaining;
            if (unmatched.TryGetValue(ngram, out remaining) && remaining > 0)
            {
                unmatched[ngram] = remaining - 1;
                shared++;
            }
        }

        return shared;
    }
}
/// <summary>
/// Splits a word into its overlapping substrings of a fixed length (n-grams).
/// </summary>
public class NGramCalculator
{
    private int size;

    /// <summary>Length of each generated n-gram; must be at least 1.</summary>
    /// <exception cref="System.ArgumentOutOfRangeException">The assigned value is less than 1.</exception>
    public int N
    {
        get { return size; }
        set
        {
            if (value < 1)
            {
                throw new System.ArgumentOutOfRangeException("value", value, "N must be at least 1.");
            }

            size = value;
        }
    }

    /// <summary>Creates a generator for n-grams of length <paramref name="n"/>.</summary>
    /// <param name="n">Length of each n-gram; must be at least 1.</param>
    /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="n"/> is less than 1.</exception>
    public NGramCalculator(int n)
    {
        N = n;
    }

    /// <summary>
    /// Returns every substring of length <see cref="N"/> of <paramref name="word"/>,
    /// ordered by start position. The sequence is evaluated lazily.
    /// </summary>
    /// <param name="word">Word to split; must not be null.</param>
    /// <returns>
    /// <c>word.Length - (N - 1)</c> n-grams, or an empty sequence when the word
    /// is shorter than <see cref="N"/>.
    /// </returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
    public IEnumerable<string> GetNGrams(string word)
    {
        if (word == null)
        {
            throw new System.ArgumentNullException("word");
        }

        // Snapshot N so the lazily evaluated query stays consistent with the
        // count computed here even if N is changed before enumeration.
        int length = N;
        int numberOfNGrams = word.Length >= length ? word.Length - (length - 1) : 0;

        return Enumerable.Range(0, numberOfNGrams)
                         .Select(index => word.Substring(index, length));
    }
}