using System.Collections.Generic;
using System.Text.RegularExpressions;
// Benchmark driver: generates 50000 pseudo-random text lines from a fixed word
// list, then hands text to TextProcessor.ProcessFile and prints the elapsed time.
// NOTE(review): the braces and apparently some statements of this method are not
// present in this view of the file; the comments below describe only what the
// visible lines show.
public static void Main()
// Accumulates every generated line (see the Add call at the end of the outer loop).
var lines_list = new List<string>();
// Vocabulary used both to build the random lines and as the lookup list passed to ProcessFile.
var wordsArr = new string[]{"alpha", "beta", "gama", "delta", "Epsilon", "Zeta", "Eta", "Theta", "Iota", "kaPPa", "LamBda", "mU", "Nu", "xi", "omicron", "pi", "rHo", "siGma", "Tau", "UpSiLoN", "pHi", "chi", "psi", "omega"};
// NOTE(review): `ss` is not referenced again in the visible lines - confirm whether it is still needed.
var ss = new StringBuilder();
// Fixed seed, so every run draws the same sequence of random numbers.
var rnd = new Random( 12345 );
// One iteration per generated line.
for ( int i = 0; i < 50000; i++ )
// Words picked for the current line.
var words_arr = new List<string>();
// NOTE(review): rnd.Next(10,20) is part of the loop condition, so a new upper bound is
// drawn on every iteration instead of once per line; hoist it into a local if a single
// random word count per line was intended.
for ( int j = 0; j < rnd.Next(10,20); j++)
// Pick a random entry of the vocabulary.
words_arr.Add( wordsArr[ rnd.Next( wordsArr.Length ) ] );
// Number of leading tabs for this line: 0..4 (the upper bound of Next is exclusive).
var tab_count = rnd.Next( 0, 5 );
var line = new StringBuilder();
// Prefix the line with tab_count tab characters.
for ( int k = 0; k < tab_count; k++ ) line.Append("\t");
// NOTE(review): no body for this foreach is visible here; presumably it appended each
// word `w` to `line` - TODO confirm against the full file.
foreach( var w in words_arr )
// Concatenating the StringBuilder with a string converts it via ToString(), then adds the newline.
var result = line + "\n";
lines_list.Add( result );
// NOTE(review): no Start()/Stop() call on `sw` is visible, so the elapsed time printed
// below would be 0 unless the stopwatch is started in a line not shown here.
var sw = new System.Diagnostics.Stopwatch();
var dp = new TextProcessor();
// NOTE(review): `str` is not declared in the visible lines; presumably it is the
// concatenation of the generated lines - verify.
dp.ProcessFile ( str, wordsArr );
// Report the stopwatch reading in milliseconds.
Console.WriteLine("Time:" + sw.ElapsedMilliseconds + " ms.");
public class TextProcessor
public Dictionary<string, int> data;
public void ProcessFile(string doc , string[] words)
var data = new Dictionary<string, ValuePair>();
var spaces = new char[]{' ',' '};
using (System.IO.StringReader sr = new System.IO.StringReader(doc))
while ((line = sr.ReadLine()) != null)
var word = line.TrimStart(spaces).Split()[0].ToLower();
if (Array.IndexOf(words, word) > -1)
if (data.ContainsKey(word))
data[key].Add(new ValuePair {Value1=index, Value2=line});
data.Add(word, new ValuePair {Value1=index, Value2=line});
keys = new string[data.Keys.Count];
data.Keys.CopyTo(keys, 0);
vals = new int[data.Values.Count];