using System.Collections.Generic;
using System.Diagnostics;
using System.IO.Compression;
using System.Threading.Tasks;
// Script entry point: download the source text to a local file, then run Solution1 over it.
var sourceUrl = "https://www.gutenberg.org/files/2600/2600-0.txt";
// Local target path. The ".gz" extension matters: WordParser checks the extension when opening the file,
// and FileDownloader writes the download through a compressing GZipStream.
string filePath = "source.txt.gz";
await FileDownloader.DownloadFile(sourceUrl, filePath);
// NOTE(review): `proximity` is not declared in the lines visible here — confirm it is defined elsewhere.
new Solution1(filePath, proximity).RunAndDisplayResults();
/// <summary>
/// Concrete solution derived from <see cref="OverusedWordSolutionBase"/>.
/// </summary>
public class Solution1 : OverusedWordSolutionBase {
/// <summary>
/// Forwards <paramref name="filePath"/> and <paramref name="proximity"/> unchanged to the base constructor.
/// </summary>
public Solution1(string filePath, int proximity)
: base(filePath, proximity) { }
/// <summary>
/// Implementation of the abstract <c>Run</c> method declared on the base class.
/// </summary>
/// <returns>As visible here, a newly created empty list.</returns>
public override List<WordOveruse> Run() {
// The visible code computes nothing; it only hands back an empty result list.
return new List<WordOveruse>();
/// <summary>
/// Result entry pairing a word with a count. RunAndDisplayResults sums <see cref="Count"/> across
/// all entries and prints the first ten entries as "Word: Count".
/// </summary>
public class WordOveruse {
/// <summary>The word this entry refers to.</summary>
public string Word { get; set; }
/// <summary>The count reported for <see cref="Word"/>.</summary>
public int Count { get; set; }
/// <summary>
/// Base class for solutions: derived classes implement <c>Run</c>, and <c>RunAndDisplayResults</c>
/// executes the solution <see cref="RepeatRuns"/> times and prints averaged timings and results.
/// </summary>
public abstract class OverusedWordSolutionBase {
// Path of the input file; passed to a new WordParser on every repeat run.
protected string _filePath;
// NOTE(review): presumably the word-distance window used by derived solutions — not read in the visible code; confirm.
protected int _proximity;
// Its Elapsed value is accumulated into the per-solution time in RunAndDisplayResults.
private Stopwatch _stopWatch = new Stopwatch();
// Parser behind GetNextWord(); replaced with a fresh instance at the start of each repeat run.
private WordParser _wordParser;
/// <summary>
/// Number of loop iterations in RunAndDisplayResults, and the divisor used to average the timings.
/// Defaults to 3.
/// </summary>
public int RepeatRuns { get; set; } = 3;
/// <summary>
/// Creates the solution for the given input file and proximity.
/// </summary>
// NOTE(review): the constructor body is not visible here; presumably it stores both arguments in the fields above — confirm.
public OverusedWordSolutionBase(string filePath, int proximity) {
/// <summary>
/// Repeats the solution <see cref="RepeatRuns"/> times, averages the measured times, and writes
/// a summary (timings, overuse totals, and the first ten results) to the console.
/// </summary>
/// <returns>A list of <see cref="WordOveruse"/> (the return statement is not visible in this view).</returns>
public List<WordOveruse> RunAndDisplayResults() {
// Accumulators for the two timings; each is divided by RepeatRuns after the loop.
TimeSpan solutionTime = TimeSpan.Zero;
TimeSpan totalTime = TimeSpan.Zero;
// NOTE(review): starts as null and is dereferenced in the output below; the assignment
// (presumably from Run()) is not visible here — confirm it always happens.
List<WordOveruse> results = null;
var totalStopWatch = new Stopwatch();
for (int i = 0; i < RepeatRuns; i++) {
// A new parser per iteration, so each run gets its own reader over the file.
// NOTE(review): WordParser is IDisposable; no Dispose call is visible here — confirm the parser is released.
_wordParser = new WordParser(_filePath);
// NOTE(review): the start/stop/reset calls for both stopwatches are not visible in this view.
solutionTime += _stopWatch.Elapsed;
totalTime += totalStopWatch.Elapsed;
// Convert the accumulated times into per-run averages.
// Integer division on ticks: throws DivideByZeroException if RepeatRuns is 0.
solutionTime = TimeSpan.FromTicks(solutionTime.Ticks / RepeatRuns);
totalTime = TimeSpan.FromTicks(totalTime.Ticks / RepeatRuns);
// Header line is the runtime type name, i.e. the derived solution class.
Console.WriteLine($"{this.GetType().Name}:");
// Milliseconds / 1000 gives seconds, formatted with three decimals.
Console.WriteLine($"Time: {solutionTime.TotalMilliseconds / 1000:n3} seconds (total time: {totalTime.TotalMilliseconds / 1000:n3} seconds)");
// NOTE(review): `_totalWords` is not declared in the lines visible here — confirm where it is defined and updated.
Console.WriteLine($"Found {results.Sum(r => r.Count):n0} total overuses of {results.Count():n0} words, out of a total of {_totalWords:n0} words.");
// Take(10) uses the list's existing order; no sorting is visible in this method.
Console.WriteLine($"Top 10:\r\n {String.Join(", ", results.Take(10).Select(r => $"{r.Word}: {r.Count:n0}"))}\r\n");
/// <summary>
/// Implemented by derived classes (for example Solution1) to produce the list of
/// <see cref="WordOveruse"/> results.
/// </summary>
public abstract List<WordOveruse> Run();
/// <summary>
/// Gives derived classes access to the next word from the current <c>WordParser</c>.
/// </summary>
// NOTE(review): only the delegation below is visible; what is done with `result` before returning is not shown.
protected string GetNextWord() {
// Delegates to the parser created for the current repeat run.
var result = _wordParser.GetNextWord();
/// <summary>
/// Reads text from a file through a <c>StreamReader</c> and hands it out word by word
/// via <c>GetNextWord</c>. Implements <see cref="IDisposable"/>.
/// </summary>
public class WordParser : IDisposable {
// Length threshold used by IsWord, which tests `word.Length < _minWordLength`.
const int _minWordLength = 2;
/// <summary>
/// Opens <paramref name="filePath"/> for reading as UTF-8 text.
/// </summary>
/// <param name="filePath">Path of the file to read; a ".gz" extension selects gzip decompression.</param>
/// <exception cref="InvalidOperationException">Thrown when the file does not exist.</exception>
public WordParser(string filePath) {
if (!File.Exists(filePath)) {
throw new InvalidOperationException($"File {filePath} was not found.");
// Extension check ignores case (invariant culture), so ".GZ" also matches.
if (Path.GetExtension(filePath).Equals(".gz", StringComparison.InvariantCultureIgnoreCase)) {
// Compressed input: file stream -> gzip decompression -> text reader.
_s = new FileStream(filePath, FileMode.Open, FileAccess.Read);
_gz = new GZipStream(_s, CompressionMode.Decompress);
// Arguments: UTF-8, detect encoding from byte order marks, buffer size 4096 * 4096 (16,777,216).
_sr = new StreamReader(_gz, Encoding.UTF8, true, 4096 * 4096);
// Reads the file directly, with the same encoding and buffer settings.
// NOTE(review): presumably the else branch for non-.gz files; the `else` and closing braces are not visible in this view — confirm.
_sr = new StreamReader(filePath, Encoding.UTF8, true, 4096 * 4096);
/// <summary>
/// Builds the next word from the reader one character at a time, treating whitespace and
/// punctuation as delimiters and lower-casing the result.
/// </summary>
// NOTE(review): the loop structure, the end-of-stream handling and the return statements are not visible in this view.
public string GetNextWord() {
// Accumulates the characters of the word being built.
StringBuilder sb = new StringBuilder();
// StreamReader.Read() returns the next character as an int, or -1 at end of stream
// (despite the variable's name, this is a character, not a byte).
var currentByte = _sr.Read();
var currentChar = (char)currentByte;
// Whitespace or punctuation is the delimiter test for the current character.
if (char.IsWhiteSpace(currentChar) || char.IsPunctuation(currentChar)) {
// Parameterless ToLower() uses the current culture's casing rules.
var word = sb.ToString().ToLower();
// NOTE(review): `isEnd` is not declared in the lines visible here; presumably set when Read() returns -1 — confirm.
if (word.Length == 0 && isEnd) {
// Checks a candidate word on two criteria: its length against _minWordLength, and whether
// every character is a letter.
// NOTE(review): the return statements are not visible here; presumably either failed check yields false — confirm.
bool IsWord(string word) {
// Length check against the class constant.
if (word.Length < _minWordLength) {
foreach (var currentChar in word) {
// Any non-letter character (digit, symbol, ...) triggers this branch.
if (!char.IsLetter(currentChar)) {
// IDisposable implementation for WordParser.
// NOTE(review): the body of Dispose(bool), the public Dispose() and the matching #endregion are not visible in this view.
#region IDisposable Support
// NOTE(review): presumably a guard so that cleanup runs only once — its use is not visible here; confirm.
private bool disposedValue = false;
/// <summary>
/// Overridable cleanup hook taking a <paramref name="disposing"/> flag.
/// </summary>
protected virtual void Dispose(bool disposing) {
/// <summary>
/// Helper for fetching a URL into a local file.
/// </summary>
public static class FileDownloader {
/// <summary>
/// Requests <paramref name="url"/> over HTTP and creates <paramref name="targetFilePath"/>,
/// wrapping the file in a compressing <c>GZipStream</c>.
/// </summary>
/// <param name="url">Address to download from.</param>
/// <param name="targetFilePath">Path of the file to create; File.Create overwrites an existing file.</param>
/// <exception cref="HttpRequestException">Thrown by EnsureSuccessStatusCode for a non-success HTTP status.</exception>
// NOTE(review): the copy from the response stream into the gzip stream is not visible in this view.
public static async Task DownloadFile(string url, string targetFilePath) {
using (var httpClient = new HttpClient()) {
var response = await httpClient.GetAsync(url);
// Fail fast on a non-success status instead of writing an error page to disk.
response.EnsureSuccessStatusCode();
using (var ms = await response.Content.ReadAsStreamAsync())
using (var fs = File.Create(targetFilePath))
using (var gz = new GZipStream(fs, CompressionMode.Compress)) {
// Rewind the response stream before it is read.
// NOTE(review): Seek requires a seekable stream; this presumably relies on the response content being buffered — confirm.
ms.Seek(0, SeekOrigin.Begin);