Quiz: Overused Words (question only) | C# Online Compiler

Quiz: Overused Words (question only) by Anonymous

/*
INSTRUCTIONS:
Write code that detects and displays the most "overused words" in a text file.
A word is considered overused if two instances of this word appear within 30
words of each other. For example, if a particular word appears in positions
4, 25, 35 and 92, there are two overuses of this word (4 to 25 and 25 to 35).
The instance at position 92 is far from the previous instance, so it is not an
overuse.

The result must be a list of all overused words, along with the number of
overuses for each one.

Please consider that the source text can be many millions of words. You do not
have to bother with how the text is parsed into words. Just assume that you
are provided with a method you can call repeatedly to get the next word, until
it returns null, in which case you know you have reached the end.

Expected result for the text in this sample:
------------
Found 142,784 total overuses of 3,502 words, out of a total of 554,801 words.
Top 10:
    the: 30,203, and: 16,027, to: 10,489, of: 9,229, he: 5,633, his: 3,925, in: 3,688, that: 3,406, her: 2,952, was: 2,951
*/

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;

// Solution parameters:
// Source text: War and Peace, by Leo Tolstoy.
string sourceUrl = "https://www.gutenberg.org/files/2600/2600-0.txt";
int proximity = 30;

// The download is stored GZip-compressed to get around .Net Fiddle's 2 MB limit.
string filePath = "source.txt.gz";
await FileDownloader.DownloadFile(sourceUrl, filePath);

// Run the solution against the downloaded text and print the summary.
var solution = new Solution1(filePath, proximity);
solution.RunAndDisplayResults();
return;

// ------------------------------------------------------------------

/// <summary>
/// Solution to the Overused Words quiz.
/// Update this class to solve the problem.
/// </summary>
public class Solution1 : OverusedWordSolutionBase {

    /// <summary>
    /// Passes the source file path and the proximity window on to the base class.
    /// </summary>
    public Solution1(string filePath, int proximity) : base(filePath, proximity) {
    }

    /// <summary>
    /// Counts the overused words and returns the results.
    /// </summary>
    /// <remarks>
    /// Call GetNextWord() (inherited from the base class) to fetch each word.
    /// A null return value means the end of the file has been reached.
    /// </remarks>
    public override List<WordOveruse> Run() {
        // >>>> Add your solution here and return the results.
        var results = new List<WordOveruse>();
        return results;
    }
}

// ------------------------------------------------------------------

/// <summary>
/// Result item returned by solutions: a word paired with a count.
/// </summary>
public class WordOveruse {
    /// <summary>The word this entry refers to.</summary>
    public string Word { get; set; }

    /// <summary>The count reported for <see cref="Word"/>.</summary>
    public int Count { get; set; }
}

// ------------------------------------------------------------------

/// <summary>
/// Base class for solutions to the overused words problem.
/// Supplies words to the solution, times the runs and prints a summary of the results.
/// </summary>
public abstract class OverusedWordSolutionBase {
    protected string _filePath;
    protected int _proximity;

    // Times the solution code only: it is paused while GetNextWord() is inside the parser.
    private readonly Stopwatch _stopWatch = new Stopwatch();
    private WordParser _wordParser;
    private int _totalWords;

    /// <summary>
    /// Number of times <see cref="Run"/> is executed by <see cref="RunAndDisplayResults"/>.
    /// The reported times are the average over these runs. Must be at least 1.
    /// </summary>
    public int RepeatRuns { get; set; } = 3;

    /// <summary>
    /// Stores the path of the text file to analyse and the proximity window for subclasses.
    /// </summary>
    public OverusedWordSolutionBase(string filePath, int proximity) {
        _filePath = filePath;
        _proximity = proximity;
    }

    /// <summary>
    /// Executes <see cref="Run"/> <see cref="RepeatRuns"/> times, each time with a fresh parser,
    /// then writes the average timings, the totals and the first ten results to the console.
    /// </summary>
    /// <returns>The list returned by the last execution of <see cref="Run"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// <see cref="RepeatRuns"/> is less than 1, or <see cref="Run"/> returned null.
    /// </exception>
    public List<WordOveruse> RunAndDisplayResults() {
        // Guard up front: zero runs would otherwise end in a division by zero below.
        if (RepeatRuns < 1) {
            throw new InvalidOperationException($"{nameof(RepeatRuns)} must be at least 1.");
        }

        TimeSpan solutionTime = TimeSpan.Zero;
        TimeSpan totalTime = TimeSpan.Zero;
        List<WordOveruse> results = null;
        var totalStopWatch = new Stopwatch();
        for (int i = 0; i < RepeatRuns; i++) {
            _totalWords = 0;
            // Created before the try block so that a constructor failure (e.g. missing file)
            // propagates as-is instead of being masked by a null dereference in finally.
            _wordParser = new WordParser(_filePath);
            try {
                _stopWatch.Restart();
                totalStopWatch.Restart();
                results = Run();
                _stopWatch.Stop();
                totalStopWatch.Stop();

                solutionTime += _stopWatch.Elapsed;
                totalTime += totalStopWatch.Elapsed;
            } finally {
                _wordParser.Dispose();
            }
        }

        if (results == null) {
            throw new InvalidOperationException($"{nameof(Run)} returned null instead of a result list.");
        }

        solutionTime = TimeSpan.FromTicks(solutionTime.Ticks / RepeatRuns);
        totalTime = TimeSpan.FromTicks(totalTime.Ticks / RepeatRuns);

        Console.WriteLine($"{this.GetType().Name}:");
        Console.WriteLine($"Time: {solutionTime.TotalMilliseconds / 1000:n3} seconds (total time: {totalTime.TotalMilliseconds / 1000:n3} seconds)");
        Console.WriteLine($"Found {results.Sum(r => r.Count):n0} total overuses of {results.Count:n0} words, out of a total of {_totalWords:n0} words.");
        // The "Top 10" is simply the first ten entries, in the order the solution returned them.
        Console.WriteLine($"Top 10:\r\n    {String.Join(", ", results.Take(10).Select(r => $"{r.Word}: {r.Count:n0}"))}\r\n");

        return results;
    }

    /// <summary>
    /// Implemented by each solution: computes and returns the list of overused words.
    /// </summary>
    public abstract List<WordOveruse> Run();

    /// <summary>
    /// Returns the next word from the parser, or null at the end of the text.
    /// The solution stopwatch is paused for the duration of the parser call.
    /// </summary>
    protected string GetNextWord() {
        _stopWatch.Stop();
        var result = _wordParser.GetNextWord();
        // NOTE(review): the counter is incremented on every call, including the final call
        // that returns null. Left unchanged because correcting it would shift the reported
        // total by one relative to the documented expected output.
        _totalWords++;
        _stopWatch.Start();
        return result;
    }
}

/// <summary>
/// Facilitates parsing of a file into words.
/// Uses a simple algorithm (e.g., it does not keep word-parts together when they are separated by apostrophe or dash).
/// Files whose extension is ".gz" are decompressed on the fly; all input is read as UTF-8.
/// </summary>
public class WordParser : IDisposable {

    // Minimum length for a string to be considered a word.
    const int _minWordLength = 2;

    // Buffer size handed to the StreamReader (4096 * 4096 = 16 MiB).
    const int _readBufferSize = 4096 * 4096;

    Stream _s;
    GZipStream _gz;
    StreamReader _sr;

    // Reused for every token so that a new builder is not allocated per token.
    readonly StringBuilder _token = new StringBuilder();

    /// <summary>
    /// Opens the given file for reading.
    /// </summary>
    /// <param name="filePath">Path of the text file, optionally GZip-compressed (".gz").</param>
    /// <exception cref="InvalidOperationException">The file does not exist.</exception>
    public WordParser(string filePath) {
        if (!File.Exists(filePath)) {
            throw new InvalidOperationException($"File {filePath} was not found.");
        }
        // Ordinal comparison: a file extension is an identifier, not linguistic text.
        if (Path.GetExtension(filePath).Equals(".gz", StringComparison.OrdinalIgnoreCase)) {
            _s = new FileStream(filePath, FileMode.Open, FileAccess.Read);
            _gz = new GZipStream(_s, CompressionMode.Decompress);
            _sr = new StreamReader(_gz, Encoding.UTF8, true, _readBufferSize);
        } else {
            _sr = new StreamReader(filePath, Encoding.UTF8, true, _readBufferSize);
        }
    }

    /// <summary>
    /// Returns the next word, or null if at the end of the text.
    /// Tokens are delimited by whitespace and punctuation; tokens that are shorter than the
    /// minimum word length or contain non-letter characters are skipped. Words are lower-cased.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The parser has already been disposed.</exception>
    public string GetNextWord() {
        if (_sr == null) {
            throw new ObjectDisposedException(nameof(WordParser));
        }

        while (true) {
            _token.Clear();
            bool isEnd = false;
            while (true) {
                // StreamReader.Read() yields a character code, or -1 at the end of the stream.
                int current = _sr.Read();
                if (current == -1) {
                    isEnd = true;
                    break;
                }

                char currentChar = (char)current;
                if (char.IsWhiteSpace(currentChar) || char.IsPunctuation(currentChar)) {
                    break;
                }

                _token.Append(currentChar);
            }

            if (_token.Length == 0) {
                if (isEnd) {
                    return null;
                }
                // Consecutive delimiters produce empty tokens; keep reading.
                continue;
            }

            // Invariant lower-casing keeps the result independent of the current culture
            // (e.g. under a Turkish culture "I" would otherwise map to a dotless "ı").
            var word = _token.ToString().ToLowerInvariant();
            if (IsWord(word)) {
                return word;
            }

            // Not a word. Ignore and continue reading. If the stream has ended, the next
            // iteration reads -1 again with an empty token and returns null.
        }
    }

    // A word has at least _minWordLength characters and consists of letters only.
    bool IsWord(string word) {
        if (word.Length < _minWordLength) {
            return false;
        }
        foreach (var currentChar in word) {
            if (!char.IsLetter(currentChar)) {
                return false;
            }
        }
        return true;
    }

    #region IDisposable Support
    private bool disposedValue = false; // To detect redundant calls

    /// <summary>
    /// Releases the reader and the underlying streams. Safe to call more than once.
    /// </summary>
    protected virtual void Dispose(bool disposing) {
        if (!disposedValue) {
            if (disposing) {
                _sr?.Dispose();
                _gz?.Dispose();
                _s?.Dispose();
            }

            _sr = null;
            _gz = null;
            _s = null;

            disposedValue = true;
        }
    }

    // This code added to correctly implement the disposable pattern.
    public void Dispose() {
        // Put cleanup code in Dispose(bool disposing) above.
        Dispose(true);
        // Lets derived types add a finalizer without having to re-implement Dispose().
        GC.SuppressFinalize(this);
    }
    #endregion
}

/// <summary>
/// Downloads a file from a URL and saves it locally, GZip-compressed.
/// </summary>
public static class FileDownloader {
    // One shared client for all downloads; HttpClient is intended to be reused
    // rather than created and disposed per request.
    private static readonly HttpClient s_httpClient = new HttpClient();

    /// <summary>
    /// Downloads <paramref name="url"/> and writes the response body, GZip-compressed,
    /// to <paramref name="targetFilePath"/> (created or overwritten).
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <param name="targetFilePath">Path of the compressed file to write.</param>
    /// <exception cref="ArgumentException">An argument is null, empty or whitespace.</exception>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status code.</exception>
    public static async Task DownloadFile(string url, string targetFilePath) {
        if (string.IsNullOrWhiteSpace(url)) {
            throw new ArgumentException("A source URL is required.", nameof(url));
        }
        if (string.IsNullOrWhiteSpace(targetFilePath)) {
            throw new ArgumentException("A target file path is required.", nameof(targetFilePath));
        }

        // ResponseHeadersRead: stream the body straight into the compressor instead of
        // buffering the whole download in memory first.
        using (var response = await s_httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead)) {
            response.EnsureSuccessStatusCode();
            using (var content = await response.Content.ReadAsStreamAsync())
            using (var fs = File.Create(targetFilePath))
            using (var gz = new GZipStream(fs, CompressionMode.Compress)) {
                // No Seek: the content stream starts at the beginning and need not be seekable.
                await content.CopyToAsync(gz);
            }
        }
    }
}

​x
 
/*
INSTRUCTIONS:
Write code that detects and displays the most "overused words" in a text file.
A word is considered overused if two instances of this word appear within 30
words of each other. For example, if a particular word appears in positions
4, 25, 35 and 92, there are two overuses of this word (4 to 25 and 25 to 35).
The instance at position 92 is far from the previous instance, so it is not an
overuse.

The result must be a list of all overused words, along with the number of
overuses for each one.

Please consider that the source text can be many millions of words. You do not
have to bother with how the text is parsed into words. Just assume that you
are provided with a method you can call repeatedly to get the next word, until
it returns null, in which case you know you have reached the end.

Expected result for the text in this sample:
------------
Found 142,784 total overuses of 3,502 words, out of a total of 554,801 words.
Top 10:
    the: 30,203, and: 16,027, to: 10,489, of: 9,229, he: 5,633, his: 3,925, in: 3,688, that: 3,406, her: 2,952, was: 2,951
*/
​
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
​
// Solution parameters:
// Source text: War and Peace, by Leo Tolstoy.
string sourceUrl = "https://www.gutenberg.org/files/2600/2600-0.txt";
int proximity = 30;

// The download is stored GZip-compressed to get around .Net Fiddle's 2 MB limit.
string filePath = "source.txt.gz";
await FileDownloader.DownloadFile(sourceUrl, filePath);

// Run the solution against the downloaded text and print the summary.
var solution = new Solution1(filePath, proximity);
solution.RunAndDisplayResults();
return;
​
// ------------------------------------------------------------------
​
/// <summary>
/// Solution to the Overused Words quiz.
/// Update this class to solve the problem.
/// </summary>
public class Solution1 : OverusedWordSolutionBase {

    /// <summary>
    /// Passes the source file path and the proximity window on to the base class.
    /// </summary>
    public Solution1(string filePath, int proximity) : base(filePath, proximity) {
    }

    /// <summary>
    /// Counts the overused words and returns the results.
    /// </summary>
    /// <remarks>
    /// Call GetNextWord() (inherited from the base class) to fetch each word.
    /// A null return value means the end of the file has been reached.
    /// </remarks>
    public override List<WordOveruse> Run() {
        // >>>> Add your solution here and return the results.
        var results = new List<WordOveruse>();
        return results;
    }
}
​
// ------------------------------------------------------------------
​
/// <summary>
/// Result item returned by solutions: a word paired with a count.
/// </summary>
public class WordOveruse {
    /// <summary>The word this entry refers to.</summary>
    public string Word { get; set; }

    /// <summary>The count reported for <see cref="Word"/>.</summary>
    public int Count { get; set; }
}
​
// ------------------------------------------------------------------
​
/// <summary>
/// Base class for solutions to the overused words problem.
/// Supplies words to the solution, times the runs and prints a summary of the results.
/// </summary>
public abstract class OverusedWordSolutionBase {
    protected string _filePath;
    protected int _proximity;

    // Times the solution code only: it is paused while GetNextWord() is inside the parser.
    private readonly Stopwatch _stopWatch = new Stopwatch();
    private WordParser _wordParser;
    private int _totalWords;

    /// <summary>
    /// Number of times <see cref="Run"/> is executed by <see cref="RunAndDisplayResults"/>.
    /// The reported times are the average over these runs. Must be at least 1.
    /// </summary>
    public int RepeatRuns { get; set; } = 3;

    /// <summary>
    /// Stores the path of the text file to analyse and the proximity window for subclasses.
    /// </summary>
    public OverusedWordSolutionBase(string filePath, int proximity) {
        _filePath = filePath;
        _proximity = proximity;
    }

    /// <summary>
    /// Executes <see cref="Run"/> <see cref="RepeatRuns"/> times, each time with a fresh parser,
    /// then writes the average timings, the totals and the first ten results to the console.
    /// </summary>
    /// <returns>The list returned by the last execution of <see cref="Run"/>.</returns>
    /// <exception cref="InvalidOperationException">
    /// <see cref="RepeatRuns"/> is less than 1, or <see cref="Run"/> returned null.
    /// </exception>
    public List<WordOveruse> RunAndDisplayResults() {
        // Guard up front: zero runs would otherwise end in a division by zero below.
        if (RepeatRuns < 1) {
            throw new InvalidOperationException($"{nameof(RepeatRuns)} must be at least 1.");
        }

        TimeSpan solutionTime = TimeSpan.Zero;
        TimeSpan totalTime = TimeSpan.Zero;
        List<WordOveruse> results = null;
        var totalStopWatch = new Stopwatch();
        for (int i = 0; i < RepeatRuns; i++) {
            _totalWords = 0;
            // Created before the try block so that a constructor failure (e.g. missing file)
            // propagates as-is instead of being masked by a null dereference in finally.
            _wordParser = new WordParser(_filePath);
            try {
                _stopWatch.Restart();
                totalStopWatch.Restart();
                results = Run();
                _stopWatch.Stop();
                totalStopWatch.Stop();

                solutionTime += _stopWatch.Elapsed;
                totalTime += totalStopWatch.Elapsed;
            } finally {
                _wordParser.Dispose();
            }
        }

        if (results == null) {
            throw new InvalidOperationException($"{nameof(Run)} returned null instead of a result list.");
        }

        solutionTime = TimeSpan.FromTicks(solutionTime.Ticks / RepeatRuns);
        totalTime = TimeSpan.FromTicks(totalTime.Ticks / RepeatRuns);

        Console.WriteLine($"{this.GetType().Name}:");
        Console.WriteLine($"Time: {solutionTime.TotalMilliseconds / 1000:n3} seconds (total time: {totalTime.TotalMilliseconds / 1000:n3} seconds)");
        Console.WriteLine($"Found {results.Sum(r => r.Count):n0} total overuses of {results.Count:n0} words, out of a total of {_totalWords:n0} words.");
        // The "Top 10" is simply the first ten entries, in the order the solution returned them.
        Console.WriteLine($"Top 10:\r\n    {String.Join(", ", results.Take(10).Select(r => $"{r.Word}: {r.Count:n0}"))}\r\n");

        return results;
    }

    /// <summary>
    /// Implemented by each solution: computes and returns the list of overused words.
    /// </summary>
    public abstract List<WordOveruse> Run();

    /// <summary>
    /// Returns the next word from the parser, or null at the end of the text.
    /// The solution stopwatch is paused for the duration of the parser call.
    /// </summary>
    protected string GetNextWord() {
        _stopWatch.Stop();
        var result = _wordParser.GetNextWord();
        // NOTE(review): the counter is incremented on every call, including the final call
        // that returns null. Left unchanged because correcting it would shift the reported
        // total by one relative to the documented expected output.
        _totalWords++;
        _stopWatch.Start();
        return result;
    }
}
​
/// <summary>
/// Facilitates parsing of a file into words.
/// Uses a simple algorithm (e.g., it does not keep word-parts together when they are separated by apostrophe or dash).
/// Files whose extension is ".gz" are decompressed on the fly; all input is read as UTF-8.
/// </summary>
public class WordParser : IDisposable {

    // Minimum length for a string to be considered a word.
    const int _minWordLength = 2;

    // Buffer size handed to the StreamReader (4096 * 4096 = 16 MiB).
    const int _readBufferSize = 4096 * 4096;

    Stream _s;
    GZipStream _gz;
    StreamReader _sr;

    // Reused for every token so that a new builder is not allocated per token.
    readonly StringBuilder _token = new StringBuilder();

    /// <summary>
    /// Opens the given file for reading.
    /// </summary>
    /// <param name="filePath">Path of the text file, optionally GZip-compressed (".gz").</param>
    /// <exception cref="InvalidOperationException">The file does not exist.</exception>
    public WordParser(string filePath) {
        if (!File.Exists(filePath)) {
            throw new InvalidOperationException($"File {filePath} was not found.");
        }
        // Ordinal comparison: a file extension is an identifier, not linguistic text.
        if (Path.GetExtension(filePath).Equals(".gz", StringComparison.OrdinalIgnoreCase)) {
            _s = new FileStream(filePath, FileMode.Open, FileAccess.Read);
            _gz = new GZipStream(_s, CompressionMode.Decompress);
            _sr = new StreamReader(_gz, Encoding.UTF8, true, _readBufferSize);
        } else {
            _sr = new StreamReader(filePath, Encoding.UTF8, true, _readBufferSize);
        }
    }

    /// <summary>
    /// Returns the next word, or null if at the end of the text.
    /// Tokens are delimited by whitespace and punctuation; tokens that are shorter than the
    /// minimum word length or contain non-letter characters are skipped. Words are lower-cased.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The parser has already been disposed.</exception>
    public string GetNextWord() {
        if (_sr == null) {
            throw new ObjectDisposedException(nameof(WordParser));
        }

        while (true) {
            _token.Clear();
            bool isEnd = false;
            while (true) {
                // StreamReader.Read() yields a character code, or -1 at the end of the stream.
                int current = _sr.Read();
                if (current == -1) {
                    isEnd = true;
                    break;
                }

                char currentChar = (char)current;
                if (char.IsWhiteSpace(currentChar) || char.IsPunctuation(currentChar)) {
                    break;
                }

                _token.Append(currentChar);
            }

            if (_token.Length == 0) {
                if (isEnd) {
                    return null;
                }
                // Consecutive delimiters produce empty tokens; keep reading.
                continue;
            }

            // Invariant lower-casing keeps the result independent of the current culture
            // (e.g. under a Turkish culture "I" would otherwise map to a dotless "ı").
            var word = _token.ToString().ToLowerInvariant();
            if (IsWord(word)) {
                return word;
            }

            // Not a word. Ignore and continue reading. If the stream has ended, the next
            // iteration reads -1 again with an empty token and returns null.
        }
    }

    // A word has at least _minWordLength characters and consists of letters only.
    bool IsWord(string word) {
        if (word.Length < _minWordLength) {
            return false;
        }
        foreach (var currentChar in word) {
            if (!char.IsLetter(currentChar)) {
                return false;
            }
        }
        return true;
    }

    #region IDisposable Support
    private bool disposedValue = false; // To detect redundant calls

    /// <summary>
    /// Releases the reader and the underlying streams. Safe to call more than once.
    /// </summary>
    protected virtual void Dispose(bool disposing) {
        if (!disposedValue) {
            if (disposing) {
                _sr?.Dispose();
                _gz?.Dispose();
                _s?.Dispose();
            }

            _sr = null;
            _gz = null;
            _s = null;

            disposedValue = true;
        }
    }

    // This code added to correctly implement the disposable pattern.
    public void Dispose() {
        // Put cleanup code in Dispose(bool disposing) above.
        Dispose(true);
        // Lets derived types add a finalizer without having to re-implement Dispose().
        GC.SuppressFinalize(this);
    }
    #endregion
}
​
/// <summary>
/// Downloads a file from a URL and saves it locally, GZip-compressed.
/// </summary>
public static class FileDownloader {
    // One shared client for all downloads; HttpClient is intended to be reused
    // rather than created and disposed per request.
    private static readonly HttpClient s_httpClient = new HttpClient();

    /// <summary>
    /// Downloads <paramref name="url"/> and writes the response body, GZip-compressed,
    /// to <paramref name="targetFilePath"/> (created or overwritten).
    /// </summary>
    /// <param name="url">Address of the resource to download.</param>
    /// <param name="targetFilePath">Path of the compressed file to write.</param>
    /// <exception cref="ArgumentException">An argument is null, empty or whitespace.</exception>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status code.</exception>
    public static async Task DownloadFile(string url, string targetFilePath) {
        if (string.IsNullOrWhiteSpace(url)) {
            throw new ArgumentException("A source URL is required.", nameof(url));
        }
        if (string.IsNullOrWhiteSpace(targetFilePath)) {
            throw new ArgumentException("A target file path is required.", nameof(targetFilePath));
        }

        // ResponseHeadersRead: stream the body straight into the compressor instead of
        // buffering the whole download in memory first.
        using (var response = await s_httpClient.GetAsync(url, HttpCompletionOption.ResponseHeadersRead)) {
            response.EnsureSuccessStatusCode();
            using (var content = await response.Content.ReadAsStreamAsync())
            using (var fs = File.Create(targetFilePath))
            using (var gz = new GZipStream(fs, CompressionMode.Compress)) {
                // No Seek: the content stream starts at the beginning and need not be seekable.
                await content.CopyToAsync(gz);
            }
        }
    }
}

Bienvenido
Ingrese el Nombre de Usuario:

Cached Result
Last Run:	8:23:33 pm
Compile:	0.11s
Execute:	0s
Memory:	32kb
CPU:	0.016s

View IL Code