using System;
using System.Collections.Generic;
using System.Net;
using System.Text.RegularExpressions;
using System.Web;
/// <summary>
/// Downloads the IMDb list page, parses at most the first ten list entries
/// into <see cref="Movie"/> objects and dumps the result.
/// </summary>
public static void Main()
{
    // Upper bound on how many list entries are parsed.
    const int maxMovies = 10;

    string url = "https://www.imdb.com/list/ls016522954/?ref_=nv_tvv_dvd&sort=release_date,desc&st_dt=&mode=detail&page=1";

    // Keep the markup outside the using scope so the client can be disposed
    // as soon as the download has finished.
    string html;
    using (var client = new WebClient())
    {
        html = client.DownloadString(url);
    }

    var doc = new HtmlDocument();
    // Load the downloaded markup; without this the XPath query below would
    // run against an empty document.
    doc.LoadHtml(html);

    var xpath = "//div[@class='lister-item mode-detail']";
    var movieNodes = doc.DocumentNode.SelectNodes(xpath);

    var movies = new List<Movie>();

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // and the page may contain fewer entries than maxMovies.
    if (movieNodes != null)
    {
        for (int i = 0; i < movieNodes.Count && i < maxMovies; i++)
        {
            var movie = ParseMovieUsingXpath(movieNodes[i]);
            movies.Add(movie);
        }
    }

    Console.WriteLine("Parsed movies are:\n");
    FiddleHelper.Dump(movies);
}
/// <summary>
/// Builds a <see cref="Movie"/> from a single list-entry node by evaluating
/// XPath queries relative to that node.
/// </summary>
/// <param name="htmlNode">The node of one list entry; all queries are relative to it.</param>
/// <returns>
/// A <see cref="Movie"/> whose fields hold the cleaned text of the matched
/// elements. A field is an empty string when its element is not present.
/// </returns>
public static Movie ParseMovieUsingXpath(HtmlNode htmlNode)
{
    var title = ReadCleanText(htmlNode, ".//h3[@class='lister-item-header']/a");
    var runtime = ReadCleanText(htmlNode, ".//span[@class='runtime']");
    var genre = ReadCleanText(htmlNode, ".//span[@class='genre']");

    string releaseDate;
    var releaseDateNode = htmlNode.SelectSingleNode(".//div[@class='list-description']/p/b");
    if (releaseDateNode != null)
    {
        // Preferred source: the bold element inside the list description.
        releaseDate = releaseDateNode.InnerText.CleanParsedText() ?? string.Empty;
    }
    else
    {
        // Fallback: take the whole description paragraph and keep what follows
        // the last ", " separator.
        // NOTE(review): assumes the date is the trailing comma-separated part
        // of the paragraph -- confirm against the live page markup.
        releaseDate = ReadCleanText(htmlNode, ".//div[@class='list-description']/p");

        int separatorIndex = releaseDate.LastIndexOf(", ", StringComparison.Ordinal);
        if (separatorIndex >= 0)
        {
            // Only slice when the separator exists; slicing on -1 would
            // silently drop the first character of the text.
            releaseDate = releaseDate.Substring(separatorIndex + 2);
        }
    }

    return new Movie
    {
        Title = title,
        Genre = genre,
        Runtime = runtime,
        ReleaseDate = releaseDate
    };
}

/// <summary>
/// Returns the cleaned inner text of the first node matching
/// <paramref name="xpath"/> below <paramref name="scope"/>, or an empty
/// string when no node matches.
/// </summary>
private static string ReadCleanText(HtmlNode scope, string xpath)
{
    var match = scope.SelectSingleNode(xpath);
    if (match == null)
    {
        return string.Empty;
    }

    return match.InnerText.CleanParsedText() ?? string.Empty;
}
/// <summary>
/// Plain data holder for one scraped list entry. All values are stored as
/// the text extracted from the page; nothing is parsed into typed values.
/// </summary>
public class Movie
{
    /// <summary>Title text of the entry.</summary>
    public string Title { get; set; }

    /// <summary>Genre text of the entry.</summary>
    public string Genre { get; set; }

    /// <summary>Runtime text of the entry.</summary>
    public string Runtime { get; set; }

    /// <summary>Release date text of the entry.</summary>
    public string ReleaseDate { get; set; }
}
/// <summary>
/// String helpers for tidying text extracted from HTML.
/// </summary>
public static class StringExtensions
{
    // Matches any run of two or more whitespace characters. A single
    // whitespace character is left untouched.
    static readonly Regex TextCleanRegex = new Regex("\\s{2,}", RegexOptions.Compiled);

    /// <summary>
    /// HTML-decodes <paramref name="text"/>, collapses every run of two or
    /// more whitespace characters into one space and trims both ends.
    /// </summary>
    /// <param name="text">Raw text; may be null.</param>
    /// <returns>
    /// The cleaned text, or an empty string when <paramref name="text"/> is
    /// null, empty or whitespace only, so callers can keep chaining string
    /// operations without a null check.
    /// </returns>
    public static string CleanParsedText(this string text)
    {
        if (String.IsNullOrWhiteSpace(text))
        {
            return string.Empty;
        }

        var cleanText = HttpUtility.HtmlDecode(text);
        cleanText = TextCleanRegex.Replace(cleanText, " ");
        return cleanText.Trim();
    }
}