using System.Collections;
using System.Collections.Generic;
using System.Runtime.Serialization.Formatters;
using System.ComponentModel.DataAnnotations;
using System.Globalization;
using Newtonsoft.Json.Linq;
using Newtonsoft.Json.Converters;
using Newtonsoft.Json.Serialization;
// Demo/test routine: for each consecutive pair of <h2> headings whose text is
// 'Applicant' or 'Agent', gathers the <dd> nodes located between the two headings,
// prints them, and checks the result with assertions.
// NOTE(review): block braces are not visible in this view, so which statements sit
// inside the for/foreach bodies cannot be confirmed from here. No call that loads
// markup into `doc` (for example from GetHtml()) is visible either - confirm against
// the full file.
public static void Test()
var doc = new HtmlDocument();
// Select every <h2> whose text is exactly 'Applicant' or 'Agent'.
// NOTE(review): HtmlAgilityPack SelectNodes is believed to return null when nothing
// matches, which would make h2nodes.Count below throw - confirm against the library docs.
var h2nodes = doc.DocumentNode.SelectNodes("//h2[text() = 'Applicant' or text() = 'Agent']");
// Walk the headings two at a time. 2*(Count/2) rounds the bound down to an even
// number, so h2nodes[i+1] is always within range and a trailing unpaired heading is ignored.
for (int i = 0, count = 2*(h2nodes.Count/2); i < count; i+=2)
var startnode = h2nodes[i];
var endnode = h2nodes[i+1];
// Take the nodes on the XPath following axis of startnode, stop at the first node
// that is endnode (endnode itself is excluded), and keep only <dd> elements.
var query = startnode.SelectNodes("./following::node()")
.TakeWhile(n => n != endnode)
.Where(n => n.Name == "dd");
// Snapshot of the inner texts; `query` is enumerated a second time by the foreach below.
var innerTexts = query.Select(n => n.InnerText).ToList();
Console.WriteLine("Query:");
Console.WriteLine(string.Format("{0} nodes found:", innerTexts.Count));
Console.WriteLine("Inner Texts:");
Console.WriteLine(JsonConvert.SerializeObject(innerTexts, Formatting.Indented));
Console.WriteLine("Nodes:");
foreach (var node in query)
Console.WriteLine(node.OuterHtml);
// Each selected node must be a <dd> and must not contain the marker text that the
// GetHtml() fixture puts on entries outside the Applicant..Agent range.
Assert.IsTrue(node.Name == "dd" && !node.InnerText.Contains("Don't Include Me"));
// The end heading must not be reported on an earlier source line than the start heading.
Assert.IsTrue(endnode.Line >= startnode.Line);
// Expects exactly 10 inner texts.
// NOTE(review): the fixture lines visible in GetHtml() show fewer <dd> entries between
// the two headings; the fixture is probably truncated in this view - confirm.
Assert.IsTrue(innerTexts.Count == 10);
// Returns the HTML fixture as a string.
// The visible markup contains an "Applicant" <h2> followed later by an "Agent" <h2>,
// with <dd> entries before the first and after the second heading that carry the
// marker text the Test() assertions reject.
// NOTE(review): the delimiters of the string literal are not visible in this view; the
// doubled quote characters suggest a verbatim string literal - confirm. Because every
// line below is part of that literal, no comments are placed inside it.
// NOTE(review): the <h2> elements with class DontIncludeMe and AlsoDontIncludeMe are
// closed with </h3>. This may be deliberate malformed markup for the parser or a typo -
// confirm before changing, since it is runtime data.
static string GetHtml() =>
<body class=""some example"">
<div id=""main-container-z"">
<h2 class=""DontIncludeMe"">1</h3>
<h3 class=""DontIncludeMe"">1</h3>
<dl class=""Grid LeftCol"">
<dd>Don't Include Me</dd>
<!-- Applicants section -->
<h2 class=""GridTitle"">Applicant</h2>
<h3 class=""DataTitle"">1</h3>
<dl class=""Grid LeftCol"">
<dl class=""Grid RightCol"">
<dd>Some address here</dd>
<h3 class=""DataTitle"">2</h3>
<dl class=""Grid LeftCol"">
<dl class=""Grid RightCol"">
<dd>Some address here1</dd>
<h2 class=""GridTitle"">Agent</h2>
<h2 class=""AlsoDontIncludeMe"">1</h3>
<h3 class=""AlsoDontIncludeMe"">1</h3>
<dl class=""Grid LeftCol"">
<dd>Don't Include Me</dd>
// Program entry point: prints the runtime description and the assembly names of
// Json.NET and HtmlAgilityPack to the console.
// NOTE(review): neither a call to Test() nor a try/catch is visible in this view. The
// final message is presumably written from an exception handler rather than
// unconditionally - confirm against the full file.
public static void Main()
Console.WriteLine("Environment version: {0}", System.Runtime.InteropServices.RuntimeInformation.FrameworkDescription);
// Assembly.FullName is printed for each library used by this file.
Console.WriteLine("Json.NET version: " + typeof(JsonSerializer).Assembly.FullName);
Console.WriteLine("HtmlAgilityPack version: " + typeof(HtmlDocument).Assembly.FullName);
Console.WriteLine("Failed with unhandled exception: ");
/// <summary>
/// Extension methods for navigating HtmlAgilityPack <see cref="HtmlNode"/> trees.
/// </summary>
public static partial class HtmlAgilityPackExtensions
{
    /// <summary>
    /// Finds the deepest node that is an ancestor-or-self of both
    /// <paramref name="node1"/> and <paramref name="node2"/>.
    /// </summary>
    /// <param name="node1">First node; must not be null.</param>
    /// <param name="node2">Second node; must not be null and must share
    /// <paramref name="node1"/>'s owner document.</param>
    /// <returns>The last node at which the two ancestor-or-self chains still coincide.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    /// <exception cref="ArgumentException">The nodes have different owner documents.</exception>
    /// <exception cref="InvalidOperationException">
    /// Thrown by <c>Last</c> when the two chains share no node at any position.
    /// </exception>
    public static HtmlNode LowestCommonNode(this HtmlNode node1, HtmlNode node2)
    {
        // Separate guards so the exception names the argument that was actually null.
        if (node1 == null)
        {
            throw new ArgumentNullException(nameof(node1));
        }

        if (node2 == null)
        {
            throw new ArgumentNullException(nameof(node2));
        }

        if (node1.OwnerDocument != node2.OwnerDocument)
        {
            throw new ArgumentException("Both nodes must belong to the same document.", nameof(node2));
        }

        // Both chains are reversed and paired position by position; the last pair whose
        // members are the same node is the lowest common node. (Assumes AncestorsAndSelf()
        // enumerates from the node upward, so the reversed chains start at the outermost
        // ancestor - per HtmlAgilityPack documentation.)
        return node1.AncestorsAndSelf().Reverse()
            .Zip(node2.AncestorsAndSelf().Reverse())
            .Last(p => p.Item1 == p.Item2)
            .Item1;
    }
}
// Second part of the partial extension class: helpers that enumerate nodes starting
// from a given node (NextNodes) and up to a given end node (NodesUntil).
// NOTE(review): braces, conditionals, loop headers and yield/return statements are not
// visible in this view, so the comments below describe only the statements shown.
public static partial class HtmlAgilityPackExtensions
// Enumerates nodes related to `start`; the visible statements move to a first child,
// a next sibling, or the next sibling of an ancestor.
// NOTE(review): `includeSelf` is not referenced by any visible line and no yield/return
// is visible; presumably `start` is yielded first when includeSelf is true - confirm.
public static IEnumerable<HtmlNode> NextNodes(this HtmlNode start, bool includeSelf = false)
var current = start.FirstChild;
// Candidate next node: first the child of the current node...
var next = current.FirstChild;
// ...then the next sibling. NOTE(review): as shown this assignment is unconditional;
// it is presumably guarded by a null check on `next` that is not visible - confirm.
next = current.NextSibling;
// While no candidate has been found, climb the parent chain and try each ancestor's
// next sibling; stops when a candidate is found or the chain runs out.
for (var parent = current.ParentNode; parent != null && next == null; parent = parent.ParentNode)
next = parent.NextSibling;
// Enumerates the nodes produced by NextNodes(start, includeSelf) up to `end`.
// Throws ArgumentNullException when either node is null and ArgumentException when
// the two nodes have different owner documents.
public static IEnumerable<HtmlNode> NodesUntil(this HtmlNode start, HtmlNode end, bool includeSelf = false)
if (start == null || end == null)
throw new ArgumentNullException();
if (start.OwnerDocument != end.OwnerDocument)
throw new ArgumentException();
// TakeWhile stops before `end`, so `end` itself is not part of this sequence.
var query = start.NextNodes(includeSelf).TakeWhile(n => n != end);
// Appends `end` to the sequence. NOTE(review): this may be conditional (for example on
// includeSelf), and the statement returning `query` is not visible - confirm.
query = query.Concat(new [] { end });