namespace Gspann.Web.Helpers
public class HTMLCrawler : IComputedIndexField
/// <summary>
/// Gets or sets the name of the index field associated with this computed field.
/// </summary>
/// <remarks>
/// NOTE(review): presumably required by <c>IComputedIndexField</c> and populated
/// from the index configuration rather than by this class — confirm against the
/// interface definition and the configuration that registers this type.
/// </remarks>
public string FieldName { get; set; }
/// <summary>
/// Gets or sets the return type declared for this computed field, as a string.
/// </summary>
/// <remarks>
/// NOTE(review): presumably required by <c>IComputedIndexField</c> and supplied
/// by the index configuration; the set of accepted values is not visible in this
/// file — confirm against the interface documentation.
/// </remarks>
public string ReturnType { get; set; }
public object ComputeFieldValue(IIndexable indexable)
Assert.ArgumentNotNull(indexable, "indexable");
Item item = indexable as SitecoreIndexableItem;
using (new DatabaseSwitcher(item.Database))
if (item != null && item.IsPageForCrawling)
itemUrl = LinkManager.GetItemUrl(item, new UrlOptions()
AlwaysIncludeServerUrl = true,
LanguageEmbedding = LanguageEmbedding.Never
}).Replace("/sitecore/shell/MyWebsite/Home", "");
using (var client = new WebClient())
string webContent = client.DownloadString(itemUrl);
HtmlDocument htmlDocument = new HtmlDocument();
htmlDocument.LoadHtml(webContent);
string crawlableContent = GetOnlyContent(htmlDocument);