using System.Text.RegularExpressions;
/// <summary>
/// Loads a fixed hk01.com search page, follows the first link found inside each
/// matching result div, and prints every link on the linked page whose href
/// starts with "/tag", together with its text.
/// </summary>
/// <remarks>
/// Consecutive divs that resolve to the same link are fetched only once.
/// Network failures raised by <c>HtmlWeb.Load</c> are not caught here.
/// </remarks>
public static void Main()
{
    const string newspaperUrl = "https://www.hk01.com";
    const string searchUrl = @"https://www.hk01.com/search?q=%E3%80%8A%E6%97%85%E9%81%8A%E6%A5%AD%E6%A2%9D%E4%BE%8B%E8%8D%89%E6%A1%88%E3%80%8B";

    // NOTE(review): this class string looks machine-generated and may change
    // whenever the site is redeployed - confirm it still matches the live markup.
    const string resultClass = "sc-bdVaJa jpNXjy";

    HtmlWeb hw = new HtmlWeb();
    HtmlDocument doc = hw.Load(searchUrl);

    // SelectNodes returns null (not an empty collection) when nothing matches.
    var body = doc.DocumentNode.SelectNodes("//div[@class='" + resultClass + "']");
    if (body == null)
    {
        Console.Error.WriteLine("No div with class '" + resultClass + "' found at " + searchUrl);
        return;
    }

    string previousLink = "";
    foreach (HtmlNode divNode in body)
    {
        Console.WriteLine(divNode.GetAttributeValue("class", string.Empty));
        Console.WriteLine(divNode.InnerHtml);

        // The leading "." keeps the query relative to this div; a bare "//a"
        // would search the whole document from its root.
        HtmlNode anchor = divNode.SelectSingleNode(".//a[@href]");
        if (anchor == null)
        {
            continue;
        }

        string articleLink = anchor.GetAttributeValue("href", string.Empty);
        Console.WriteLine(articleLink);
        if (articleLink.Length == 0)
        {
            continue;
        }

        // Skip a link identical to the one handled for the previous div.
        if (articleLink == previousLink)
        {
            continue;
        }
        previousLink = articleLink;

        // NOTE(review): assumes the href is site-relative (starts with "/") - confirm.
        string articleUrl = newspaperUrl + articleLink;
        Console.WriteLine("Surf:" + articleUrl + "...");
        HtmlDocument articleDoc = hw.Load(articleUrl);

        var articleLinks = articleDoc.DocumentNode.SelectNodes("//a[@href]");
        if (articleLinks == null)
        {
            continue;
        }

        foreach (HtmlNode tagCandidate in articleLinks)
        {
            string href = tagCandidate.GetAttributeValue("href", string.Empty);
            if (href.StartsWith("/tag", StringComparison.Ordinal))
            {
                Console.WriteLine(href + " - " + tagCandidate.InnerText);
            }
        }
    }
}