using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;
/// <summary>
/// Entry point: runs <see cref="Test"/> on a sample anchor tag and writes every
/// returned item to the console, one per line, prefixed with "Item: ".
/// </summary>
public static void Main()
{
    // Sample corpus of URL-like text. It is not passed to Test at present (only
    // `anchor` is). The double quotes embedded in the HTML fragments are escaped
    // so the literal is a single well-formed string.
    string input = "http://www.mywebsite1.com/yahoo.org/search=xyz https://www.mywebsite2.com/www.yahoo.org/search=xyz http://mywebsite3.com/msn.co.uk' https://mywebsite4.com/http://msn.co.uk' Matches: regexlib.com | this.is.a.museum | 3com.com Non-Matches: notadomain-.com | helloworld.c | .oops.org Matches: this.com | https://this.com:8080/this/this.htm | ftp://255.255.255.255/ Non-Matches: .this.com | https://this.com:/ | ftps://255.256.255.255/ Matches: http://www.website.com/index.html | www.website.com | website.com Non-Matches: Works in all my tests. Does not capture protocol. Matches: www.blah.com:8103 | www.blah.com/blah.asp?sort=ASC | www.blah.com/blah.htm#blah Non-Matches: www.state.ga | http://www.blah.ru Matches: http://www.acme.com | ftp://ftp.acme.com/hede | gopher://asdfasd.asdfasdf Non-Matches: <a href=\"http://acme.com\">http://www.acme.com</a> | <br>http://www.acme. Matches: http://www.acme.com | ftp://ftp.acme.com/hede | gopher://asdfasd.asdfasdf Non-Matches: <a href=\"http://acme.com\">http://www.acme.com</a> | <br>http://www.acme. Lorem ipsum dolor sit amet, consectetur adipiscing elit www.google.com Lorem ipsum dolor sit amet, consectetur adipiscing elit Lorem ipsum dolor sit amet, consectetur adipiscing elit, ... www.yahoo.com Lorem ipsum dolor sit amet, consectetur adipiscing elit, Lorem ipsum dolor sit amet, consectetur adipiscing elit, www.pyramidplatform.com Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Lorem ipsum Joe@the-Automator.com dolor sit amet, (555)555.8957 the-Automator.com consectetuer adipiscing elit. https://www.linkedin.com/in/joeglines/ @recruiting lorem ipsum dolor sit http://github.com/maestrith/AHK-Studio elit. dolor sit www.github.com/maestrith/AHK-Studio elit. nic.uk nic.uk/ http://nic.uk http://nic.uk/ https://example.com/test/?a=bcd Verifying they are valid URLs is another story! It would also match: index.php It would not match: directory/index.php";

    // Input actually fed to Test: a single self-closing anchor tag.
    string anchor = "<a href=\"google.com\" />";

    List<string> output = Test(anchor);
    foreach (var item in output)
    {
        Console.WriteLine("Item: " + item);
    }
}
/// <summary>
/// Compiled regular expression that matches a '{', lazily captures the characters
/// that follow into group 1, and stops at the next '}'.
/// </summary>
private static readonly Regex Pattern =
    new Regex("{(.*?)}", RegexOptions.Compiled);
/// <summary>
/// Finds every anchor element of the form
/// <c>&lt;a ... href="URL" ...&gt;TEXT&lt;/a&gt;</c> in <paramref name="input"/>.
/// </summary>
/// <remarks>
/// Matching is case-insensitive and culture-invariant. The href value must be
/// enclosed in double quotes, and a closing <c>&lt;/a&gt;</c> is required, so
/// self-closing anchors are not returned. Because the Singleline option is not
/// set, the link text cannot span a line break.
/// </remarks>
/// <param name="input">Text to scan for anchor elements.</param>
/// <returns>
/// The full matched text of each anchor element, in order of appearance;
/// an empty list when nothing matches.
/// </returns>
public static List<string> Test(string input)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

    // Named groups: "url" is the quoted href value, "text" is the element content.
    // The double quotes around the href value are part of the pattern.
    const string anchorPattern = "<a[^>]*? href=\"(?<url>[^\"]+)\"[^>]*?>(?<text>.*?)</a>";

    List<string> fp3 = new List<string>();
    foreach (Match match in Regex.Matches(input, anchorPattern, options))
    {
        fp3.Add(match.Value);
    }

    return fp3;
}