Thursday 6 October 2022

.NET C#: Read an HTML string and get the attribute values of HTML tags:

/// <summary>
/// Scans a built-in sample HTML snippet for <c>&lt;a class="test" id="..."&gt;</c> anchors
/// and collects the value of each anchor's <c>id</c> attribute as an integer.
/// </summary>
/// <returns>
/// One entry per matched anchor, in the order the anchors appear in the snippet.
/// The list is empty when nothing matches.
/// </returns>
public List<int> GetIds()
{
    const string anchorPattern = "<a class=\"test\" id=\"(.*?)\">(.*?)</a>";
    const string sampleHtml = "<p>hi&nbsp;<a class=\"test\" id=\"8369\">Anil Kumar</a> how are you? <a class=\"test\" id=\"8370\">Manoj Kumar</a></p>";

    MatchCollection anchors = Regex.Matches(sampleHtml, anchorPattern, RegexOptions.Multiline);
    var collected = new List<int>();

    // An empty collection simply skips the loop, so no separate emptiness check is needed.
    for (int index = 0; index < anchors.Count; index++)
    {
        // Hand the full text of the matched anchor to the attribute helper.
        string anchorMarkup = anchors[index].Value;
        collected.Add(GetAttributeValue<int>(anchorMarkup, "id"));
    }

    return collected;
}

/// <summary>
/// Finds the first double-quoted value of the attribute named <paramref name="attributeName"/>
/// in <paramref name="htmlString"/> and converts it to <typeparamref name="T"/> with
/// Newtonsoft.Json's <c>JsonConvert.DeserializeObject</c>.
/// </summary>
/// <typeparam name="T">Target type; the raw attribute text must be valid JSON for this type (e.g. digits for <c>int</c>).</typeparam>
/// <param name="htmlString">HTML fragment to search.</param>
/// <param name="attributeName">Attribute name, matched case-insensitively and literally.</param>
/// <returns>The attribute value converted to <typeparamref name="T"/>.</returns>
/// <remarks>
/// Only the <c>name="value"</c> form (double quotes, no spaces around <c>=</c>) is recognised.
/// When the attribute is absent, an empty string is passed to the JSON converter.
/// NOTE(review): the converter's result for empty input depends on <typeparamref name="T"/> - confirm
/// against the Newtonsoft.Json documentation before relying on it.
/// </remarks>
public static T GetAttributeValue<T>(string htmlString, string attributeName)
{
    // Regex.Escape makes characters such as '.', '(' or '$' in the name match literally instead of
    // acting as regex syntax (it also rejects a null name with ArgumentNullException).
    // The inner negative lookbehind (?<![\w-]) requires the name to start at an attribute boundary,
    // so asking for "id" no longer picks up the value of "data-id" or "valid".
    string pattern = string.Format(@"(?<=(?<![\w-]){0}="").*?(?="")", Regex.Escape(attributeName));

    // The static Regex.Match overload avoids constructing a new Regex instance on every call.
    Match match = Regex.Match(htmlString, pattern, RegexOptions.IgnoreCase);

    return JsonConvert.DeserializeObject<T>(match.Value); // Method of Newtonsoft.Json
}


No comments:

Post a Comment