NoHTML(string Htmlstring) 能否保留 <a> <b> <p> <strong> <img> <script src="/*.js"></script> 这几个标签？
下面的 NoHTML 方法会去掉所有 HTML 标签，能否改成保留 <a> <b> <p> <strong> <img> <script src="/*.js"></script> 这几个标签呢？
/// <summary>
/// Reduces an HTML fragment to compact plain text.
/// </summary>
/// <remarks>
/// Processing order matters and is as follows:
/// script elements are removed together with their content; each closing
/// <c>&lt;/p&gt;</c> is tagged with a temporary <c>:hn</c> marker; all tags and
/// comment remnants are stripped; a fixed set of named/numeric entities is decoded;
/// ALL whitespace is removed; any remaining numeric entities are dropped; stray
/// angle brackets are removed; and finally every <c>:hn</c> marker becomes
/// <c>"&lt;br&gt; "</c>, so paragraph ends are the only markup in the result.
/// Note that literal <c>:hn</c> text in the input is also turned into a line break,
/// and that <c>&amp;lt;</c>/<c>&amp;gt;</c> entities are intentionally left encoded.
/// </remarks>
/// <param name="Htmlstring">HTML text to clean. May be null or empty.</param>
/// <returns>
/// The cleaned text, or an empty string when <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    const string paragraphMarker = ":hn";
    const RegexOptions options = RegexOptions.IgnoreCase;

    string text = Htmlstring;

    // Remove scripts. Singleline lets '.' cross line breaks so multi-line
    // script bodies are removed instead of leaking into the output text.
    text = Regex.Replace(text, @"<script[^>]*?>.*?</script>", "", options | RegexOptions.Singleline);

    // Remember paragraph ends with a marker that survives tag stripping.
    text = Regex.Replace(text, @"</p>", "</p>" + paragraphMarker, options);

    // Strip tags. (The former "<div>"/"</div>" Replace calls discarded their
    // result and were redundant with the generic tag pattern, so they are gone.)
    text = Regex.Replace(text, @"<(meta)[^>]*>", "", options);
    text = Regex.Replace(text, @"<(.[^>]*)>", "", options);

    // Clean up comment remnants the tag pattern could not match.
    text = Regex.Replace(text, @"-->", "", options);
    text = Regex.Replace(text, @"<!--.*", "", options);

    // Entities decoded before whitespace removal: { pattern, replacement }.
    string[,] leadingEntities =
    {
        { @"&(quot|#34);", "\"" },
        { @"&(amp|#38);", "&" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },
    };
    for (int i = 0; i < leadingEntities.GetLength(0); i++)
    {
        text = Regex.Replace(text, leadingEntities[i, 0], leadingEntities[i, 1], options);
    }

    // Remove every whitespace character (this also covers CR/LF pairs).
    text = Regex.Replace(text, @"(\s)", "", options);

    // Drop any numeric entity that was not decoded above.
    text = Regex.Replace(text, @"&#(\d+);", "", options);

    // Typographic entities decoded after whitespace removal.
    string[,] trailingEntities =
    {
        { @"&(rdquo);", "”" },
        { @"&(ldquo);", "“" },
        { @"&(mdash);", "—" },
        { @"&(hellip);", "…" },
    };
    for (int i = 0; i < trailingEntities.GetLength(0); i++)
    {
        text = Regex.Replace(text, trailingEntities[i, 0], trailingEntities[i, 1], options);
    }

    // Strings are immutable: the result of Replace must be assigned back,
    // otherwise stray angle brackets stay in the output.
    text = text.Replace("<", "").Replace(">", "");

    // Turn the paragraph markers into line breaks (done last so the "<br>"
    // is not removed by the bracket stripping above).
    text = Regex.Replace(text, paragraphMarker, "<br> ", options);

    return text;
}
[解决办法]
/// <summary>
/// Removes every HTML tag from the input except a small whitelist, leaving
/// text content untouched.
/// </summary>
/// <remarks>
/// Preserved tags (opening, closing and self-closing forms, case-insensitive):
/// <c>a</c>, <c>b</c>, <c>br</c>, <c>p</c>, <c>strong</c>, <c>img</c>, <c>script</c>.
/// Only the tags themselves are removed; the text between removed tags is kept.
/// </remarks>
/// <param name="HTML">HTML text to filter. May be null or empty.</param>
/// <returns>
/// The filtered markup, or an empty string when <paramref name="HTML"/> is null or empty.
/// </returns>
public static string GetTextFromHTML(string HTML)
{
    if (string.IsNullOrEmpty(HTML))
    {
        return string.Empty;
    }

    // The optional '/' lives INSIDE the negative lookahead so the engine cannot
    // backtrack past it and strip closing tags such as </a>. The trailing
    // (?=[\s/>]) requires the whole tag name to match, so <abbr> or <pre> are
    // not mistaken for <a> or <p>.
    const string disallowedTagPattern =
        @"<(?!/?(?:a|b|br|p|strong|img|script)(?=[\s/>]))[^>]*>";

    return System.Text.RegularExpressions.Regex.Replace(
        HTML,
        disallowedTagPattern,
        "",
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}