ASP.NET 正则替换 HTML 标签与删除指定字符的方法:使用正则表达式进行规则过滤。由于 HTML 标记都是基于 <> 这种格式,而且还有类似 &nbsp; 这样的实体符号,所以分 2 次处理,将字符串处理为不含 HTML 格式的字符串。
/// <summary>
/// Removes HTML tags and HTML character entities from a string in two passes.
/// </summary>
/// <param name="html">The HTML text to clean. Must not be null.</param>
/// <returns>The input with every <c>&lt;...&gt;</c> tag and every character entity removed.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="html"/> is null.</exception>
public string NoHtml(string html)
{
    // Pass 1: drop anything that looks like a tag.
    string withoutTags = System.Text.RegularExpressions.Regex.Replace(html, "<[^>]+>", "");

    // Pass 2: drop character entities (&name; &#123; &#x1F;).
    // The pattern is limited to real entity syntax; the previous "&[^;]+;" also deleted
    // ordinary text lying between any '&' and the next ';' (e.g. "R&D team; ...").
    return System.Text.RegularExpressions.Regex.Replace(
        withoutTags,
        "&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);",
        "");
}
功能增强代码:
/// <summary>
/// Converts an HTML fragment to encoded plain text: removes script blocks and tags,
/// decodes a handful of common entities, strips any remaining angle brackets and
/// finally HTML-encodes and trims the result.
/// </summary>
/// <param name="Htmlstring">The HTML text to clean; null or empty yields an empty string.</param>
/// <returns>The cleaned, HTML-encoded and trimmed text.</returns>
public string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Remove script blocks. Singleline lets '.' cross line breaks so multi-line scripts are removed too.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove the remaining tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Remove a line break together with the whitespace run that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove leftovers of HTML comments.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode common named / numeric entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Drop any other numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<img[^>]*>;", "", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the results were previously discarded.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // WebUtility.HtmlEncode needs no active HTTP request, unlike HttpContext.Current.Server,
    // which is null outside of request processing.
    Htmlstring = System.Net.WebUtility.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
替换字符串,正则替换字符串,不区分大小写替换字符串,替换html标签,正则匹配
using System;
using System.Text;
using System.Text.RegularExpressions;namespace EC
{
/// <summary>
/// String helpers: regex-based replace/remove, HTML tag stripping, and simple
/// conversions between plain text and HTML.
/// </summary>
public class StringRepStrs
{
    // Matches a string consisting only of one or more digits; built once and reused.
    private static readonly Regex NumericPattern = new Regex(@"^\d+$");

    public StringRepStrs()
    {
    }

    #region Plain replace
    /// <summary>
    /// Replaces every match of a regular expression using default matching options.
    /// </summary>
    /// <param name="src">The source string.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string.</param>
    /// <returns>The modified string.</returns>
    public static string Replace(string src, string pattern, string replacement)
    {
        return Replace(src, pattern, replacement, RegexOptions.None);
    }
    #endregion

    #region Regex replace
    /// <summary>
    /// Replaces every match of a regular expression using the given matching options.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string.</param>
    /// <param name="options">The matching options.</param>
    /// <returns>The modified string.</returns>
    public static string Replace(string src, string pattern, string replacement, RegexOptions options)
    {
        // The static overload goes through the framework's regex cache. Building a new
        // RegexOptions.Compiled instance on every call paid the compilation cost each time.
        return Regex.Replace(src, pattern, replacement, options);
    }
    #endregion

    #region Case-insensitive replace
    /// <summary>
    /// Replaces every match of a regular expression, ignoring case.
    /// </summary>
    /// <param name="src">The source string.</param>
    /// <param name="pattern">The regular expression pattern to match.</param>
    /// <param name="replacement">The replacement string.</param>
    /// <returns>The modified string.</returns>
    public static string ReplaceIgnoreCase(string src, string pattern, string replacement)
    {
        return Replace(src, pattern, replacement, RegexOptions.IgnoreCase);
    }
    #endregion

    /// <summary>
    /// Removes every match of a regular expression from a string.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to remove.</param>
    /// <returns>The string with all matches removed.</returns>
    public static string Drop(string src, string pattern)
    {
        return Replace(src, pattern, "");
    }

    /// <summary>
    /// Removes every match of a regular expression from a string, ignoring case.
    /// </summary>
    /// <param name="src">The string to modify.</param>
    /// <param name="pattern">The regular expression pattern to remove.</param>
    /// <returns>The string with all matches removed.</returns>
    public static string DropIgnoreCase(string src, string pattern)
    {
        return ReplaceIgnoreCase(src, pattern, "");
    }

    /// <summary>
    /// Doubles single quotes so the text can be embedded in a SQL string literal.
    /// </summary>
    /// <param name="src">The text that will be placed in a SQL statement.</param>
    /// <returns>The text with each <c>'</c> doubled, or null when <paramref name="src"/> is null.</returns>
    public static string ToSQL(string src)
    {
        if (src == null)
        {
            return null;
        }

        // NOTE: quote doubling is only a minimal safeguard; parameterized queries are the
        // reliable way to pass user input to a database.
        return src.Replace("'", "''");
    }

    /// <summary>
    /// Removes the opening and closing tags of one HTML element, keeping the content between them.
    /// </summary>
    /// <param name="content">The HTML content.</param>
    /// <param name="tagName">The tag name; it is inserted into the pattern as a regular expression.</param>
    /// <returns>The content without the given tags.</returns>
    public static string DropHtmlTag(string content, string tagName)
    {
        // Matches <tagname ...> and </tagname>. The word boundary keeps "b" from also
        // matching <br> or <body>; the group keeps an alternation in tagName intact.
        return DropIgnoreCase(content, "</?(?:" + tagName + @")\b[^>]*>");
    }

    /// <summary>
    /// Removes all HTML tags from the content.
    /// </summary>
    /// <param name="content">The HTML content.</param>
    /// <returns>The content without HTML tags.</returns>
    public static string DropHtmlTag(string content)
    {
        // Matches any <...> sequence.
        return Drop(content, "<[^>]*>");
    }

    /// <summary>
    /// Tests whether a string consists solely of digits.
    /// </summary>
    /// <param name="inputData">The string to test.</param>
    /// <returns>True when the string is one or more digits; false otherwise, including for null.</returns>
    public static bool IsNumeric(string inputData)
    {
        if (inputData == null)
        {
            return false;
        }

        return NumericPattern.IsMatch(inputData);
    }

    /// <summary>
    /// Escapes angle brackets so that markup is displayed as text in a web page.
    /// </summary>
    /// <param name="src">The text to escape.</param>
    /// <returns>The text with <c>&gt;</c> and <c>&lt;</c> replaced by entities, or null when <paramref name="src"/> is null.</returns>
    public static string EscapeHtml(string src)
    {
        if (src == null)
        {
            return null;
        }

        return src.Replace(">", "&gt;").Replace("<", "&lt;");
    }

    /// <summary>
    /// Formats plain text as HTML: escapes markup characters, turns line breaks into
    /// <c>&lt;br&gt;</c> and spaces into non-breaking spaces.
    /// </summary>
    /// <param name="str">The text to format.</param>
    /// <returns>The formatted HTML; null or empty input is returned unchanged.</returns>
    public static String ToHtml(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringBuilder sb = new StringBuilder(str);
        // '&' must be escaped first so the entities added below are not escaped again.
        sb.Replace("&", "&amp;");
        sb.Replace("<", "&lt;");
        sb.Replace(">", "&gt;");
        // CRLF before LF so a Windows line break produces a single <br>.
        sb.Replace("\r\n", "<br>");
        sb.Replace("\n", "<br>");
        // A tab becomes a space, which the next step turns into &nbsp;.
        sb.Replace("\t", " ");
        sb.Replace(" ", "&nbsp;");
        return sb.ToString();
    }

    /// <summary>
    /// Converts HTML produced by <see cref="ToHtml"/> back to plain text.
    /// </summary>
    /// <param name="str">The HTML to convert.</param>
    /// <returns>The plain text; null or empty input is returned unchanged.</returns>
    public static String ToTxt(String str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return str;
        }

        StringBuilder sb = new StringBuilder(str);
        sb.Replace("&nbsp;", " ");
        sb.Replace("<br>", "\r\n");
        sb.Replace("&lt;", "<");
        sb.Replace("&gt;", ">");
        // '&amp;' is decoded last so that "&amp;lt;" becomes "&lt;" rather than "<".
        sb.Replace("&amp;", "&");
        return sb.ToString();
    }
}
}