c# - asp.net mvc 中的服务器端 html 过滤库

标签 c# asp.net-mvc-4 tinymce

我正在使用 TinyMCE。我需要服务器端 Html 输入过滤功能或库。 TinyMCE推荐

所有这些仅与 PHP 兼容。但我正在使用 asp.net MVC

请告诉我,asp.net MVC中有没有可用的过滤属性

比如这样

[HtmlInputFilter] //<<--
public ActionResult Post(string html){
...
}

最佳答案

您可以使用Microsoft AntiXSS库来清理 HTML。

如果您想处理自己的标签和属性白名单,AntiXSS 库不适合,因为它不为您提供必要的自定义 Hook 。一种可能性是自己卷装 sanitizer (当然,风险由您自己承担)。例如,您可以使用 HTML Agility Pack为了那个原因。这是 blog post示例 sanitizer :

public static class HtmlUtility
{
    // Original list courtesy of Robert Beal :
    // http://www.robertbeal.com/37/sanitising-html

    private static readonly Dictionary<string, string[]> ValidHtmlTags =
        new Dictionary<string, string[]>
        {
            {"p", new string[]          {"style", "class", "align"}},
            {"div", new string[]        {"style", "class", "align"}},
            {"span", new string[]       {"style", "class"}},
            {"br", new string[]         {"style", "class"}},
            {"hr", new string[]         {"style", "class"}},
            {"label", new string[]      {"style", "class"}},

            {"h1", new string[]         {"style", "class"}},
            {"h2", new string[]         {"style", "class"}},
            {"h3", new string[]         {"style", "class"}},
            {"h4", new string[]         {"style", "class"}},
            {"h5", new string[]         {"style", "class"}},
            {"h6", new string[]         {"style", "class"}},

            {"font", new string[]       {"style", "class", "color", "face", "size"}},
            {"strong", new string[]     {"style", "class"}},
            {"b", new string[]          {"style", "class"}},
            {"em", new string[]         {"style", "class"}},
            {"i", new string[]          {"style", "class"}},
            {"u", new string[]          {"style", "class"}},
            {"strike", new string[]     {"style", "class"}},
            {"ol", new string[]         {"style", "class"}},
            {"ul", new string[]         {"style", "class"}},
            {"li", new string[]         {"style", "class"}},
            {"blockquote", new string[] {"style", "class"}},
            {"code", new string[]       {"style", "class"}},

            {"a", new string[]          {"style", "class", "href", "title"}},
            {"img", new string[]        {"style", "class", "src", "height", "width",
                "alt", "title", "hspace", "vspace", "border"}},

            {"table", new string[]      {"style", "class"}},
            {"thead", new string[]      {"style", "class"}},
            {"tbody", new string[]      {"style", "class"}},
            {"tfoot", new string[]      {"style", "class"}},
            {"th", new string[]         {"style", "class", "scope"}},
            {"tr", new string[]         {"style", "class"}},
            {"td", new string[]         {"style", "class", "colspan"}},

            {"q", new string[]          {"style", "class", "cite"}},
            {"cite", new string[]       {"style", "class"}},
            {"abbr", new string[]       {"style", "class"}},
            {"acronym", new string[]    {"style", "class"}},
            {"del", new string[]        {"style", "class"}},
            {"ins", new string[]        {"style", "class"}}
        };

    /// <summary>
    /// Takes raw HTML input and cleans against a whitelist
    /// </summary>
    /// <param name="source">Html source</param>
    /// <returns>Clean output</returns>
    public static string SanitizeHtml(string source)
    {
        HtmlDocument html = GetHtml(source);
        if (html == null) return String.Empty;

        // All the nodes
        HtmlNode allNodes = html.DocumentNode;

        // Select whitelist tag names
        string[] whitelist = (from kv in ValidHtmlTags
                              select kv.Key).ToArray();

        // Scrub tags not in whitelist
        CleanNodes(allNodes, whitelist);

        // Filter the attributes of the remaining
        foreach (KeyValuePair<string, string[]> tag in ValidHtmlTags)
        {
            IEnumerable<HtmlNode> nodes = (from n in allNodes.DescendantsAndSelf()
                                           where n.Name == tag.Key
                                           select n);

            if (nodes == null) continue;

            foreach (var n in nodes)
            {
                if (!n.HasAttributes) continue;

                // Get all the allowed attributes for this tag
                HtmlAttribute[] attr = n.Attributes.ToArray();
                foreach (HtmlAttribute a in attr)
                {
                    if (!tag.Value.Contains(a.Name))
                    {
                        a.Remove(); // Wasn't in the list
                    }
                    else
                    {
                        // AntiXss
                        a.Value =
                            Microsoft.Security.Application.Encoder.UrlPathEncode(a.Value);
                    }
                }
            }
        }

        return allNodes.InnerHtml;
    }

    /// <summary>
    /// Takes a raw source and removes all HTML tags
    /// </summary>
    /// <param name="source"></param>
    /// <returns></returns>
    public static string StripHtml(string source)
    {
        source = SanitizeHtml(source);

        // No need to continue if we have no clean Html
        if (String.IsNullOrEmpty(source))
            return String.Empty;

        HtmlDocument html = GetHtml(source);
        StringBuilder result = new StringBuilder();

        // For each node, extract only the innerText
        foreach (HtmlNode node in html.DocumentNode.ChildNodes)
            result.Append(node.InnerText);

        return result.ToString();
    }

    /// <summary>
    /// Recursively delete nodes not in the whitelist
    /// </summary>
    private static void CleanNodes(HtmlNode node, string[] whitelist)
    {
        if (node.NodeType == HtmlNodeType.Element)
        {
            if (!whitelist.Contains(node.Name))
            {
                node.ParentNode.RemoveChild(node);
                return; // We're done
            }
        }

        if (node.HasChildNodes)
            CleanChildren(node, whitelist);
    }

    /// <summary>
    /// Apply CleanNodes to each of the child nodes
    /// </summary>
    private static void CleanChildren(HtmlNode parent, string[] whitelist)
    {
        for (int i = parent.ChildNodes.Count - 1; i >= 0; i--)
            CleanNodes(parent.ChildNodes[i], whitelist);
    }

    /// <summary>
    /// Helper function that returns an HTML document from text
    /// </summary>
    private static HtmlDocument GetHtml(string source)
    {
        HtmlDocument html = new HtmlDocument();
        html.OptionFixNestedTags = true;
        html.OptionAutoCloseOnEnd = true;
        html.OptionDefaultStreamEncoding = Encoding.UTF8;

        html.LoadHtml(source);

        return html;
    }
}

关于c# - asp.net mvc 中的服务器端 html 过滤库,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/15216706/

相关文章:

jquery - Tinymce - 从菜单中删除 'FIle' 选项

c# - 如何打开左侧的行号

c# - 将模型传递给部分时出现类型转换错误

c# - 在 Entity Framework 中使用一个存储过程插入到两个表中

c# - 无法加载文件或程序集 'WebMatrix.Data`

asp.net-mvc - 我可以在实际项目中使用 Telerik ASP.NET MVC 扩展吗?

javascript - 删除 TinyMCE 内的 html、head、body 标签

javascript - TinyMCE:焦点更改后无法在 Mac 上写入特殊字符

java - 在 Camera2 api 上使用 captureBurst 时 JPEG 回调超时

c# - 为什么我们需要静态私有(private)数组来初始化其他非静态私有(private)数组字段?