caching - Varnish 4 VCL- strip 定义的查询字符串参数

标签 caching varnish varnish-vcl varnish-4

我目前正在使用Varnish 4作为网站上的反向代理缓存。但是我注意到,当使用查询字符串参数调用url时,它将绕过 Varnish 缓存。

例如:

  • www.mywebsite.com =缓存HIT
  • www.mywebsite.com?gclid=123 =缓存
    小姐

  • 我希望 Varnish 在确定网页的匹配项时忽略多个查询字符串参数,例如Google的跟踪参数。

    我将以下内容添加到我的VCL文件中,但是当我加载诸如www.mywebsite.com?gclid=123之类的URL时,我看到一个404页面,因此情况不太正确。
    # Normalize request url parameters before determining a page match.
    # First statement: delete every listed tracking parameter together with the
    # "?" or "&" that precedes it.
    # NOTE(review): because the separator is deleted too, a listed parameter that
    # is first in the query string and followed by other parameters loses its "?":
    # "/p?gclid=1&a=b" becomes "/p&a=b".
    set req.url = regsuball(req.url, "((\?)|&)(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*", "");
    # Second statement: remove a separator ("?&", "?" or "&") left at the very end of the URL.
    set req.url = regsub(req.url, "(\?&|\?|&)$", "");
    

    任何帮助将非常感激。

    这是完整的VCL文件:
    vcl 4.0;
    
    import std;
    # The minimal Varnish version is 4.0
    # For SSL offloading, pass the following header in your proxy server or load balancer: 'X-Forwarded-Proto: https'
    
    # The single backend (origin server) definition used by this VCL.
    backend default {
        # Address and port of the backend.
        .host = "127.2.0.1";
        .port = "80";
        # Timeout for receiving the first byte of the backend response.
        # NOTE(review): 6s is short for slow, uncached pages - confirm this is intended.
        .first_byte_timeout = 6s;
    }
    
    # Clients allowed to send PURGE requests; vcl_recv answers 405 to any
    # PURGE whose client.ip does not match this list.
    acl purge {
        "localhost";
    }
    
    # Runs for every client request: handles PURGE (ban) requests, filters
    # HTTP methods, bypasses the cache for selected paths, normalizes the URL
    # and headers, and finally sends the request to cache lookup.
    sub vcl_recv {
        if (req.method == "PURGE") {
            # Only clients listed in the "purge" ACL may invalidate content.
            if (client.ip !~ purge) {
                return (synth(405, "Method not allowed"));
            }
            # To use the X-Pool header for purging varnish during automated deployments, make sure the X-Pool header
            # has been added to the response in your backend server config. This is used, for example, by the
            # capistrano-magento2 gem for purging old content from varnish during its deploy routine.
            if (!req.http.X-Magento-Tags-Pattern && !req.http.X-Pool) {
                return (synth(400, "X-Magento-Tags-Pattern or X-Pool header required"));
            }
            # Ban every cached object whose stored header matches the supplied pattern.
            if (req.http.X-Magento-Tags-Pattern) {
              ban("obj.http.X-Magento-Tags ~ " + req.http.X-Magento-Tags-Pattern);
            }
            if (req.http.X-Pool) {
              ban("obj.http.X-Pool ~ " + req.http.X-Pool);
            }
            return (synth(200, "Purged"));
        }

        if (req.method != "GET" &&
            req.method != "HEAD" &&
            req.method != "PUT" &&
            req.method != "POST" &&
            req.method != "TRACE" &&
            req.method != "OPTIONS" &&
            req.method != "DELETE") {
              /* Non-RFC2616 or CONNECT which is weird. */
              return (pipe);
        }

        # We only deal with GET and HEAD by default
        if (req.method != "GET" && req.method != "HEAD") {
            return (pass);
        }

        # Bypass shopping cart, checkout and search requests
        if (req.url ~ "/checkout" || req.url ~ "/catalogsearch") {
            return (pass);
        }

        # Bypass health check requests
        if (req.url ~ "/pub/health_check.php") {
            return (pass);
        }

        # Set initial grace period usage status
        set req.http.grace = "none";

        # normalize url in case of leading HTTP scheme and domain
        set req.url = regsub(req.url, "^http[s]?://", "");

        # Normalize request url parameters before determining a page match:
        # strip tracking parameters so they do not create separate cache entries.
        #
        # The "&name=value" and "?name=value" cases are handled separately so that
        # the "?" introducing the query string is never lost. Deleting it together
        # with a leading tracking parameter would turn "/p?gclid=1&a=b" into
        # "/p&a=b", which the backend treats as a different (non-existent) path.
        #
        # 1) Remove every tracking parameter that follows another parameter.
        set req.url = regsuball(req.url, "&(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*", "");
        # 2) Remove a tracking parameter that leads the query string, keeping the
        #    "?" and swallowing the "&" that joined it to the next parameter.
        set req.url = regsub(req.url, "\?(gclid|gclsrc|utm_content|utm_term|utm_campaign|utm_medium|utm_source|_ga)=[^&]*&?", "?");
        # 3) Remove a separator left dangling at the end of the URL.
        set req.url = regsub(req.url, "(\?&|\?|&)$", "");

        # collect all cookies
        std.collect(req.http.Cookie);

        # Compression filter. See https://www.varnish-cache.org/trac/wiki/FAQ/Compression
        if (req.http.Accept-Encoding) {
            if (req.url ~ "\.(jpg|jpeg|png|gif|gz|tgz|bz2|tbz|mp3|ogg|swf|flv)$") {
                # No point in compressing these
                unset req.http.Accept-Encoding;
            } elsif (req.http.Accept-Encoding ~ "gzip") {
                set req.http.Accept-Encoding = "gzip";
            } elsif (req.http.Accept-Encoding ~ "deflate" && req.http.user-agent !~ "MSIE") {
                set req.http.Accept-Encoding = "deflate";
            } else {
                # unknown algorithm
                unset req.http.Accept-Encoding;
            }
        }

        # Static files caching
        if (req.url ~ "^/(pub/)?(media|static)/") {
            # Static files should not be cached by default
            return (pass);

            # But if you use a few locales and don't use CDN you can enable caching static files by commenting previous line (#return (pass);) and uncommenting next 3 lines
            #unset req.http.Https;
            #unset req.http.X-Forwarded-Proto;
            #unset req.http.Cookie;
        }

        return (hash);
    }
    
    # Adds request attributes to the cache key so that responses which differ
    # by Magento "vary" cookie, host or forwarded protocol are stored separately.
    # NOTE(review): this subroutine has no return statement and never hashes
    # req.url itself; presumably the built-in vcl_hash runs afterwards and adds
    # the URL - confirm against the Varnish version in use.
    sub vcl_hash {
        # Hash only the value of the X-Magento-Vary cookie, not the whole Cookie header.
        if (req.http.cookie ~ "X-Magento-Vary=") {
            hash_data(regsub(req.http.cookie, "^.*?X-Magento-Vary=([^;]+);*.*$", "\1"));
        }

        # For multi site configurations to not cache each other's content
        if (req.http.host) {
            hash_data(req.http.host);
        } else {
            # No Host header: fall back to the server address.
            hash_data(server.ip);
        }

        # To make sure http users don't see ssl warning
        if (req.http.X-Forwarded-Proto) {
            hash_data(req.http.X-Forwarded-Proto);
        }

    }
    
    # Inspects each backend response and decides whether it is stored,
    # for how long, and which processing (ESI, gzip) is applied to it.
    sub vcl_backend_response {

        set beresp.grace = 3d;

        # Text responses get both ESI processing and gzip;
        # URLs ending in ".js" get gzip only.
        if (beresp.http.content-type ~ "text") {
            set beresp.do_esi = true;
            set beresp.do_gzip = true;
        } elsif (bereq.url ~ "\.js$") {
            set beresp.do_gzip = true;
        }

        # When the backend sends X-Magento-Debug, copy its Cache-Control
        # header into X-Magento-Cache-Control.
        if (beresp.http.X-Magento-Debug) {
            set beresp.http.X-Magento-Cache-Control = beresp.http.Cache-Control;
        }

        # Anything other than a 200 or a 404 is delivered without being cached.
        if (beresp.status != 200 && beresp.status != 404) {
            set beresp.ttl = 0s;
            set beresp.uncacheable = true;
            return (deliver);
        }

        # Private responses are marked uncacheable for one day.
        if (beresp.http.Cache-Control ~ "private") {
            set beresp.uncacheable = true;
            set beresp.ttl = 86400s;
            return (deliver);
        }

        # A GET/HEAD response with a positive TTL must not carry Set-Cookie.
        if (beresp.ttl > 0s && (bereq.method == "GET" || bereq.method == "HEAD")) {
            unset beresp.http.set-cookie;
        }

        # Responses that must not be cached are marked uncacheable
        # (Hit-For-Pass) for the next 2 minutes.
        if (beresp.ttl <= 0s ||
            beresp.http.Surrogate-control ~ "no-store" ||
            (!beresp.http.Surrogate-Control &&
             beresp.http.Cache-Control ~ "no-cache|no-store") ||
            beresp.http.Vary == "*") {
            set beresp.ttl = 120s;
            set beresp.uncacheable = true;
        }

        return (deliver);
    }
    
    # Final adjustments to the response headers before it is sent to the client.
    sub vcl_deliver {
        # The request URL as seen at delivery time is always exposed in a
        # response header, regardless of debug mode.
        set resp.http.X-Magento-Cache-Debug-Request-Url = req.url;

        if (!resp.http.X-Magento-Debug) {
            # Not in debug mode: hide the object's age.
            unset resp.http.Age;
        } elsif (resp.http.x-varnish ~ " ") {
            # Debug mode, and X-Varnish contains a space: report a cache hit
            # together with the grace status recorded on the request.
            set resp.http.X-Magento-Cache-Debug = "HIT";
            set resp.http.Grace = req.http.grace;
        } else {
            # Debug mode without a space in X-Varnish: report a miss.
            set resp.http.X-Magento-Cache-Debug = "MISS";
        }

        # unset resp.http.X-Magento-Debug;
        # unset resp.http.X-Magento-Tags;
        # unset resp.http.X-Powered-By;
        # unset resp.http.Server;
        # unset resp.http.X-Varnish;
        # unset resp.http.Via;
        # unset resp.http.Link;
    }
    
    # Called when lookup finds an object: decides between delivering the
    # cached copy and fetching a new one, and records which grace rule applied.
    sub vcl_hit {
        # Object is still within its TTL: serve it as is.
        if (obj.ttl >= 0s) {
            return (deliver);
        }

        # TTL has expired and the backend is unhealthy: serve the stale copy.
        if (!std.healthy(req.backend_hint)) {
            set req.http.grace = "unlimited (unhealthy server)";
            return (deliver);
        }

        # Backend is healthy and the TTL expired less than 300 seconds ago:
        # serve the stale copy.
        if (obj.ttl + 300s > 0s) {
            set req.http.grace = "normal (healthy server)";
            return (deliver);
        }

        # Backend is healthy and the object is past TTL plus 300 seconds:
        # get a fresh copy.
        return (fetch);
    }
    

    最佳答案

    也许以下将为您更好地工作:

    # Strip Google tracking parameters (gclid and any utm_* parameter) from the
    # query string so they do not produce separate cache entries.
    if (req.url ~ "[?&](gclid|utm_[a-z]+)=") {
        # Names are anchored to the preceding "&" or "?" so that parameters which
        # merely end in a tracked name (e.g. "mygclid=") are left untouched, and
        # values are matched up to the next "&" so no partial value is left behind.
        # 1) Remove every tracked parameter that follows another parameter.
        set req.url = regsuball(req.url, "&(gclid|utm_[a-z]+)=[^&]*", "");
        # 2) Remove a tracked parameter that leads the query string, keeping the
        #    "?" and swallowing the "&" that joined it to the next parameter.
        set req.url = regsub(req.url, "\?(gclid|utm_[a-z]+)=[^&]*&?", "?");
        # 3) Remove any "?" / "&" separators left at the end of the URL.
        set req.url = regsub(req.url, "[?&]+$", "");
    }
    

    最初发布here

    关于caching - Varnish 4 VCL- strip 定义的查询字符串参数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47376776/

    相关文章:

    jquery - 页面刷新后如何保留div内容

    正则表达式PCRE捕获URL中的多次出现查询字符串

    gzip - CSS并非总是被压缩,为什么?

    varnish - 无法在其他导演中使用Varnish导演

    ruby-on-rails - Varnish 缓存 http 301 302 header 位置重定向

    正则表达式匹配任何不是子模式的东西

    java - 选择并更新 hibernate 缓存表

    mysql - MySQL 如何从缓冲池中逐出页面?

    javascript - 客户端缓存(使用 JavaScript)

    ubuntu - 设置 Varnish Apache2 HTTP & HTTPS Ubuntu 16.04