json - Elasticsearch(+ Kibana)中的 Fluentd 错误地解析了 Nginx json 日志

标签 json nginx logging kubernetes fluentd

我有一个 nginx 1.16.1 在 vanilla K8s(裸机)的 docker 中运行。

日志由 ln -sf /dev/stdout /var/log/nginx/access.log 转发到标准输出&然后转移到Elasticsearch docker.elastic.co/elasticsearch/elasticsearch:7.2.0通过 Fluentd fluentd-kubernetes-daemonset:v1.10-debian-elasticsearch7-1

我的 nginx.conf 中有以下内容:

http {
...
        log_format json_combined escape=json
          '{' 
            '"time_local":"$time_local",'
            '"remote_addr":"$remote_addr",'
            '"remote_user":"$remote_user",'
            '"request":"$request",'
            '"status": "$status",'
            '"body_bytes_sent":"$body_bytes_sent",'
            '"request_time":"$request_time",'
            '"http_referrer":"$http_referer",'
            '"http_user_agent":"$http_user_agent"'
          '}';

    access_log /var/log/nginx/access.log json_combined;
...

我的 fluidd 使用由 kubectl create configmap fluentd-kubernetes-conf --from-file=kubernetes.conf --namespace=kube-system 从原始 kubernetes.conf 创建的 configmap (完整提供,以防万一):

# AUTOMATICALLY GENERATED
# DO NOT EDIT THIS FILE DIRECTLY, USE /templates/conf/kubernetes.conf.erb

<label @FLUENT_LOG>
  <match fluent.**>
    @type null
  </match>
</label>

<source>
  @type tail
  @id in_tail_container_logs
  path /var/log/containers/*.log
  pos_file /var/log/fluentd-containers.log.pos
  tag "#{ENV['FLUENT_CONTAINER_TAIL_TAG'] || 'kubernetes.*'}"
  exclude_path "#{ENV['FLUENT_CONTAINER_TAIL_EXCLUDE_PATH'] || use_default}"
  read_from_head true
  <parse>
    @type "#{ENV['FLUENT_CONTAINER_TAIL_PARSER_TYPE'] || 'json'}"
    time_format %Y-%m-%dT%H:%M:%S.%NZ
  </parse>
</source>

<source>
  @type tail
  @id in_tail_minion
  path /var/log/salt/minion
  pos_file /var/log/fluentd-salt.pos
  tag salt
  <parse>
    @type regexp
    expression /^(?<time>[^ ]* [^ ,]*)[^\[]*\[[^\]]*\]\[(?<severity>[^ \]]*) *\] (?<message>.*)$/
    time_format %Y-%m-%d %H:%M:%S
  </parse>
</source>

<source>
  @type tail
  @id in_tail_startupscript
  path /var/log/startupscript.log
  pos_file /var/log/fluentd-startupscript.log.pos
  tag startupscript
  <parse>
    @type syslog
  </parse>
</source>

<source>
  @type tail
  @id in_tail_docker
  path /var/log/docker.log
  pos_file /var/log/fluentd-docker.log.pos
  tag docker
  <parse>
    @type regexp
    expression /^time="(?<time>[^)]*)" level=(?<severity>[^ ]*) msg="(?<message>[^"]*)"( err="(?<error>[^"]*)")?( statusCode=($<status_code>\d+))?/
  </parse>
</source>

<source>
  @type tail
  @id in_tail_etcd
  path /var/log/etcd.log
  pos_file /var/log/fluentd-etcd.log.pos
  tag etcd
  <parse>
    @type none
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kubelet
  multiline_flush_interval 5s
  path /var/log/kubelet.log
  pos_file /var/log/fluentd-kubelet.log.pos
  tag kubelet
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_proxy
  multiline_flush_interval 5s
  path /var/log/kube-proxy.log
  pos_file /var/log/fluentd-kube-proxy.log.pos
  tag kube-proxy
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_apiserver
  multiline_flush_interval 5s
  path /var/log/kube-apiserver.log
  pos_file /var/log/fluentd-kube-apiserver.log.pos
  tag kube-apiserver
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_controller_manager
  multiline_flush_interval 5s
  path /var/log/kube-controller-manager.log
  pos_file /var/log/fluentd-kube-controller-manager.log.pos
  tag kube-controller-manager
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_kube_scheduler
  multiline_flush_interval 5s
  path /var/log/kube-scheduler.log
  pos_file /var/log/fluentd-kube-scheduler.log.pos
  tag kube-scheduler
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_rescheduler
  multiline_flush_interval 5s
  path /var/log/rescheduler.log
  pos_file /var/log/fluentd-rescheduler.log.pos
  tag rescheduler
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_glbc
  multiline_flush_interval 5s
  path /var/log/glbc.log
  pos_file /var/log/fluentd-glbc.log.pos
  tag glbc
  <parse>
    @type kubernetes
  </parse>
</source>

<source>
  @type tail
  @id in_tail_cluster_autoscaler
  multiline_flush_interval 5s
  path /var/log/cluster-autoscaler.log
  pos_file /var/log/fluentd-cluster-autoscaler.log.pos
  tag cluster-autoscaler
  <parse>
    @type kubernetes
  </parse>
</source>

# Example:
# 2017-02-09T00:15:57.992775796Z AUDIT: id="90c73c7c-97d6-4b65-9461-f94606ff825f" ip="104.132.1.72" method="GET" user="kubecfg" as="<self>" asgroups="<lookup>" namespace="default" uri="/api/v1/namespaces/default/pods"
# 2017-02-09T00:15:57.993528822Z AUDIT: id="90c73c7c-97d6-4b65-9461-f94606ff825f" response="200"
<source>
  @type tail
  @id in_tail_kube_apiserver_audit
  multiline_flush_interval 5s
  path /var/log/kubernetes/kube-apiserver-audit.log
  pos_file /var/log/kube-apiserver-audit.log.pos
  tag kube-apiserver-audit
  <parse>
    @type multiline
    format_firstline /^\S+\s+AUDIT:/
    # Fields must be explicitly captured by name to be parsed into the record.
    # Fields may not always be present, and order may change, so this just looks
    # for a list of key="\"quoted\" value" pairs separated by spaces.
    # Unknown fields are ignored.
    # Note: We can't separate query/response lines as format1/format2 because
    #       they don't always come one after the other for a given query.
    format1 /^(?<time>\S+) AUDIT:(?: (?:id="(?<id>(?:[^"\\]|\\.)*)"|ip="(?<ip>(?:[^"\\]|\\.)*)"|method="(?<method>(?:[^"\\]|\\.)*)"|user="(?<user>(?:[^"\\]|\\.)*)"|groups="(?<groups>(?:[^"\\]|\\.)*)"|as="(?<as>(?:[^"\\]|\\.)*)"|asgroups="(?<asgroups>(?:[^"\\]|\\.)*)"|namespace="(?<namespace>(?:[^"\\]|\\.)*)"|uri="(?<uri>(?:[^"\\]|\\.)*)"|response="(?<response>(?:[^"\\]|\\.)*)"|\w+="(?:[^"\\]|\\.)*"))*/
    time_format %Y-%m-%dT%T.%L%Z
  </parse>
</source>

<filter kubernetes.**>
  @type kubernetes_metadata
  @id filter_kube_metadata
  kubernetes_url "#{ENV['FLUENT_FILTER_KUBERNETES_URL'] || 'https://' + ENV.fetch('KUBERNETES_SERVICE_HOST') + ':' + ENV.fetch('KUBERNETES_SERVICE_PORT') + '/api'}"
  verify_ssl "#{ENV['KUBERNETES_VERIFY_SSL'] || true}"
  ca_file "#{ENV['KUBERNETES_CA_FILE']}"
</filter>

不幸的是,我在 Kibana 中看到以下 JSON 格式的内容:

  "_source": {
    "log": "{\"time_local\":\"25/Apr/2020:00:06:36 +0000\",\"remote_addr\":\"10.244.1.1\",\"remote_user\":\"\",\"request\":\"GET /health HTTP/1.1\",\"status\": \"200\",\"body_bytes_sent\":\"2\",\"request_time\":\"0.000\",\"http_referrer\":\"\",\"http_user_agent\":\"kube-probe/1.18\"}\n",
    "stream": "stdout",

(用“\”代替正确解析的日志)。

日志在标准输出中看起来正常 kubectl get logs podname &我也尝试设置 escape=none在 nginx.conf 中(没有区别),所以看起来是 fluidd 的配置问题。

您能告诉我到底应该在 fluidd 的 kubernetes.conf 中更改/添加哪些内容才能获得正确解析的 json log

最佳答案

“问题”确实出在 kubernetes.conf 中的 fluidd 配置 https://github.com/fluent/fluentd-kubernetes-daemonset...

我已将 source 中的 parse 类型从 json 更改为 json_in_json,这使我能够获取所有日志解析正确!

<source>
  @type tail
  @id in_tail_container_logs
  path /var/log/containers/*.log
  pos_file /var/log/fluentd-containers.log.pos
  tag "#{ENV['FLUENT_CONTAINER_TAIL_TAG'] || 'kubernetes.*'}"
  exclude_path "#{ENV['FLUENT_CONTAINER_TAIL_EXCLUDE_PATH'] || use_default}"
  read_from_head true
  <parse>
    @type "#{ENV['FLUENT_CONTAINER_TAIL_PARSER_TYPE'] || 'json_in_json'}"
    time_format %Y-%m-%dT%H:%M:%S.%NZ
  </parse>
</source>

https://github.com/alikhil/fluent-plugin-json-in-json/

什么没有帮助: key_name 日志 reserve_data true hash_value_field 日志 https://docs.fluentd.org/v/0.12/filter/parser

关于json - Elasticsearch(+ Kibana)中的 Fluentd 错误地解析了 Nginx json 日志,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/61419439/

相关文章:

python - 将json dict转换为pandas df中的行

python - 如何将 WSGI 与 Python/Flask 应用程序一起使用 - HasGeek Lastuser?

grails - 在文件中未生成日志

json - Postgres JSON 等效于 HSTORE 减法运算符

java - 有没有办法将任意数据结构与 GSON 解析器相关联?

java - 在 Android 中使用日出/日落 REST API。如何正确传递获取到的位置坐标的URL?

nginx - 使用 nginx 软件重新流式传输另一个流

mysql - Laravel 时区问题

java - Log4J2 同步记录器比混合异步/同步记录器更快

java - 回退 : does not creates log file