xml - 解析XML Filebeat> Logstash> Elasticsearch

标签 xml elasticsearch logstash elastic-stack filebeat

目标:将带有嵌套数据的XML文件解析到不同的Elasticsearch文档中。

我选择使用 Logstash 来完成解析,但由于这些文件位于不同的服务器上,因此我决定用 Filebeat 把文件发送给 Logstash。这个方案听起来不错。

但是,我似乎无法让 Filebeat 把文件的所有行合并成一条消息发送,Logstash 收到的仍然是逐行的事件:

{
    "@timestamp" => 2017-10-15T20:30:11.825Z,
        "offset" => 44,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.825Z,
        "offset" => 108,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "<foo:statistics xsi:schemaLocation=\"http://www.foo.no foo.xsd\" ",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.825Z,
        "offset" => 141,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  xmlns:foo=\"http://www.foo.no\" ",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 198,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 231,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:version>1.0</foo:version>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 258,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:name>bar</foo:name>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 313,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:start>2017-01-01T00:06:34.880+02:00</foo:start>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 366,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:stop>2017-05-01T00:06:34.880+02:00</foo:stop>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 380,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:place>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 409,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:name>baz</foo:name>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 442,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:id>1B445T4UV-W</foo:id>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 457,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:place>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 471,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 526,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:date>2017-04-17T04:06:34.880+02:00</foo:date>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 557,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:status>2</foo:status>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 572,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 586,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 641,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:date>2017-04-18T04:06:34.880+02:00</foo:date>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 672,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:status>3</foo:status>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 687,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 701,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  <foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 756,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:date>2017-04-19T04:06:34.880+02:00</foo:date>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 787,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "    <foo:status>1</foo:status>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}
[2017-10-15T20:30:42,853][WARN ][logstash.filters.split   ] Only String and Array types are splittable. field:visits is of type = NilClass
{
    "@timestamp" => 2017-10-15T20:30:11.826Z,
        "offset" => 802,
      "@version" => "1",
          "beat" => {
            "name" => "bb1ee9b80d2d",
        "hostname" => "bb1ee9b80d2d",
         "version" => "6.0.0-rc1"
    },
          "host" => "bb1ee9b80d2d",
        "source" => "/mnt/log/test4.xml",
       "message" => "  </foo:visit>",
          "tags" => [
        [0] "beats_input_codec_plain_applied",
        [1] "_split_type_failure"
    ]
}

这是我的 Filebeat 配置:
# Filebeat prospector: tail every XML file under /mnt/log and ship the events to Logstash.
filebeat.prospectors:
  - type: log
    paths: ["/mnt/log/*.xml"]
    multiline:
      # The pattern is a regular expression, so the unescaped `?` makes the
      # leading `<` optional rather than matching a literal "<?xml".
      pattern: '<?xml .*'
      # negate: false + match: after => a line that matches the pattern is
      # appended to the preceding line that does not match it.
      negate: false
      match: after

output.logstash:
  hosts:
    - "logstash:5000"

和我的XML文件:
<?xml version="1.0" encoding="iso-8859-1"?>
<foo:statistics xsi:schemaLocation="http://www.foo.no foo.xsd" 
  xmlns:foo="http://www.foo.no" 
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <foo:version>1.0</foo:version>
  <foo:name>bar</foo:name>
  <foo:start>2017-01-01T00:06:34.880+02:00</foo:start>
  <foo:stop>2017-05-01T00:06:34.880+02:00</foo:stop>
  <foo:place>
    <foo:name>baz</foo:name>
    <foo:id>1B445T4UV-W</foo:id>
  </foo:place>
  <foo:visit>
    <foo:date>2017-04-17T04:06:34.880+02:00</foo:date>
    <foo:status>2</foo:status>
  </foo:visit>
  <foo:visit>
    <foo:date>2017-04-18T04:06:34.880+02:00</foo:date>
    <foo:status>3</foo:status>
  </foo:visit>
  <foo:visit>
    <foo:date>2017-04-19T04:06:34.880+02:00</foo:date>
    <foo:status>1</foo:status>
  </foo:visit>
</foo:statistics>

我希望将整个文件作为一条消息传递给 Logstash,下面是 Logstash 的配置。
# Accept events shipped by Filebeat over the Beats protocol on port 5000.
input {
    beats { port => 5000 }
}

# Parse the XML document held in "message", fan out into one event per
# <foo:visit>, then extract the per-visit fields from each fragment.
filter {
    # Document-level extraction. store_xml is disabled, so only the XPath
    # results listed below are written to the event.
    xml {
        namespaces => {
            "foo" => "http://www.foo.no"
            "xsi" => "http://www.w3.org/2001/XMLSchema-instance"
        }
        source => "message"
        store_xml => "false"

        xpath => ["/foo:statistics/foo:start/text()", "start"]
        xpath => ["/foo:statistics/foo:stop/text()", "stop"]
        # <foo:name> and <foo:id> are in the foo namespace, so every step
        # needs the prefix; an unprefixed step only matches elements that
        # have no namespace and would never select these nodes.
        xpath => ["/foo:statistics/foo:place/foo:name/text()", "place_name"]
        xpath => ["/foo:statistics/foo:place/foo:id/text()", "place_id"]
        xpath => ["/foo:statistics/foo:visit", "visits"]
    }

    # One event per entry in "visits". When "visits" is absent (e.g. "message"
    # holds a single line instead of the whole document) the split filter
    # logs a warning and tags the event with _split_type_failure.
    split {
        field => "visits"
        remove_field => "message"
    }

    # Per-visit extraction from the serialized <foo:visit> fragment.
    # NOTE(review): this filter declares no namespaces although its XPath
    # uses the foo: prefix, and the fragment may not carry the xmlns:foo
    # declaration from the root element -- verify these expressions resolve.
    xml {
        source => "visits"
        store_xml => "false"
        xpath => ["/foo:visit/foo:date/text()", "date"]
        xpath => ["/foo:visit/foo:status/text()", "status"]
        remove_field => "visits"
    }

    # Parse the visit date as ISO8601.
    date {
        match => ["date", "ISO8601"]
    }
}

# Print every event to stdout for debugging and index it into Elasticsearch
# under a date-suffixed "maaling-" index.
output {
    stdout {
        codec => rubydebug
    }
    elasticsearch {
        index => "maaling-%{+YYYY.MM.dd}"
        hosts => "elasticsearch:9200"
    }
}

任何帮助深表感谢。

编辑:已更改匹配模式(multiline.pattern),当前值见上文的 Filebeat 配置。

最佳答案

我将 Filebeat 配置更改为:

# Filebeat prospector from the accepted answer: ship each *.ATKSTAT file under
# /mnt/log to Logstash, joining its lines through the multiline settings.
filebeat.prospectors:
  - type: log
    paths: ["/mnt/log/*.ATKSTAT"]
    encoding: 'windows-1252'
    multiline:
      # NOTE(review): the pattern begins with a space before \A; it presumably
      # never matches any line, which is what lets every line be folded into
      # a single event -- confirm before "tidying" the leading space away.
      pattern: ' \A.*'
      # negate: true + match: after => a line that does NOT match the pattern
      # is appended to the preceding line.
      negate: true
      match: after

output.logstash:
  hosts:
    - "logstash:5000"

以上配置适用于 ELK Stack(Elastic Stack)v6.0.0。

关于xml - 解析XML Filebeat> Logstash> Elasticsearch,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/46760010/

相关文章:

ruby - 无法加载:无法将nil转换为String

javascript - 解析 Adobe InDesign 的 HTML 文本——其他标签内的标签

xml - 如何在 PowerShell 中添加 XML 命名空间作为属性前缀?

elasticsearch - Elasticsearch:跨数据路径传播文件

elasticsearch - 使用完整的建议器和德语分析器进行搜索

elasticsearch - Kibana没有使用Elasticsearch映射

elasticsearch - 如何使用json过滤器将我的json日志文件存储到logstash

javascript - 在 Javascript 中获取复杂命名空间下的数据

c# - 如何访问 SharePoint 映射文件夹下的 XML 文件以创建 Xml 阅读器?

elasticsearch - 如何在聚合中获取特定的 _source 字段