Filebeat and Logstash config

The log collection workflow

  • Filebeat collects the log files from the configured paths and ships them to Logstash on port 5044
  • Logstash receives the events, extracts fields with its filters, and forwards the results to Elasticsearch

The Filebeat config file lives at /etc/filebeat/filebeat.yml

  • the Filebeat install script (a verification sketch follows this list):
#!/bin/bash
FILEBEAT_NAME=filebeat-6.7.2-x86_64.rpm
# install filebeat
curl -L -O https://artifacts.elastic.co/downloads/beats/filebeat/${FILEBEAT_NAME}
sudo rpm -vi ${FILEBEAT_NAME} && sudo rm ${FILEBEAT_NAME}
sudo chkconfig --add filebeat
sudo chkconfig filebeat on
sudo mv -f filebeat.yml /etc/filebeat/filebeat.yml
sudo chown root:root /etc/filebeat/filebeat.yml
sudo chmod go-w /etc/filebeat/filebeat.yml
sudo service filebeat restart
  • the filebeat.yml
filebeat.prospectors:
- type: log
  paths:
    - /var/applog/**/*.log
    - /var/www/webapp/logs/**/*.log
  multiline:
    pattern: '^\d+-\d+-\d+ \d+:\d+:\d+\.\d+|^\d+\.\d+\.\d+\.\d+|^\[\d+-\d+-\d+ \d+:\d+:\d+\]|^\d+:\d+:\d+.\d+|^\d+-\d+-\d+T\d+:\d+:\d+.\d+\+\d+:\d+|^\d{4}\\\d{2}\\\d{2} \d{2}:\d{2}:\d{2} \[|^\d+-\d+-\d+ \d+:\d+:\d+|^\d+:\d+:\d+|^\[\d+-[[:alpha:]]+-\d+ \d+:\d+:\d+\] '
    negate: true
    match: after

# Logstash
output.logstash:
  hosts: 'logstash.dev.com:5044'
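
With the RPM installed and the config deployed, the setup can be sanity-checked end to end (a minimal sketch: filebeat test config and filebeat test output are standard Filebeat 6.x subcommands, and /var/log/filebeat/filebeat is the default log path for the RPM install):

# confirm the YAML parses and the Logstash endpoint is reachable
sudo filebeat test config -c /etc/filebeat/filebeat.yml
sudo filebeat test output -c /etc/filebeat/filebeat.yml
# check the service and tail Filebeat's own log for shipping errors
sudo service filebeat status
sudo tail /var/log/filebeat/filebeat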

Installing Logstash

  • the Logstash Dockerfile (build and smoke-test sketches follow this list):
FROM docker.elastic.co/logstash/logstash-oss:6.3.2
RUN rm -f /usr/share/logstash/pipeline/logstash.conf
RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-s3 && \
    /usr/share/logstash/bin/logstash-plugin install logstash-input-cloudwatch_logs && \
    /usr/share/logstash/bin/logstash-plugin update logstash-output-elasticsearch
    # /usr/share/logstash/bin/logstash-plugin install logstash-output-amazon_es

ADD pipeline/ /usr/share/logstash/pipeline/
ADD config/ /usr/share/logstash/config/
  • the config/jvm.options
-Djava.awt.headless=true
-Dfile.encoding=UTF-8
-Djruby.compile.invokedynamic=true
-Djruby.jit.threshold=0
-XX:+HeapDumpOnOutOfMemoryError
-Djava.security.egd=file:/dev/urandom
  • the pipeline/logstash.conf
input {
  beats {
    port => 5044
    client_inactivity_timeout => 600
  }
}

# input {
#   cloudwatch_logs {
#     log_group => ["/aws/lambda/service"]
#     type => "lambda"
#     sincedb_path => "/usr/share/logstash/sincedb"
#     interval => 30
#   }
# }

# input {
#   s3 {
#     bucket => "${Env:dev}-elb-accesslog"
#     prefix => "develop"
#     type => "elblogs"
#     sincedb_path => "/usr/share/logstash/develop.sincedb"
#     delete => true
#     id => "develop"
#     add_field => { "app_id" => "${Env:dev}-develop" }
#   }
# }
 28
filter {
  if [source] =~ /^\/var\/log\/secure/ {
    grok {
      match => {
        "source" => "^\/var\/log\/secure"
        "message" => [".*sshd.*"]
      }
    }
  }

  if [source] =~ /^\/var\/www\/webapp\// {
    grok {
      match => {
        "source" => "^/var/www/webapp/logs/(?<app_id>[^/]+)"
      }
    }
    grok {
      match => {
        "message" => ["%{TIMESTAMP_ISO8601:log_time} (?<log_level>[^\s]+)\s+\[(?<ecs_cluster>[^\s]+)\](.*)?"]
      }
    }
    date {
      match => ["log_time", "ISO8601"]
      locale => "en"
      timezone => "Atlantic/Stanley"
      target => "log_timestamp"
    }
  }

  if [source] =~ /.*?nginx.*?/ {
    grok {
      match => {
        "message" => ["%{IPORHOST:remote_addr} - %{USERNAME:remote_user} \[%{HTTPDATE:time_local}\] \"%{DATA:request}\" %{INT:status} %{NUMBER:bytes_sent} \"%{DATA:http_referer}\" \"%{DATA:http_user_agent}\" \"%{DATA:http_x_forwarded_for}\" rt=\"(?:%{NUMBER:request_time}|-)\" uct=\"(?:%{NUMBER:upstream_connect_time}|-)\" uht=\"(?:%{NUMBER:upstream_header_time}|-)\" urt=\"(?:%{NUMBER:upstream_response_time}|-)\""]
      }
    }
  }

  if [type] == "lambda" {
    grok {
      match => {
        "[cloudwatch_logs][log_group]" => "^/aws/lambda/(?<app_id>[^/]+)"
      }
    }
  }

  mutate {
    lowercase => ["app_id", "type"]
    convert => ['request_time', 'float']
    convert => ['upstream_connect_time', 'float']
    convert => ['upstream_response_time', 'float']
    convert => ['upstream_header_time', 'float']
    remove_field => [ "[host]" ]
  }

  mutate {
    add_field => {
      "host" => "%{[beat][hostname]}"
      "env" => "${Env:dev}"
    }
  }
}
 94
output {
  # amazon_es {
  #   hosts => ["${ES_URL:es.dev.com}"]
  #   region => "${REGION:us-east-1}"
  #   index => "%{[app_id]}-%{+YYYY}-%{+MM}"
  # }

  elasticsearch {
    hosts => ["https://${ES_URL:es.dev.com}:443"]
    index => "%{[app_id]}-%{+YYYY}-%{+MM}"
    ssl => true
  }
}
  • the config/logstash.yml
http.host: "0.0.0.0"
http.port: 9600
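
To build the image and confirm the extra input plugins actually landed, a quick smoke test (a sketch; the custom-logstash:6.3.2 tag is illustrative, and the plugin listing relies on the image entrypoint exec'ing arbitrary commands):

docker build -t custom-logstash:6.3.2 .
# the s3 and cloudwatch_logs inputs should appear in the plugin list
docker run --rm custom-logstash:6.3.2 /usr/share/logstash/bin/logstash-plugin list | grep -E 's3|cloudwatch'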
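
The baked-in pipeline can be syntax-checked without starting the full service; --config.test_and_exit is a standard Logstash flag, and the ${Env:...} and ${ES_URL:...} references fall back to their inline defaults when the variables are unset:

# parse the pipeline config and exit with a pass/fail result
docker run --rm custom-logstash:6.3.2 /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/logstash.conf --config.test_and_exit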
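
With http.host bound to 0.0.0.0 on port 9600, the node stats API doubles as a health check once the container is up:

# per-pipeline event counts from the standard monitoring API
curl -s http://localhost:9600/_node/stats/pipelines?pretty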

Parameters for launching the Logstash Docker container

ContainerPort:
  - 5044
  - 9600
Memory: 2048
Command:
  - LS_JAVA_OPTS='-XX:+UseG1GC -XX:MaxMetaspaceSize=250m -XX:CompressedClassSpaceSize=50m -XX:MaxRAMPercentage=50.0 -XX:MinRAMPercentage=50.0' /usr/share/logstash/bin/logstash
Environment:
  ES_URL: ${elasticsearchDomain}
  S3_BUCKET: ${ElbLogs}
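
For local testing, an equivalent docker run of the parameters above (a sketch; es.dev.com and dev-elb-accesslog are illustrative stand-ins for the ${elasticsearchDomain} and ${ElbLogs} template references):

# ports, memory limit, and JVM options mirror the container parameters above
docker run -d -p 5044:5044 -p 9600:9600 -m 2048m \
  -e ES_URL=es.dev.com -e S3_BUCKET=dev-elb-accesslog \
  -e LS_JAVA_OPTS='-XX:+UseG1GC -XX:MaxMetaspaceSize=250m -XX:CompressedClassSpaceSize=50m -XX:MaxRAMPercentage=50.0 -XX:MinRAMPercentage=50.0' \
  custom-logstash:6.3.2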