作者: whooyun发表于: 2018-08-16 16:09
自己整理个各种脚本用来做nginx日志分析,虽然可以用,但是都不成体系,所以特意去体验下goaccess,虽然不是非常强大,但是做做uv,pv统计肯定是够了。
记录下安装过程:
centos7.4,goaccess1.2
1、安装goaccess
yum install goaccess
如果你的系统repository没有的话,那就去下个tar,自己编译安装也是可以的。
2、查看是否安装成功
goaccess -V
如果有显示版本号,说明安装成功。自己如果不知道在哪个目录就find下
3、配置/etc/goaccess.conf的时间格式,贴出生产系统的给大家参考下
######################################
# Time Format Options (required)
######################################
#
# The hour (24-hour clock) [00,23]; leading zeros are permitted but not required.
# The minute [00,59]; leading zeros are permitted but not required.
# The seconds [00,60]; leading zeros are permitted but not required.
# See `man strftime` for more details
#
# The following time format works with any of the
# Apache/NGINX's log formats below.
#
time-format %H:%M:%S
#
# Google Cloud Storage or
# The time in microseconds since the Unix epoch.
#
#time-format %f
# Squid native log format
#
#time-format %s
######################################
# Date Format Options (required)
######################################
#
# The date-format variable followed by a space, specifies
# the log format date containing any combination of regular
# characters and special format specifiers. They all begin with a
# percentage (%) sign. See `man strftime`
#
# The following date format works with any of the
# Apache/NGINX's log formats below.
#
date-format %d/%b/%Y
#
# AWS | Amazon CloudFront (Download Distribution)
# AWS | Elastic Load Balancing
# W3C (IIS)
#
#date-format %Y-%m-%d
#
# Google Cloud Storage or
# The time in microseconds since the Unix epoch.
#
#date-format %f
# Squid native log format
#
#date-format %s
######################################
# Log Format Options (required)
######################################
#
# The log-format variable followed by a space or \t for
# tab-delimited, specifies the log format string.
#
# NOTE: If the time/date is a timestamp in seconds or microseconds
# %x must be used instead of %d & %t to represent the date & time.
# NCSA Combined Log Format
#log-format %h %^[%d:%t %^] "%r" %s %b "%R" "%u"
# NCSA Combined Log Format with Virtual Host
#log-format %v:%^ %h %^[%d:%t %^] "%r" %s %b "%R" "%u"
# Common Log Format (CLF)
#log-format %h %^[%d:%t %^] "%r" %s %b
# Common Log Format (CLF) with Virtual Host
#log-format %v:%^ %h %^[%d:%t %^] "%r" %s %b
# W3C
#log-format %d %t %h %^ %^ %^ %^ %r %^ %s %b %^ %^ %u %R
# Squid native log format
#log-format %^ %^ %^ %v %^: %x.%^ %~%L %h %^/%s %b %m %U
# AWS | Amazon CloudFront (Download Distribution)
#log-format %d\t%t\t%^\t%b\t%h\t%m\t%^\t%r\t%s\t%R\t%u\t%^
# Google Cloud Storage
#log-format "%x","%h",%^,%^,"%m","%U","%s",%^,"%b","%D",%^,"%R","%u"
# AWS | Elastic Load Balancing
#log-format %dT%t.%^ %^ %h:%^ %^ %T %^ %^ %^ %s %^ %b "%r" "%u"
# AWSS3 | Amazon Simple Storage Service (S3)
#log-format %^[%d:%t %^] %h %^"%r" %s %^ %b %^ %L %^ "%R" "%u"
# Virtualmin Log Format with Virtual Host
#log-format %h %^ %v %^[%d:%t %^] "%r" %s %b "%R" "%u"
# In addition to specifying the raw log/date/time formats, for
# simplicity, any of the following predefined log format names can be
# supplied to the log/date/time-format variables. GoAccess can also
# handle one predefined name in one variable and another predefined
# name in another variable.
#
log-format COMBINED
#log-format VCOMBINED
#log-format COMMON
#log-format VCOMMON
#log-format W3C
#log-format SQUID
#log-format CLOUDFRONT
#log-format CLOUDSTORAGE
#log-format AWSELB
#log-format AWSS3
######################################
# UI Options
######################################
# Choose among color schemes
# 1 : Monochrome
# 2 : Green
# 3 : Monokai (if 256-colors supported)
#
#color-scheme 3
# Prompt log/date configuration window on program start.
#
config-dialog false
# Color highlight active panel.
#
hl-header true
# Specify a custom CSS file in the HTML report.
#
#html-custom-css /path/file.css
# Specify a custom JS file in the HTML report.
#
#html-custom-js /path/file.js
# Set default HTML preferences.
#
# NOTE: A valid JSON object is required.
# DO NOT USE A MULTILINE JSON OBJECT.
# The parser will only parse the value next to `html-prefs` (single line)
# It allows the ability to customize each panel plot. See example below.
#
#html-prefs {"theme":"bright","perPage":5,"layout":"horizontal","showTables":true,"visitors":{"plot":{"chartType":"bar"}}}
# Set HTML report page title and header.
#
#html-report-title My Awesome Web Stats
# Format JSON output using tabs and newlines.
#
json-pretty-print false
# Turn off colored output. This is the default output on
# terminals that do not support colors.
# true : for no color output
# false : use color-scheme
#
no-color false
# Don't write column names in the terminal output. By default, it displays
# column names for each available metric in every panel.
#
no-column-names false
# Disable summary metrics on the CSV output.
#
no-csv-summary false
# Disable progress metrics.
#
no-progress false
# Disable scrolling through panels on TAB.
#
no-tab-scroll false
# Do not show the last updated field displayed in the HTML generated report.
#
#no-html-last-updated true
# Enable mouse support on main dashboard.
#
with-mouse false
# Maximum number of items to show per panel.
# Note: Only the CSV and JSON outputs allow a maximum greater than the
# default value of 366.
#
#max-items 366
# Custom colors for the terminal output
# Tailor GoAccess to suit your own tastes.
#
# Color Syntax:
# DEFINITION space/tab colorFG#:colorBG# [[attributes,] PANEL]
#
# FG# = foreground color number [-1...255] (-1 = default terminal color)
# BG# = background color number [-1...255] (-1 = default terminal color)
#
# Optionally:
#
# It is possible to apply color attributes, such as:
# bold,underline,normal,reverse,blink.
# Multiple attributes are comma separated
#
# If desired, it is possible to apply custom colors per panel, that is, a
# metric in the REQUESTS panel can be of color A, while the same metric in the
# BROWSERS panel can be of color B.
#
# The following is a 256 color scheme (hybrid palette)
#
#color COLOR_MTRC_HITS color110:color-1
#color COLOR_MTRC_VISITORS color173:color-1
#color COLOR_MTRC_DATA color221:color-1
#color COLOR_MTRC_BW color167:color-1
#color COLOR_MTRC_AVGTS color143:color-1
#color COLOR_MTRC_CUMTS color247:color-1
#color COLOR_MTRC_MAXTS color186:color-1
#color COLOR_MTRC_PROT color109:color-1
#color COLOR_MTRC_MTHD color139:color-1
#color COLOR_MTRC_HITS_PERC color186:color-1
#color COLOR_MTRC_HITS_PERC_MAX color139:color-1
#color COLOR_MTRC_HITS_PERC_MAX color139:color-1 VISITORS
#color COLOR_MTRC_HITS_PERC_MAX color139:color-1 OS
#color COLOR_MTRC_HITS_PERC_MAX color139:color-1 BROWSERS
#color COLOR_MTRC_HITS_PERC_MAX color139:color-1 VISIT_TIMES
#color COLOR_MTRC_VISITORS_PERC color186:color-1
#color COLOR_MTRC_VISITORS_PERC_MAX color139:color-1
#color COLOR_PANEL_COLS color243:color-1
#color COLOR_BARS color250:color-1
#color COLOR_ERROR color231:color167
#color COLOR_SELECTED color7:color167
#color COLOR_PANEL_ACTIVE color7:color237
#color COLOR_PANEL_HEADER color250:color235
#color COLOR_PANEL_DESC color242:color-1
#color COLOR_OVERALL_LBLS color243:color-1
#color COLOR_OVERALL_VALS color167:color-1
#color COLOR_OVERALL_PATH color186:color-1
#color COLOR_ACTIVE_LABEL color139:color235 bold underline
#color COLOR_BG color250:color-1
#color COLOR_DEFAULT color243:color-1
#color COLOR_PROGRESS color7:color110
######################################
# Server Options
######################################
# Specify IP address to bind server to.
#
#addr 0.0.0.0
# Run GoAccess as daemon (if --real-time-html enabled).
#
#daemonize false
# Ensure clients send the specified origin header upon the WebSocket
# handshake.
#
#origin http://example.org
# The port to which the connection is being attempted to connect.
# By default GoAccess' WebSocket server listens on port 7890
# See man page or http://gwsocket.io for details.
#
#port 7890
# Enable real-time HTML output.
#
#real-time-html true
# Path to TLS/SSL certificate.
# Note that ssl-cert and ssl-key need to be used to enable TLS/SSL.
#
#ssl-cert /path/ssl/domain.crt
# Path to TLS/SSL private key.
# Note that ssl-cert and ssl-key need to be used to enable TLS/SSL.
#
#ssl-key /path/ssl/domain.key
# URL to which the WebSocket server responds. This is the URL supplied
# to the WebSocket constructor on the client side.
#
# Optionally, it is possible to specify the WebSocket URI scheme, such as ws://
# or wss:// for unencrypted and encrypted connections.
# e.g., ws-url wss://goaccess.io
#
# If GoAccess is running behind a proxy, you could set the client side
# to connect to a different port by specifying the host followed by a
# colon and the port.
# e.g., ws-url goaccess.io:9999
#
# By default, it will attempt to connect to localhost. If GoAccess is
# running on a remote server, the host of the remote server should be
# specified here. Also, make sure it is a valid host and NOT an http
# address.
#
#ws-url goaccess.io
# Path to read named pipe (FIFO).
#
#fifo-in /tmp/wspipein.fifo
# Path to write named pipe (FIFO).
#
#fifo-in /tmp/wspipeout.fifo
######################################
# File Options
######################################
# Specify the path to the input log file. If set, it will take
# priority over -f from the command line.
#
#log-file /var/log/apache2/access.log
# Send all debug messages to the specified file.
#
#debug-file debug.log
# Specify a custom configuration file to use. If set, it will take
# priority over the global configuration file (if any).
#
#config-file <filename>
# Log invalid requests to the specified file.
#
#invalid-requests <filename>
# Do not load the global configuration file.
#
#no-global-config false
######################################
# Parse Options
######################################
# Enable a list of user-agents by host. For faster parsing, do not
# enable this flag.
#
agent-list false
# Enable IP resolver on HTML|JSON|CSV output.
#
with-output-resolver false
# Exclude an IPv4 or IPv6 from being counted.
# Ranges can be included as well using a dash in between
# the IPs (start-end).
#
#exclude-ip 127.0.0.1
#exclude-ip 192.168.0.1-192.168.0.100
#exclude-ip ::1
#exclude-ip 0:0:0:0:0:ffff:808:804-0:0:0:0:0:ffff:808:808
# Include HTTP request method if found. This will create a
# request key containing the request method + the actual request.
#
# <yes|no> [default: yes]
#
http-method yes
# Include HTTP request protocol if found. This will create a
# request key containing the request protocol + the actual request.
#
# <yes|no> [default: yes]
#
http-protocol yes
# Write output to stdout given one of the following files and the
# corresponding extension for the output format:
#
# /path/file.csv - Comma-separated values (CSV)
# /path/file.json - JSON (JavaScript Object Notation)
# /path/file.html - HTML
#
#output-format /path/file.html
# Ignore request's query string.
# i.e., www.google.com/page.htm?query => www.google.com/page.htm
#
# Note: Removing the query string can greatly decrease memory
# consumption, especially on timestamped requests.
#
no-query-string false
# Disable IP resolver on terminal output.
#
no-term-resolver false
# Treat non-standard status code 444 as 404.
#
444-as-404 false
# Add 4xx client errors to the unique visitors count.
#
4xx-to-unique-count false
# Include static files that contain a query string in the static files
# panel.
# e.g., /fonts/fontawesome-webfont.woff?v=4.0.3
#
all-static-files false
# Date specificity. Possible values: `date` (default), or `hr`.
#
#date-spec hr
# Decode double-encoded values.
#
double-decode false
# Enable parsing/displaying the given panel.
#
#enable-panel VISITORS
#enable-panel REQUESTS
#enable-panel REQUESTS_STATIC
#enable-panel NOT_FOUND
#enable-panel HOSTS
#enable-panel OS
#enable-panel BROWSERS
#enable-panel VISIT_TIMES
#enable-panel VIRTUAL_HOSTS
#enable-panel REFERRERS
#enable-panel REFERRING_SITES
#enable-panel KEYPHRASES
#enable-panel STATUS_CODES
#enable-panel REMOTE_USER
#enable-panel GEO_LOCATION
# Hour specificity. Possible values: `hr` (default), or `min` (tenth
# of a minute).
#
#hour-spec min
# Ignore crawlers from being counted.
# This will ignore robots listed under browsers.c
# Note that it will count them towards the total
# number of requests, but excluded from any of the panels.
#
ignore-crawlers false
# Parse and display crawlers only.
# This will ignore robots listed under browsers.c
# Note that it will count them towards the total
# number of requests, but excluded from any of the panels.
#
crawlers-only false
# Ignore parsing and displaying the given panel.
#
#ignore-panel VISITORS
#ignore-panel REQUESTS
#ignore-panel REQUESTS_STATIC
#ignore-panel NOT_FOUND
#ignore-panel HOSTS
#ignore-panel OS
#ignore-panel BROWSERS
#ignore-panel VISIT_TIMES
#ignore-panel VIRTUAL_HOSTS
ignore-panel REFERRERS
#ignore-panel REFERRING_SITES
ignore-panel KEYPHRASES
#ignore-panel STATUS_CODES
#ignore-panel REMOTE_USER
#ignore-panel GEO_LOCATION
# Ignore referers from being counted.
# This supports wild cards. For instance,
# '*' matches 0 or more characters (including spaces)
# '?' matches exactly one character
#
#ignore-referer *.domain.com
#ignore-referer ww?.domain.*
# Ignore parsing and displaying one or multiple status code(s)
#
#ignore-status 400
#ignore-status 502
# Number of lines from the access log to test against the provided
# log/date/time format. By default, the parser is set to test 10
# lines. If set to 0, the parser won't test any lines and will parse
# the whole access log.
#
#num-tests 10
# Parse log and exit without outputting data.
#
#process-and-exit false
# Display real OS names. e.g, Windows XP, Snow Leopard.
#
real-os true
# Sort panel on initial load.
# Sort options are separated by comma.
# Options are in the form: PANEL,METRIC,ORDER
#
# Available metrics:
# BY_HITS - Sort by hits
# BY_VISITORS - Sort by unique visitors
# BY_DATA - Sort by data
# BY_BW - Sort by bandwidth
# BY_AVGTS - Sort by average time served
# BY_CUMTS - Sort by cumulative time served
# BY_MAXTS - Sort by maximum time served
# BY_PROT - Sort by http protocol
# BY_MTHD - Sort by http method
# Available orders:
# ASC
# DESC
#
#sort-panel VISITORS,BY_DATA,ASC
#sort-panel REQUESTS,BY_HITS,ASC
#sort-panel REQUESTS_STATIC,BY_HITS,ASC
#sort-panel NOT_FOUND,BY_HITS,ASC
#sort-panel HOSTS,BY_HITS,ASC
#sort-panel OS,BY_HITS,ASC
#sort-panel BROWSERS,BY_HITS,ASC
#sort-panel VISIT_TIMES,BY_DATA,DESC
#sort-panel VIRTUAL_HOSTS,BY_HITS,ASC
#sort-panel REFERRERS,BY_HITS,ASC
#sort-panel REFERRING_SITES,BY_HITS,ASC
#sort-panel KEYPHRASES,BY_HITS,ASC
#sort-panel STATUS_CODES,BY_HITS,ASC
#sort-panel REMOTE_USER,BY_HITS,ASC
#sort-panel GEO_LOCATION,BY_HITS,ASC
# Consider the following extensions as static files
# The actual '.' is required and extensions are case sensitive
# For a full list, uncomment the less common static extensions below.
#
static-file .css
static-file .js
static-file .jpg
static-file .png
static-file .gif
static-file .ico
static-file .jpeg
static-file .pdf
static-file .txt
static-file .csv
static-file .zip
static-file .mp3
static-file .mp4
static-file .mpeg
static-file .mpg
static-file .exe
static-file .swf
static-file .woff
static-file .woff2
static-file .xls
static-file .xlsx
static-file .doc
static-file .docx
static-file .ppt
static-file .pptx
static-file .iso
static-file .gz
static-file .rar
static-file .svg
static-file .bmp
static-file .tar
static-file .tgz
static-file .tiff
static-file .tif
static-file .ttf
static-file .flv
#static-file .less
#static-file .ac3
#static-file .avi
#static-file .bz2
#static-file .class
#static-file .cue
#static-file .dae
#static-file .dat
#static-file .dts
#static-file .ejs
#static-file .eot
#static-file .eps
#static-file .img
#static-file .jar
#static-file .map
#static-file .mid
#static-file .midi
#static-file .mkv
#static-file .odp
#static-file .ods
#static-file .odt
#static-file .ogg
#static-file .otf
#static-file .pict
#static-file .pls
#static-file .ps
#static-file .qt
#static-file .rm
#static-file .svgz
#static-file .wav
#static-file .webp
######################################
# GeoIP Options
# Only if configured with --enable-geoip
######################################
# Standard GeoIP database for less memory usage.
#
#std-geoip false
# Specify path to GeoIP database file. i.e., GeoLiteCity.dat
# .dat file needs to be downloaded from maxmind.com.
#
# For IPv4 City database:
# wget -N http://geolite.maxmind.com/download/geoip/database/GeoLiteCity.dat.gz
# gunzip GeoLiteCity.dat.gz
#
# For IPv6 City database:
# wget -N http://geolite.maxmind.com/download/geoip/database/GeoLiteCityv6-beta/GeoLiteCityv6.dat.gz
# gunzip GeoLiteCityv6.dat.gz
#
# For IPv6 Country database:
# wget -N http://geolite.maxmind.com/download/geoip/database/GeoIPv6.dat.gz
# gunzip GeoIPv6.dat.gz
#
# Note: `geoip-city-data` is an alias of `geoip-database`
#
#geoip-database /usr/local/share/GeoIP/GeoLiteCity.dat
######################################
# Tokyo Cabinet Options
# Only if configured with --enable-tcb=btree
######################################
# GoAccess has the ability to process logs incrementally through the on-disk
# B+Tree database.
#
# It works in the following way:
# - A data set must be persisted first with --keep-db-files, then the same data
# set can be loaded with --load-from-disk.
# - If new data is passed (piped or through a log file), it will append it to
# the original data set.
# - To preserve the data at all times, --keep-db-files must be used.
# - If --load-from-disk is used without --keep-db-files, database files will be
# deleted upon closing the program.
# On-disk B+ Tree
# Persist parsed data into disk. This should be set to
# the first dataset prior to use `load-from-disk`.
# Setting it to false will delete all database files
# when exiting the program.
#keep-db-files true
# On-disk B+ Tree
# Load previously stored data from disk.
# Database files need to exist. See `keep-db-files`.
#load-from-disk false
# On-disk B+ Tree
# Path where the on-disk database files are stored.
# The default value is the /tmp/ directory
# Note the trailing forward-slash.
#
#db-path /tmp/
# On-disk B+ Tree
# Set the size in bytes of the extra mapped memory.
# The default value is 0.
#
#xmmap 0
# On-disk B+ Tree
# Max number of leaf nodes to be cached.
# Specifies the maximum number of leaf nodes to be cached.
# If it is not more than 0, the default value is specified.
# The default value is 1024.
#
#cache-lcnum 1024
# On-disk B+ Tree
# Specifies the maximum number of non-leaf nodes to be cached.
# If it is not more than 0, the default value is specified.
# The default value is 512.
#
#cache-ncnum 512
# On-disk B+ Tree
# Specifies the number of members in each leaf page.
# If it is not more than 0, the default value is specified.
# The default value is 128.
#
#tune-lmemb 128
# On-disk B+ Tree
# Specifies the number of members in each non-leaf page.
# If it is not more than 0, the default value is specified.
# The default value is 256.
#
#tune-nmemb 256
# On-disk B+ Tree
# Specifies the number of elements of the bucket array.
# If it is not more than 0, the default value is specified.
# The default value is 32749.
# Suggested size of the bucket array is about from 1 to 4
# times of the number of all pages to be stored.
#
#tune-bnum 32749
# On-disk B+ Tree
# Specifies that each page is compressed with ZLIB|BZ2 encoding.
# Disabled by default.
#
#compression zlib
4、生成报表命令
goaccess -f /var/log/nginx/access.log -a > report.html
你也可以自己写脚本每天按时执行,这样就不需要自己手动了
贴一张生成的报表图