Imported Upstream version 0.96

2018-02-25 14:46:41 +01:00
commit a6db65a701
16 changed files with 2221 additions and 0 deletions
--- a/lib/Log/GELF/Util.pm
+++ b/lib/Log/GELF/Util.pm
@@ -0,0 +1,698 @@
+package Log::GELF::Util;
+use 5.010;
+use strict;
+use warnings;
+
+require Exporter;
+use Readonly;
+
+our (
+    $VERSION,
+    @ISA,
+    @EXPORT_OK, 
+    %EXPORT_TAGS,
+    $GELF_MSG_MAGIC,
+    $ZLIB_MAGIC,
+    $GZIP_MAGIC,
+    %LEVEL_NAME_TO_NUMBER,
+    %LEVEL_NUMBER_TO_NAME,
+    %GELF_MESSAGE_FIELDS,
+    $LEVEL_NAME_REGEX,
+);
+
+$VERSION = "0.96";
+
+use Params::Validate qw(
+    validate
+    validate_pos
+    validate_with
+    SCALAR
+    ARRAYREF
+    HASHREF
+);
+use Time::HiRes qw(time);
+use Sys::Syslog qw(:macros);
+use Sys::Hostname;
+use JSON::MaybeXS qw(encode_json decode_json);
+use IO::Compress::Gzip qw(gzip $GzipError);
+use IO::Uncompress::Gunzip qw(gunzip $GunzipError);
+use IO::Compress::Deflate qw(deflate $DeflateError);
+use IO::Uncompress::Inflate qw(inflate $InflateError);
+use Math::Random::MT qw(irand);
+
+Readonly $GELF_MSG_MAGIC => pack('C*', 0x1e, 0x0f);
+Readonly $ZLIB_MAGIC     => pack('C*', 0x78, 0x9c);
+Readonly $GZIP_MAGIC     => pack('C*', 0x1f, 0x8b);
+
+Readonly %LEVEL_NAME_TO_NUMBER => (
+    emerg  => LOG_EMERG,
+    alert  => LOG_ALERT,
+    crit   => LOG_CRIT,
+    err    => LOG_ERR,
+    warn   => LOG_WARNING,
+    notice => LOG_NOTICE,
+    info   => LOG_INFO,
+    debug  => LOG_DEBUG,
+);
+
+Readonly %LEVEL_NUMBER_TO_NAME => (
+    &LOG_EMERG   =>  'emerg',
+    &LOG_ALERT   =>  'alert',
+    &LOG_CRIT    =>  'crit',
+    &LOG_ERR     =>  'err',
+    &LOG_WARNING =>  'warn',
+    &LOG_NOTICE  =>  'notice',
+    &LOG_INFO    =>  'info',
+    &LOG_DEBUG   =>  'debug',
+);
+
+Readonly %GELF_MESSAGE_FIELDS => (
+    version        => 1,
+    host           => 1,
+    short_message  => 1,
+    full_message   => 1,
+    timestamp      => 1,
+    level          => 1,
+    facility       => 0,
+    line           => 0,
+    file           => 0,
+);
+
+my $ln = '^(' .
+    (join '|', (keys %LEVEL_NAME_TO_NUMBER)) .
+    ')\w*$';
+$LEVEL_NAME_REGEX = qr/$ln/i;
+undef $ln;
+
+@ISA       = qw(Exporter);
+@EXPORT_OK = qw( 
+    $GELF_MSG_MAGIC
+    $ZLIB_MAGIC
+    $GZIP_MAGIC
+    %LEVEL_NAME_TO_NUMBER
+    %LEVEL_NUMBER_TO_NAME
+    %GELF_MESSAGE_FIELDS
+    validate_message
+    encode
+    decode
+    compress
+    uncompress
+    enchunk
+    dechunk
+    is_chunked
+    decode_chunk
+    parse_level
+    parse_size
+);
+
+push @{ $EXPORT_TAGS{all} }, @EXPORT_OK ;
+Exporter::export_ok_tags('all');
+
+sub validate_message {
+    my %p = validate_with(
+        params      => \@_,
+        allow_extra => 1,
+        spec        => {
+            version       => {
+                default => '1.1',
+                callbacks => {
+                    version_check => sub {
+                        my $version = shift;
+                        $version =~ /^1\.1$/
+                            or die 'version must be 1.1, supplied $version';
+                    },
+                },
+            },
+            host          => { type => SCALAR, default => hostname() },
+            short_message => { type => SCALAR },
+            full_message  => { type => SCALAR, optional => 1 },
+            timestamp     => {
+                type => SCALAR,
+                default   => time(),
+                callbacks => {
+                    ts_format => sub {
+                        my $ts = shift;
+                        $ts =~ /^\d+(?:\.\d+)*$/
+                            or die 'bad timestamp';
+                    },
+                },
+            },
+            level         => { type => SCALAR, default  => 1 },
+            facility      => {
+                type      => SCALAR,
+                optional  => 1,
+            },
+            line          => {
+                type      => SCALAR,
+                optional  => 1,
+                callbacks => {
+                    facility_check => sub {
+                        my $line = shift;
+                        $line =~ /^\d+$/
+                            or die 'line must be a number';
+                    },
+                },
+            },
+            file          => {
+                type      => SCALAR,
+                optional  => 1,
+            },
+        },
+    );
+
+    $p{level} = parse_level($p{level});
+
+    foreach my $key ( keys %p ) {
+
+        if ( ! $key =~ /^[\w\.\-]+$/ ) {
+            die "invalid field name '$key'";
+        }
+
+        if ( $key eq '_id' ||
+             ! ( exists $GELF_MESSAGE_FIELDS{$key} || $key =~ /^_/ )
+        ) {
+            die "invalid field '$key'";
+        }
+
+        if ( exists $GELF_MESSAGE_FIELDS{$key}
+             && $GELF_MESSAGE_FIELDS{$key} == 0 ) {
+            # field is deprecated
+            warn "$key is deprecated, send as additional field instead";
+        }
+    }
+
+    return \%p;
+}
+
+sub encode {
+    my @p = validate_pos(
+        @_,
+        { type => HASHREF },
+    );
+
+    return encode_json(validate_message(@p));
+}
+
+sub decode {
+    my @p = validate_pos(
+        @_,
+        { type => SCALAR },
+    );
+
+    my $msg = shift @p;
+
+    return validate_message(decode_json($msg));
+}
+
+sub compress {
+    my @p = validate_pos(
+        @_,
+        { type  => SCALAR },
+        {
+            type    => SCALAR,
+            default => 'gzip',
+            callbacks => {
+                compress_type => sub {
+                    my $level = shift;
+                    $level =~ /^(?:zlib|gzip)$/
+                        or die 'compression type must be gzip (default) or zlib';
+                },
+            },
+        },
+    );
+
+    my ($message, $type) = @p;
+    
+    my $method = \&gzip;
+    my $error  = \$GzipError;
+    if ( $type eq 'zlib' ) {
+        $method = \&deflate;
+        $error  = \$DeflateError;
+    }
+
+    my $msgz;
+    &{$method}(\$message => \$msgz)
+      or die "compress failed: ${$error}";
+
+    return $msgz;
+}
+
+sub uncompress {
+    my @p = validate_pos(
+        @_,
+        { type => SCALAR }
+    );
+    
+    my $message = shift @p;
+    
+    my $msg_magic = substr $message, 0, 2;
+    
+    my $method;
+    my $error;
+    if ($ZLIB_MAGIC eq $msg_magic) {
+        $method = \&inflate;
+        $error  = \$InflateError;
+    }
+    elsif ($GZIP_MAGIC eq $msg_magic) {
+        $method = \&gunzip;
+        $error  = \$GunzipError;
+    }
+    else {
+        #assume plain message
+        return $message;
+    }
+
+    my $msg;
+    &{$method}(\$message => \$msg)
+      or die "uncompress failed: ${$error}";
+
+    return $msg;
+}
+
+sub enchunk {
+    my @p = validate_pos(
+        @_,
+        { type => SCALAR },
+        { type => SCALAR, default => 'wan' },
+        { type => SCALAR, default => pack('L*', irand(),irand()) },
+    );
+
+    my ($message, $size, $message_id) = @p;
+
+    if ( length $message_id != 8 ) {
+        die "message id must be 8 bytes";
+    }
+
+    $size = parse_size($size);
+
+    if ( $size > 0
+         && length $message > $size
+    ) {
+        my @chunks;
+        while (length $message) {
+            push @chunks, substr $message, 0, $size, '';
+        }
+
+        my $n_chunks = scalar @chunks;
+        die 'Message too big' if $n_chunks > 128;
+
+        my $sequence_count = pack('C*', $n_chunks);
+
+        my @chunks_w_header;
+        my $sequence_number = 0;
+        foreach my $chunk (@chunks) {
+           push @chunks_w_header,
+              $GELF_MSG_MAGIC
+              . $message_id
+              . pack('C*',$sequence_number++)
+              . $sequence_count
+              . $chunk;
+        }
+
+        return @chunks_w_header;
+    }
+    else {
+         return ($message);
+    }
+}
+
+sub dechunk {
+    my @p = validate_pos(
+        @_,
+        { type => ARRAYREF },
+        { type => HASHREF },
+    );
+
+    my ($accumulator, $chunk) = @_;
+
+    if ( ! exists $chunk->{id}
+           && exists $chunk->{sequence_number}
+           && exists $chunk->{sequence_count}
+           && exists $chunk->{data}
+    ) {
+        die 'malformed chunk';
+    }
+
+    if ($chunk->{sequence_number} > $chunk->{sequence_count} ) {
+        die 'chunk sequence number > count';
+    }
+
+    $accumulator->[$chunk->{sequence_number}] = $chunk->{data};
+
+    if ( (scalar grep {defined} @{$accumulator}) == $chunk->{sequence_count} ) {
+        return join '', @{$accumulator};
+    }
+    else {
+        return;
+    }
+}
+
+sub is_chunked {
+    my @p = validate_pos(
+        @_,
+        { type => SCALAR },
+    );
+    
+    my $chunk = shift @p;
+    
+    return $GELF_MSG_MAGIC eq substr $chunk, 0, 2;
+}
+
+sub decode_chunk {
+    my @p = validate_pos(
+        @_,
+        { type => SCALAR },
+    );
+    
+    my $encoded_chunk = shift;
+
+    if ( is_chunked($encoded_chunk) ) {
+        
+        my $id      = substr $encoded_chunk,  2, 8;
+        my $seq_no  = unpack('C',  substr $encoded_chunk, 10, 1);
+        my $seq_cnt = unpack('C',  substr $encoded_chunk, 11, 1);
+        my $data    = substr $encoded_chunk, 12;
+        
+        return {
+            id              => $id,
+            sequence_number => $seq_no,
+            sequence_count  => $seq_cnt,
+            data            => $data,
+        };
+    }
+    else {
+        die "message not chunked";
+    }
+}
+
+sub parse_level {
+    my @p = validate_pos(
+        @_,
+        { type => SCALAR }
+    );
+    
+    my $level = shift @p;
+
+    if ( $level =~ $LEVEL_NAME_REGEX ) {
+        return $LEVEL_NAME_TO_NUMBER{$1};
+    }
+    elsif ( $level =~ /^(?:0|1|2|3|4|5|6|7)$/ ) {
+        return $level;
+    }
+    else {
+        die "level must be between 0 and 7 or a valid log level string";
+    }
+}
+
+sub parse_size {
+    my @p = validate_pos(
+        @_,
+        {
+            type      => SCALAR,
+            callbacks => {
+                compress_type => sub {
+                    my $size = shift;
+                    $size =~ /^(?:lan|wan|\d+)$/i
+                        or die 'chunk size must be "lan", "wan", a positve integer, or 0 (no chunking)';
+                },
+            },
+        },
+    );
+
+    my $size = lc(shift @p);
+
+    # These default values below were determined by
+    # examining the code for Graylog's implementation. See
+    #  https://github.com/Graylog2/gelf-rb/blob/master/lib/gelf/notifier.rb#L62
+    # I believe these are determined by likely MTU defaults
+    #  and possible heasers like so...
+    # WAN: 1500 - 8 b (UDP header) - 60 b (max IP header) - 12 b (chunking header) = 1420 b
+    # LAN: 8192 - 8 b (UDP header) - 20 b (min IP header) - 12 b (chunking header) = 8152 b
+    # Note that based on my calculation the Graylog LAN
+    #  default may be 2 bytes too big (8154)
+    # See http://stackoverflow.com/questions/14993000/the-most-reliable-and-efficient-udp-packet-size
+    # For some discussion. I don't think this is an exact science!
+
+    if ( $size eq 'wan' ) {
+        $size = 1420;
+    }
+    elsif ( $size eq 'lan' ) {
+        $size = 8152;
+    }
+
+    return $size;
+}
+
+1;
+__END__
+
+=encoding utf-8
+
+=head1 NAME
+
+Log::GELF::Util - Utility functions for Graylog's GELF format.
+
+=head1 SYNOPSIS
+
+    use Log::GELF::Util qw( encode );
+
+    my $msg = encode( { short_message => 'message', } );
+
+
+    use Log::GELF::Util qw( :all );
+
+    sub process_chunks {
+
+        my @accumulator;
+        my $msg;
+
+        do {
+            $msg = dechunk(
+                \@accumulator,
+                decode_chunk(shift())
+            );
+        } until ($msg);
+
+        return uncompress($msg);
+    };
+
+    my $hr = validate_message( short_message => 'message' );
+
+=head1 DESCRIPTION
+
+Log::GELF::Util is a collection of functions and data structures useful
+when working with Graylog's GELF Format version 1.1. It strives to support
+all of the features and options as described in the L<GELF
+specification|http://docs.graylog.org/en/latest/pages/gelf.html>.
+
+=head1 FUNCTIONS
+
+=head2 validate_message( short_message => $ )
+
+Returns a HASHREF representing the validated message with any defaulted
+values added to the data structure.
+
+Takes the following message parameters as per the GELF message
+specification:
+
+=over
+
+=item short_message
+
+Mandatory string, a short descriptive message
+
+=item version
+
+String, must be '1.1' which is the default.
+
+=item host
+
+String, defaults to hostname() from L<Sys::Hostname>.
+
+=item timestamp
+
+Timestamp, defaults to time() from L<Time::HiRes>.
+
+=item level
+
+Integer, equal to the standard syslog levels, default is 1 (ALERT).
+
+=item facility
+
+Deprecated, a warning will be issued.
+
+=item line
+
+Deprecated, a warning will be issued.
+
+=item file
+
+Deprecated, a warning will be issued.
+
+=item _[additional_field]
+
+Parameters prefixed with an underscore (_) will be treated as an additional
+field. Allowed characters in field names are any word character (letter,
+number, underscore), dashes and dots. As per the specification '_id' is
+disallowed.
+
+=back
+
+=head2 encode( \% )
+
+Accepts a HASHREF representing a GELF message. The message will be
+validated with L</validate_message>.
+
+Returns a JSON encoded string representing the message.
+
+=head2 decode( $ )
+
+Accepts a JSON encoded string representing the message. This will be
+converted to a hashref and validated with L</validate_message>.
+
+Returns a HASHREF representing the validated message with any defaulted
+values added to the data structure.
+
+=head2 compress( $ [, $] )
+
+Accepts a string and compresses it. The second parameter is optional and
+can take the value C<zlib> or C<gzip>, defaulting to C<gzip>.
+
+Returns a compressed string.
+
+=head2 uncompress( $ )
+
+Accepts a string and uncompresses it. The compression method (C<gzip> or
+C<zlib>) is determined automatically. Uncompressed strings are passed
+through unaltered.
+
+Returns an uncompressed string.
+
+=head2 enchunk( $ [, $, $] )
+
+Accepts an encoded message (JSON string) and chunks it according to the
+GELF chunking protocol.
+
+The optional second parameter is the maximum size of the chunks to produce,
+this must be a positive integer or the special strings C<lan> or C<wan>,
+see L</parse_size>. Defaults to C<wan>. A zero chunk size means no chunking
+will be applied.
+
+The optional third parameter is the message id used to identify associated
+chunks. This must be 8 bytes. It defaults to 8 bytes of randomness generated
+by L<Math::Random::MT>.
+
+If the message size is greater than the maximum size then an array of
+chunks is retuned, otherwise the message is retuned unaltered as the first
+element of an array.
+
+=head2 dechunk( \@, \% )
+
+This facilitates reassembling a GELF message from a stream of chunks.
+
+It accepts an ARRAYREF for accumulating the chunks and a HASHREF
+representing a decoded message chunk as produced by L</decode_chunk>.
+
+It returns undef if the accumulator is not complete, i.e. all chunks have
+not yet been passed it.
+
+Once the accumulator is complete it returns the de-chunked message in the
+form of a string. Note that this message may still be compressed.
+
+Here is an example usage:
+
+    sub process_chunks {
+
+        my @accumulator;
+        my $msg;
+
+        do {
+            $msg = dechunk(
+                \@accumulator,
+                decode_chunk(shift())
+            );
+        } until ($msg);
+
+        return uncompress($msg);
+    };
+
+=head2 is_chunked( $ )
+
+Accepts a string and returns a true value if it is a GELF message chunk.
+
+=head2 decode_chunk( $ )
+
+Accepts a GELF message chunk and returns an ARRAYREF representing the
+unpacked chunk. Dies if the input is not a GELF chunk.
+
+The message consists of the following keys:
+
+ id
+ sequence_number
+ sequence_count
+ data
+
+=head2 parse_level( $ )
+
+Accepts a C<syslog> style level in the form of a number (1-7) or a string
+being one of C<emerg>, C<alert>, C<crit>, C<err>, C<warn>, C<notice>,
+C<info>, or C<debug>. Dies upon invalid input.
+
+The string forms may also be elongated and will still be accepted. For
+example C<err> and C<error> are equivalent.
+
+The associated syslog level is returned in numeric form.
+
+=head2 parse_size( $ )
+
+Accepts an integer specifying the chunk size or the special string values
+C<lan> or C<wan> corresponding to 8154 or 1420 respectively. An explanation
+of these values is in the code.
+
+Returns the passed size or the value corresponding to the C<lan> or C<wan>.
+
+L</parse_size> dies upon invalid input.
+
+=head1 CONSTANTS
+
+All Log::Gelf::Util constants are Readonly perl structures. You must use
+sigils when referencing them. They can be imported individually and are
+included when importing ':all';
+
+=head2 $GELF_MSG_MAGIC
+
+The magic number used to identify a GELF message chunk.
+
+=head2 $ZLIB_MAGIC
+
+The magic number used to identify a Zlib deflated message.
+
+=head2 $GZIP_MAGIC
+
+The magic number used to identify a gzipped message.
+
+=head2 %LEVEL_NAME_TO_NUMBER
+
+A HASH mapping the level names to numbers.
+
+=head2 %LEVEL_NUMBER_TO_NAME
+
+A HASH mapping the level numbers to names.
+
+=head2 %GELF_MESSAGE_FIELDS
+
+A HASH where each key is a valid core GELF message field name. Deprecated
+fields are associated with a false value.
+
+=head1 LICENSE
+
+Copyright (C) Strategic Data.
+
+This library is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 AUTHOR
+
+Adam Clarke E<lt>adamc@strategicdata.com.auE<gt>
+
+=cut