#!/usr/bin/perl

# tag-stats - tag classification statistics
#
# This script displays statistics and data for tag classification based on
# Severity fields and their mapping to a E/W/I code.
#
# The verbose options (-v, -vv, -vvv) can be used to display a detailed list
# of which tags are assigned to each category.

use v5.20;
use warnings;
use utf8;
use autodie qw(opendir closedir);

use Const::Fast;
use Cwd qw(realpath);
use File::Basename qw(dirname);
use Unicode::UTF8 qw(encode_utf8);

# neither Path::This nor lib::relative are in Debian
use constant THISFILE => realpath __FILE__;
use constant THISDIR => dirname realpath __FILE__;

# use Lintian modules that belong to this program
use lib THISDIR . '/../lib';

use Lintian::Deb822::Parser qw(read_dpkg_control);
use Lintian::Tag;

const my $SPACE => q{ };
const my $INDENT => $SPACE x 4;
const my $EXTRA_VERBOSE => 3;

$ENV{LINTIAN_BASE} = realpath(THISDIR . '/..')
  // die encode_utf8('Cannot resolve LINTIAN_BASE');

my @severities = reverse qw(pedantic info warning error);
my @types = qw(E W I P);

my %stats;
my $num_tags = 0;
my $num_ok = 0;
my $percent = 0;

my $verbose = $ARGV[0] ? ($ARGV[0] =~ s/v/v/g) : 0;

opendir(my $checkdir, "$ENV{LINTIAN_BASE}/checks");
for my $check (readdir($checkdir)) {
    next unless $check =~ /\.desc$/;

    my @tags = read_dpkg_control("$ENV{LINTIAN_BASE}/checks/$check");
    my $desc = $tags[0];

    shift(@tags);

    foreach my $tag (@tags) {
        my $name = $tag->{Tag};
        my $severity = $tag->{Severity};

        $severity = 'unclassified' if not $severity;

        my $info
          = Lintian::Tag->new($tag, $desc->{'Check-Script'},$desc->{'Type'});
        my $code = $info->code;

        push(@{$stats{severity}{$severity}}, $name);
        push(@{$stats{type}{severity}{$code}{$severity}}, $name);

        $num_tags++;
    }
}

closedir($checkdir);

print encode_utf8("Severity\n");
foreach my $s (@severities) {
    my $tags = $stats{severity}{$s} // [];
    print encode_utf8("  $s: " . @{$tags} . "\n");
    print encode_utf8($INDENT . join("\n    ", sort @{$tags}) . "\n")
      if $verbose >= $EXTRA_VERBOSE;
}

foreach my $t (@types) {
    print encode_utf8("\nType $t Severity\n");
    foreach my $s (@severities) {
        if (my $tags = $stats{type}{severity}{$t}{$s}) {
            print encode_utf8("  $s: " . @{$tags} . "\n");
            print encode_utf8($INDENT . join("\n    ", sort @{$tags}) . "\n")
              if $verbose >= 2;
        }
    }
}

print encode_utf8("\nCollections\n");
foreach my $s (@severities) {
    if (my $needs = $stats{needs}{$s}) {
        my $size = scalar keys %{$needs};
        my @list = sort keys %{$needs};
        print encode_utf8("  $s: $size\n");
        print encode_utf8($INDENT . join("\n    ", @list) . "\n")
          if $verbose >= 2;
    }
}

if ($verbose >= 1 and exists $stats{severity}{unclassified}) {
    print encode_utf8("\nUnclassified tags\n");
    print encode_utf8(
        $SPACE x 2 . join("\n  ", @{$stats{severity}{unclassified}}) . "\n");
}

# Local Variables:
# indent-tabs-mode: nil
# cperl-indent-level: 4
# End:
# vim: syntax=perl sw=4 sts=4 sr et
