#! /usr/bin/perl -w

use strict;
use HTML::Parser;
use XML::Parser::Expat;

$| = 1;

my %rfc_pub;
my %rfc_now;
my $minrfc = 1000;
my $maxrfc = -1;
my %status;
my %notissued;
my %fyi;
my $parser;

# 1. parse the rfc status from the xml index file

my $c_rfc;
my $c_pub;
my $c_now;
my $in_rfc = 0;
my $in_ignore = 0;
my $in_notissued = 0;
my $is_old;
my @e_stat;
my $p_element;

$parser = new XML::Parser::Expat;
$parser->setHandlers('Start' => \&e_start,
                     'End'   => \&e_end);
$parser->parsefile('extra/rfc-index.xml');

sub e_start {
    my ($p, $el, %attrs) = @_;
    my $parent = $e_stat[$#e_stat];
    push @e_stat, $el;
    my $path = join '/', @e_stat;
    if ($el eq 'rfc-entry') {
        # new rfc entry starting
        if(defined($c_rfc) || defined($c_pub) || defined($c_now)) {
            die "Re-starting rfc-entry?";
        }
        $c_rfc = '';
        $c_pub = '';
        $c_now = '';
        $in_rfc = 1;
        $is_old = 0;
    } elsif ($el eq 'rfc-not-issued-entry') {
        $in_notissued = 1;
        $c_rfc = '';
    } elsif ($path eq 'rfc-index/fyi-entry/is-also/doc-id') {
        $c_rfc = '';
        die "Restarting doc-id while in doc_id $c_rfc"
            unless $c_rfc eq '';
        $p->setHandlers('Char' => \&e_docid);
    } elsif ($in_rfc) {
        if($el eq 'doc-id' && $parent eq 'rfc-entry') {
            die "Restarting doc-id while in doc_id $c_rfc"
                unless $c_rfc eq '';
            $p->setHandlers('Char' => \&e_docid);
        } elsif($el eq 'current-status') {
            $p->setHandlers('Char' => \&e_nstatus);
        } elsif($el eq 'publication-status') {
            $p->setHandlers('Char' => \&e_pstatus);
        } elsif($el eq 'obsoleted-by') {
            # mark the current rfc as being obsoleted
            $is_old = 1;
        }
    } elsif ($in_notissued) {
        if($el eq 'doc-id' && $parent eq 'rfc-not-issued-entry') {
            die "Restarting doc-id while in doc_id $c_rfc"
                unless $c_rfc eq '';
            $p->setHandlers('Char' => \&e_docid);
        }
    }
}

sub e_end {
    my ($p, $el) = @_;
    $p->setHandlers('Char' => 0);
    my $path = join '/', @e_stat;
    pop @e_stat;
    if($el eq 'rfc-entry') {
        die "Missing rfc id or pub or now"
            unless ($c_rfc && $c_pub && $c_now);
        die "Invalid RFC id $c_rfc"
            unless ($c_rfc =~ /^RFC\d+$/);
        $c_rfc = int(substr($c_rfc, 3));
        #print "Got $c_rfc: $c_pub -> $c_now\n";
        $maxrfc = $c_rfc if $c_rfc > $maxrfc;
        $minrfc = $c_rfc if $c_rfc < $minrfc;
        $status{$c_rfc}{pub} = $c_pub;
        $status{$c_rfc}{now} = $c_now;
        $status{$c_rfc}{old} = $is_old;
        $c_rfc = undef;
        $c_pub = undef;
        $c_now = undef;
        $in_rfc = 0;
    } elsif ($el eq 'rfc-not-issued-entry') {
        die "Missing not issued rfc id"
            unless ($c_rfc);
        $c_rfc = int(substr($c_rfc, 3));
        $notissued{$c_rfc} = 1;
        $c_rfc = undef;
    } elsif($path eq 'rfc-index/fyi-entry/is-also/doc-id') {
        die "Missing fyi rfc id"
            unless ($c_rfc);
        $c_rfc = int(substr($c_rfc, 3));
        $fyi{$c_rfc} = 1;
        $c_rfc = undef;
    }
}

sub e_docid {
    my ($p, $text) = @_;
    $c_rfc .= $text;
}

sub e_nstatus {
    my ($p, $text) = @_;
    $c_now .= $text;
}

sub e_pstatus {
    my ($p, $text) = @_;
    $c_pub .= $text;
}

# 3. drafts in the queue

my $in_tag;

$parser = HTML::Parser->new(api_version => 3,
                            handlers => {
                                start => [\&qu_start, 'tagname'],
                                end   => [\&qu_end, 'tagname'],
                                text  => [\&qu_text, 'dtext'],
                            });
$parser->unbroken_text(1);

my %drafts;

sub qu_text
{
    my $text = shift;
    if($in_tag eq 'a' && $text =~ /^draft-/) {
        $drafts{$text}++;
    }
}

sub qu_start
{
    my $tag = shift;
    $in_tag = $tag;
}

sub qu_end
{
    my $tag = shift;
    if($in_tag eq 'a' && $tag eq 'a') {
        # we correctly ended an 'a' element
        $in_tag = '';
    }
}

my $url = 'extra/queue.html';

print "parsing $url\n";
$in_tag = '';
$parser->parse_file($url) or die "$url: $!";

# 4. show results

print "RFC range is $minrfc - $maxrfc\n";

my $out = 'rfc-status.txt';
open OUT, "> $out" or die "$out: $!";
my %name = (
    'UNKNOWN'                                => 'unclassified',
    'INTERNET STANDARD'                      => 'standard',
    'EXPERIMENTAL'                           => 'experimental',
    'HISTORIC'                               => 'historic',
    'INFORMATIONAL'                          => 'informational',
    'PROPOSED STANDARD'                      => 'proposed-standard',
    'DRAFT STANDARD'                         => 'draft-standard',
    'BEST CURRENT PRACTICE'                  => 'best-current-practice',
    );

my $len1 = 0;
for my $p (values %name) {
    $len1 = (4+length $p) if (4+length($p)) > $len1;
}

for (my $rfc = $minrfc; $rfc <= $maxrfc; $rfc++) {
    if(defined($notissued{$rfc})) {
        next;
    }
    my $have = -f "../rfc$rfc.txt";
    my $now = $status{$rfc}{now};
    if(!defined($now)) {
        if($have) {
            die "Uncategorized existing RFC $rfc";
        } else {
            next;
        }
    }
    die "Undefined category '$now' for rfc $rfc"
        unless defined($name{$now});

    my $category = $name{$now};
    if(!defined($category)) {
        $category = 'unclassified';
    }
    if($status{$rfc}{old}) {
        $category = "old/$category";
    }
    if($fyi{$rfc}) {
        $category = "for-your-information";
    }
    print OUT sprintf("%04d %-*s\n", $rfc, $len1, $category);
}
close OUT or die "$out: $!";

$out = 'rfc-queue.txt';
open OUT, "> $out" or die "$out: $!";
for my $draft (sort keys %drafts) {
    my $have = (-f "extra/$draft") ? "" : "[missing] ";
    print OUT $have, $draft, "\n";
}
close OUT or die "$out: $!";


print "\ndone.\n";
