package WWW::LinkChecker::Internal::App::Command::check;
$WWW::LinkChecker::Internal::App::Command::check::VERSION = '0.8.1';
use strict;
use warnings;
use 5.014;

use WWW::LinkChecker::Internal::App -command;

use List::Util 1.34 qw/ any none /;
use WWW::Mechanize ();

# Long description of the "check" command; abstract() in this package
# returns the same text.
sub description
{
    my $summary = "check a site for broken internal links";

    return $summary;
}

# One-line summary of the command: simply whatever the invocant's
# description() method returns.
sub abstract
{
    my ($self) = @_;

    return $self->description();
}

# Command-line option specification for the "check" command.
#
# Returns a list of [ option-spec, help-text ] pairs.  The "=s" options take
# one string value; the "=s@" options may be repeated and collect a list of
# strings.
sub opt_spec
{
    return (
        [ 'base=s'                => 'Base URL' ],
        [ 'before-insert-skip=s@' => 'before-insert-skip regexes' ],
        [ 'pre-skip=s@'           => 'pre-skip regexes' ],
        [ 'start=s'               => 'alternative start URL' ],
        [ 'state-filename=s'      => 'filename to keep the state' ],
    );
}

# Entry point of the "check" command: crawl the site starting at the start
# URL and stop at the first URL that cannot be fetched.
#
# Parameters:
#   $self - the command object (not used).
#   $opt  - parsed options, read as a hash with the keys base, pre_skip,
#           before_insert_skip, start and state_filename.
#   $args - positional arguments (not used).
#
# Behaviour:
#   * URLs still to be visited are kept on a stack together with the URL of
#     the page that linked to them; every URL ever queued is remembered so
#     it is queued only once.
#   * Only links that begin with the base URL and match none of the
#     before-insert-skip regexes are queued.
#   * A URL matching a pre-skip regex is still fetched, but the links on it
#     are not followed.
#   * If a state filename was given and that file exists, the crawl resumes
#     from the state stored in it instead of starting afresh.
#
# Returns nothing.  Dies if --base is missing, if an existing state file
# cannot be read or parsed, or when a URL cannot be fetched; in the last
# case the current state is first written to the state file (when one was
# given) with the failing URL still on top of the stack.
sub execute
{
    my ( $self, $opt, $args ) = @_;

    my $base_url = $opt->{base};
    if ( !defined($base_url) )
    {
        die "--base must be specified";
    }

    # Default to an empty list so that an option which was not given is
    # neither dereferenced as undef nor autovivified inside $opt.
    my @pre_skip_regexes = map { qr/$_/ } @{ $opt->{pre_skip} // [] };
    my @before_insert_skips_regexes =
        map { qr/$_/ } @{ $opt->{before_insert_skip} // [] };

    my $state_fn  = $opt->{state_filename};
    my $start_url = ( $opt->{start} || $base_url );

    my $state =
        ( $state_fn && ( -e $state_fn ) )
        ? _load_state($state_fn)
        : {
        stack            => [ { url => $start_url, from => undef(), } ],
        encountered_urls => { $start_url => 1, },
        };

STACK:
    while ( my $url_rec = $state->{stack}->[-1] )
    {
        my $url = $url_rec->{'url'};
        print "Checking SRC URL '$url'\n";

        my $mech = WWW::Mechanize->new();

        # Rely on the eval's return value rather than on $@ to detect
        # failure.
        my $fetched_ok = eval { $mech->get($url); 1; };

        if ( !$fetched_ok )
        {
            if ($state_fn)
            {
                # A problem while saving must not hide the broken-link
                # report below, so it is only warned about.
                eval { _save_state( $state_fn, $state ); 1; }
                    or warn "Could not save state to '$state_fn': $@";
            }
            my $from = ( $url_rec->{from} // "START" );
            die "SRC URL $from points to '$url'.";
        }

        # Remove the URL only after a successful fetch, so that a saved
        # state still has the failing URL on top of the stack.
        pop( @{ $state->{stack} } );

        if ( any { $url =~ $_ } @pre_skip_regexes )
        {
            next STACK;
        }

        foreach my $link ( $mech->links() )
        {
            my $dest_url = $link->url_abs() . "";

            # Drop the fragment, including an empty one ("page#"), so that
            # the same page is not queued twice.
            $dest_url =~ s{#[^#]*\z}{}ms;

            if (    ( !exists( $state->{encountered_urls}->{$dest_url} ) )
                and $dest_url =~ m{\A\Q$base_url\E}ms
                and ( none { $dest_url =~ $_ } @before_insert_skips_regexes ) )
            {
                $state->{encountered_urls}->{$dest_url} = 1;
                push @{ $state->{stack} }, { url => $dest_url, from => $url, };
            }
        }
    }

    print
"Finished checking the site under the base URL '$base_url'.\nNo broken links were found\n";

    return;
}

# Read the crawl state from the JSON file $state_fn and return it as a data
# structure.  Dies if the file cannot be read or does not contain valid JSON.
sub _load_state
{
    my ($state_fn) = @_;

    require JSON::PP;

    # decode_json() expects UTF-8 encoded bytes, hence the raw layer.
    open my $in, '<:raw', $state_fn
        or die "Cannot open state file '$state_fn' for reading: $!";
    my $json = do { local $/; <$in> };
    close($in)
        or die "Cannot close state file '$state_fn': $!";

    return JSON::PP::decode_json($json);
}

# Write the crawl state $state to the file $state_fn as JSON.  Dies if the
# file cannot be written.
sub _save_state
{
    my ( $state_fn, $state ) = @_;

    require JSON::PP;

    # encode_json() already returns UTF-8 encoded bytes; write them as they
    # are so that they are not encoded a second time.
    open my $out, '>:raw', $state_fn
        or die "Cannot open state file '$state_fn' for writing: $!";
    print {$out} JSON::PP::encode_json($state)
        or die "Cannot write to state file '$state_fn': $!";
    close($out)
        or die "Cannot close state file '$state_fn': $!";

    return;
}

1;

__END__

=pod

=encoding UTF-8

=head1 VERSION

version 0.8.1

=for :stopwords cpan testmatrix url bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan

=head1 SUPPORT

=head2 Websites

The following websites have more information about this module, and may be of help to you. As always,
in addition to those websites please use your favorite search engine to discover more resources.

=over 4

=item *

MetaCPAN

A modern, open-source CPAN search engine, useful to view POD in HTML format.

L<https://metacpan.org/release/WWW-LinkChecker-Internal>

=item *

RT: CPAN's Bug Tracker

The RT ( Request Tracker ) website is the default bug/issue tracking system for CPAN.

L<https://rt.cpan.org/Public/Dist/Display.html?Name=WWW-LinkChecker-Internal>

=item *

CPANTS

CPANTS is a website that analyzes the Kwalitee ( code metrics ) of a distribution.

L<http://cpants.cpanauthors.org/dist/WWW-LinkChecker-Internal>

=item *

CPAN Testers

CPAN Testers is a network of smoke testers who run automated tests on uploaded CPAN distributions.

L<http://www.cpantesters.org/distro/W/WWW-LinkChecker-Internal>

=item *

CPAN Testers Matrix

The CPAN Testers Matrix is a website that provides a visual overview of the test results for a distribution on various Perls/platforms.

L<http://matrix.cpantesters.org/?dist=WWW-LinkChecker-Internal>

=item *

CPAN Testers Dependencies

The CPAN Testers Dependencies is a website that shows a chart of the test results of all dependencies for a distribution.

L<http://deps.cpantesters.org/?module=WWW::LinkChecker::Internal>

=back

=head2 Bugs / Feature Requests

Please report any bugs or feature requests by email to C<bug-www-linkchecker-internal at rt.cpan.org>, or through
the web interface at L<https://rt.cpan.org/Public/Bug/Report.html?Queue=WWW-LinkChecker-Internal>. You will be automatically notified of any
progress on the request by the system.

=head2 Source Code

The code is open to the world, and available for you to hack on. Please feel free to browse it and play
with it, or whatever. If you want to contribute patches, please send me a diff or prod me to pull
from your repository :)

L<https://github.com/shlomif/perl-www-linkchecker-internal>

  git clone git://github.com/shlomif/perl-www-linkchecker-internal.git

=head1 AUTHOR

Shlomi Fish <shlomif@cpan.org>

=head1 BUGS

Please report any bugs or feature requests on the bugtracker website
L<https://github.com/shlomif/perl-www-linkchecker-internal/issues>

When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2020 by Shlomi Fish.

This is free software, licensed under:

  The MIT (X11) License

=cut
