###############################################################################
#
#   Class: NaturalDocs::Languages::Perl
#
###############################################################################
#
#   A subclass to handle the language variations of Perl.
#
#
#   Topic: Language Support
#
#       Supported:
#
#       - Packages
#       - Inheritance via "use base" and "@ISA =".
#       - Functions
#       - Variables
#
#       Not supported yet:
#
#       - Constants
#
###############################################################################

# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL

use strict;
use integer;

package NaturalDocs::Languages::Perl;

use base 'NaturalDocs::Languages::Advanced';


#
#   array: hereDocTerminators
#   An array of active Here Doc terminators, or an empty array if not active.  Each entry is an arrayref of tokens.  The entries
#   must appear in the order they must appear in the source.
#
my @hereDocTerminators;



###############################################################################
# Group: Interface Functions


#
#   Function: PackageSeparator
#   Returns the package separator symbol.
#
sub PackageSeparator
    {  return '::';  };

#
#   Function: EnumValues
#   Returns the <EnumValuesType> that describes how the language handles enums.
#
sub EnumValues
    {  return ::ENUM_GLOBAL();  };


#
#   Function: ParseFile
#
#   Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>.
#
#   Parameters:
#
#       sourceFile - The name of the source file to parse.
#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#
#   Returns:
#
#       The array ( autoTopics, scopeRecord ).
#
#       autoTopics - An arrayref of automatically generated topics from the file, or undef if none.
#       scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none.
#
sub ParseFile #(sourceFile, topicsList)
    {
    my ($self, $sourceFile, $topicsList) = @_;

    @hereDocTerminators = ( );

    # The regular block comment symbols are undef because they're all potentially JavaDoc comments.  PreprocessFile() will
    # handle translating things like =begin naturaldocs and =begin javadoc to =begin nd.
    $self->ParseForCommentsAndTokens($sourceFile, [ '#' ], undef, [ '##' ], [ '=begin nd', '=end nd' ]);

    my $tokens = $self->Tokens();
    my $index = 0;
    my $lineNumber = 1;

    while ($index < scalar @$tokens)
        {
        if ($self->TryToSkipWhitespace(\$index, \$lineNumber) ||
            $self->TryToGetPackage(\$index, \$lineNumber) ||
            $self->TryToGetBase(\$index, \$lineNumber) ||
            $self->TryToGetFunction(\$index, \$lineNumber) ||
            $self->TryToGetVariable(\$index, \$lineNumber) )
            {
            # The functions above will handle everything.
            }

        elsif ($tokens->[$index] eq '{')
            {
            $self->StartScope('}', $lineNumber, undef);
            $index++;
            }

        elsif ($tokens->[$index] eq '}')
            {
            if ($self->ClosingScopeSymbol() eq '}')
                {  $self->EndScope($lineNumber);  };

            $index++;
            }

        elsif (lc($tokens->[$index]) eq 'eval')
            {
            # We want to skip the token in this case instead of letting it fall to SkipRestOfStatement.  This allows evals with braces
            # to be treated like normal floating braces.
            $index++;
            }

        else
            {
            $self->SkipRestOfStatement(\$index, \$lineNumber);
            };
        };


    # Don't need to keep these around.
    $self->ClearTokens();

    return ( $self->AutoTopics(), $self->ScopeRecord() );
    };


#
#   Function: PreprocessFile
#
#   Overridden to support "=begin nd" and similar.
#
#   - "=begin [nd|naturaldocs|natural docs|jd|javadoc|java doc]" all translate to "=begin nd".
#   - "=[nd|naturaldocs|natural docs]" also translate to "=begin nd".
#   - "=end [nd|naturaldocs|natural docs|jd|javadoc]" all translate to "=end nd".
#   - "=cut" from a ND block translates into "=end nd", but the next line will be altered to begin with "(NDPODBREAK)".  This is
#     so if there is POD leading into ND which ends with a cut, the parser can still end the original POD because the end ND line
#     would have been removed.  Remember, <NaturalDocs::Languages::Advanced->ParseForCommentsAndTokens()> removes
#     Natural Docs-worthy comments to save parsing time.
#   - "=pod begin nd" and "=pod end nd" are supported for compatibility with ND 1.32 and earlier, even though the syntax is a
#     mistake.
#   - It also supports the wrong plural forms, so naturaldoc/natural doc/javadocs/java docs will work.
#
sub PreprocessFile #(lines)
    {
    my ($self, $lines) = @_;

    my $inNDPOD = 0;
    my $mustBreakPOD = 0;

    for (my $i = 0; $i < scalar @$lines; $i++)
        {
        if ($lines->[$i] =~ /^\=(?:(?:pod[ \t]+)?begin[ \t]+)?(?:nd|natural[ \t]*docs?|jd|java[ \t]*docs?)[ \t]*$/i)
            {
            $lines->[$i] = '=begin nd';
            $inNDPOD = 1;
            $mustBreakPOD = 0;
            }
        elsif ($lines->[$i] =~ /^\=(?:pod[ \t]+)end[ \t]+(?:nd|natural[ \t]*docs?|jd|javadocs?)[ \t]*$/i)
            {
            $lines->[$i] = '=end nd';
            $inNDPOD = 0;
            $mustBreakPOD = 0;
            }
        elsif ($lines->[$i] =~ /^\=cut[ \t]*$/i)
            {
            if ($inNDPOD)
                {
                $lines->[$i] = '=end nd';
                $inNDPOD = 0;
                $mustBreakPOD = 1;
                };
            }
        elsif ($mustBreakPOD)
            {
            $lines->[$i] = '(NDPODBREAK)' . $lines->[$i];
            $mustBreakPOD = 0;
            };
        };
    };



###############################################################################
# Group: Statement Parsing Functions
# All functions here assume that the current position is at the beginning of a statement.
#
# Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as
# often as it should.  We're making use of the fact that Perl will always return undef in these cases to keep the code simpler.


#
#   Function: TryToGetPackage
#
#   Determines whether the position is at a package declaration statement, and if so, generates a topic for it, skips it, and
#   returns true.
#
sub TryToGetPackage #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if (lc($tokens->[$$indexRef]) eq 'package')
        {
        my $index = $$indexRef + 1;
        my $lineNumber = $$lineNumberRef;

        if (!$self->TryToSkipWhitespace(\$index, \$lineNumber))
            {  return undef;  };

        my $name;

        while ($tokens->[$index] =~ /^[a-z_\:]/i)
            {
            $name .= $tokens->[$index];
            $index++;
            };

        if (!defined $name)
            {  return undef;  };

        my $autoTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $name,
                                                                                             undef, undef,
                                                                                             undef,
                                                                                             undef, undef, $$lineNumberRef);
        $self->AddAutoTopic($autoTopic);

        NaturalDocs::Parser->OnClass($autoTopic->Symbol());

        $self->SetPackage($autoTopic->Symbol(), $$lineNumberRef);

        $$indexRef = $index;
        $$lineNumberRef = $lineNumber;
        $self->SkipRestOfStatement($indexRef, $lineNumberRef);

        return 1;
        };

    return undef;
    };


#
#   Function: TryToGetBase
#
#   Determines whether the position is at a package base declaration statement, and if so, calls
#   <NaturalDocs::Parser->OnClassParent()>.
#
#   Supported Syntaxes:
#
#   > use base [list of strings]
#   > @ISA = [list of strings]
#   > @[package]::ISA = [list of strings]
#   > our @ISA = [list of strings]
#
sub TryToGetBase #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my ($index, $lineNumber, $class, $parents);

    if (lc($tokens->[$$indexRef]) eq 'use')
        {
        $index = $$indexRef + 1;
        $lineNumber = $$lineNumberRef;

        if (!$self->TryToSkipWhitespace(\$index, \$lineNumber) ||
           lc($tokens->[$index]) ne 'base')
            {  return undef;  }

        $index++;
        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
        }

    else
        {
        $index = $$indexRef;
        $lineNumber = $$lineNumberRef;

        if (lc($tokens->[$index]) eq 'our')
            {
            $index++;
            $self->TryToSkipWhitespace(\$index, \$lineNumber);
            };

        if ($tokens->[$index] eq '@')
            {
            $index++;

            while ($index < scalar @$tokens)
                {
                if ($tokens->[$index] eq 'ISA')
                    {
                    $index++;
                    $self->TryToSkipWhitespace(\$index, \$lineNumber);

                    if ($tokens->[$index] eq '=')
                        {
                        $index++;
                        $self->TryToSkipWhitespace(\$index, \$lineNumber);

                        $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
                        }
                    else
                        {  last;  };
                    }

                # If token isn't ISA...
                elsif ($tokens->[$index] =~ /^[a-z0-9_:]/i)
                    {
                    $class .= $tokens->[$index];
                    $index++;
                    }
                else
                    {  last;  };
                };
            };
        };

    if (defined $parents)
        {
        if (defined $class)
            {
            $class =~ s/::$//;
            my @classIdentifiers = split(/::/, $class);
            $class = NaturalDocs::SymbolString->Join(@classIdentifiers);
            }
        else
            {  $class = $self->CurrentScope();  };

        foreach my $parent (@$parents)
            {
            my @parentIdentifiers = split(/::/, $parent);
            my $parentSymbol = NaturalDocs::SymbolString->Join(@parentIdentifiers);

            NaturalDocs::Parser->OnClassParent($class, $parentSymbol, undef, undef, ::RESOLVE_ABSOLUTE());
            };

        $$indexRef = $index;
        $$lineNumberRef = $lineNumber;
        $self->SkipRestOfStatement($indexRef, $lineNumberRef);

        return 1;
        }
    else
        {  return undef;  };
    };


#
#   Function: TryToGetFunction
#
#   Determines whether the position is at a function declaration statement, and if so, generates a topic for it, skips it, and
#   returns true.
#
sub TryToGetFunction #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if ( lc($tokens->[$$indexRef]) eq 'sub')
        {
        my $prototypeStart = $$indexRef;
        my $prototypeStartLine = $$lineNumberRef;
        my $prototypeEnd = $$indexRef + 1;
        my $prototypeEndLine = $$lineNumberRef;

        if ( !$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine) ||
             $tokens->[$prototypeEnd] !~ /^[a-z_]/i )
            {  return undef;  };

        my $name = $tokens->[$prototypeEnd];
        $prototypeEnd++;

        # We parsed 'sub [name]'.  Now keep going until we find a semicolon or a brace.

        for (;;)
            {
            if ($prototypeEnd >= scalar @$tokens)
                {  return undef;  }

            # End if we find a semicolon, since it means we found a predeclaration rather than an actual function.
            elsif ($tokens->[$prototypeEnd] eq ';')
                {  return undef;  }

            elsif ($tokens->[$prototypeEnd] eq '{')
                {
                # Found it!

                my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );

                $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), $name,
                                                                                                          $self->CurrentScope(), undef,
                                                                                                          $prototype,
                                                                                                          undef, undef, $prototypeStartLine));

                $$indexRef = $prototypeEnd;
                $$lineNumberRef = $prototypeEndLine;

                $self->SkipRestOfStatement($indexRef, $lineNumberRef);

                return 1;
                }

            else
                {  $self->GenericSkip(\$prototypeEnd, \$prototypeEndLine, 0, 1);  };
            };
        }
    else
        {  return undef;  };
    };


#
#   Function: TryToGetVariable
#
#   Determines if the position is at a variable declaration statement, and if so, generates a topic for it, skips it, and returns
#   true.
#
#   Supported Syntaxes:
#
#   - Supports variables declared with "my", "our", and "local".
#   - Supports multiple declarations in one statement, such as "my ($x, $y);".
#   - Supports types and attributes.
#
sub TryToGetVariable #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $firstToken = lc( $tokens->[$$indexRef] );

    if ($firstToken eq 'my' || $firstToken eq 'our' || $firstToken eq 'local')
        {
        my $prototypeStart = $$indexRef;
        my $prototypeStartLine = $$lineNumberRef;
        my $prototypeEnd = $$indexRef + 1;
        my $prototypeEndLine = $$lineNumberRef;

        $self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine);


        # Get the type if present.

        my $type;

        if ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i)
            {
            do
                {
                $type .= $tokens->[$prototypeEnd];
                $prototypeEnd++;
                }
            while ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i);

            if (!$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine))
                {  return undef;  };
            };


        # Get the name, or possibly names.

        if ($tokens->[$prototypeEnd] eq '(')
            {
            # If there's multiple variables, we'll need to build a custom prototype for each one.  $firstToken already has the
            # declaring word.  We're going to store each name in @names, and we're going to use $prototypeStart and
            # $prototypeEnd to capture any properties appearing after the list.

            my $name;
            my @names;
            my $hasComma = 0;

            $prototypeStart = $prototypeEnd + 1;
            $prototypeStartLine = $prototypeEndLine;

            for (;;)
                {
                $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);

                $name = $self->TryToGetVariableName(\$prototypeStart, \$prototypeStartLine);

                if (!defined $name)
                    {  return undef;  };

                push @names, $name;

                $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);

                # We can have multiple commas in a row.  We can also have trailing commas.  However, the parenthesis must
                # not start with a comma or be empty, hence this logic does not appear earlier.
                while ($tokens->[$prototypeStart] eq ',')
                    {
                    $prototypeStart++;
                    $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);

                    $hasComma = 1;
                    }

                if ($tokens->[$prototypeStart] eq ')')
                    {
                    $prototypeStart++;
                    last;
                    }
                elsif (!$hasComma)
                    {  return undef;  };
                };


            # Now find the end of the prototype.

            $prototypeEnd = $prototypeStart;
            $prototypeEndLine = $prototypeStartLine;

            while ($prototypeEnd < scalar @$tokens &&
                     $tokens->[$prototypeEnd] !~ /^[\;\=]/)
                {
                $prototypeEnd++;
                };


            my $prototypePrefix = $firstToken . ' ';
            if (defined $type)
                {  $prototypePrefix .= $type . ' ';  };

            my $prototypeSuffix = ' ' . $self->CreateString($prototypeStart, $prototypeEnd);

            foreach $name (@names)
                {
                my $prototype = $self->NormalizePrototype( $prototypePrefix . $name . $prototypeSuffix );

                $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
                                                                                                           $self->CurrentScope(), undef,
                                                                                                           $prototype,
                                                                                                           undef, undef, $prototypeStartLine));
                };

            $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);

            $$indexRef = $prototypeEnd;
            $$lineNumberRef = $prototypeEndLine;
            }

        else # no parenthesis
            {
            my $name = $self->TryToGetVariableName(\$prototypeEnd, \$prototypeEndLine);

            if (!defined $name)
                {  return undef;  };

            while ($prototypeEnd < scalar @$tokens &&
                     $tokens->[$prototypeEnd] !~ /^[\;\=]/)
                {
                $prototypeEnd++;
                };

            my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );

            $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
                                                                                                       $self->CurrentScope(), undef,
                                                                                                       $prototype,
                                                                                                       undef, undef, $prototypeStartLine));

            $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);

            $$indexRef = $prototypeEnd;
            $$lineNumberRef = $prototypeEndLine;
            };

        return 1;
        }
    else
        {  return undef;  };
    };


#
#   Function: TryToGetVariableName
#
#   Determines if the position is at a variable name, and if so, skips it and returns the name.
#
sub TryToGetVariableName #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $name;

    if ($tokens->[$$indexRef] =~ /^[\$\@\%\*]/)
        {
        $name .= $tokens->[$$indexRef];
        $$indexRef++;

        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);

        if ($tokens->[$$indexRef] =~ /^[a-z_]/i)
            {
            $name .= $tokens->[$$indexRef];
            $$indexRef++;
            }
        else
            {  return undef;  };
        };

    return $name;
    };


#
#   Function: TryToGetListOfStrings
#
#   Attempts to retrieve a list of strings from the current position.  Returns an arrayref of them if any are found, or undef if none.
#   It stops the moment it reaches a non-string, so "string1, variable, string2" will only return string1.
#
#   Supported Syntaxes:
#
#   - Supports parenthesis.
#   - Supports all string forms supported by <TryToSkipString()>.
#   - Supports qw() string arrays.
#
sub TryToGetListOfStrings #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $parenthesis = 0;
    my $strings;

    while ($$indexRef < scalar @$tokens)
        {
        # We'll tolerate parenthesis.
        if ($tokens->[$$indexRef] eq '(')
            {
            $$indexRef++;
            $parenthesis++;
            }
        elsif ($tokens->[$$indexRef] eq ')')
            {
            if ($parenthesis == 0)
                {  last;  };

            $$indexRef++;
            $parenthesis--;
            }
        elsif ($tokens->[$$indexRef] eq ',')
            {
            $$indexRef++;
            }
        else
            {
            my ($startContent, $endContent);
            my $symbolIndex = $$indexRef;

            if ($self->TryToSkipString($indexRef, $lineNumberRef, \$startContent, \$endContent))
                {
                my $content = $self->CreateString($startContent, $endContent);

                if (!defined $strings)
                    {  $strings = [ ];  };

                if (lc($tokens->[$symbolIndex]) eq 'qw')
                    {
                    $content =~ tr/ \t\n/ /s;
                    $content =~ s/^ //;

                    my @qwStrings = split(/ /, $content);

                    push @$strings, @qwStrings;
                    }
                else
                    {
                    push @$strings, $content;
                    };
                }
            else
                {  last;  };
            };

        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
        };

    return $strings;
    };


###############################################################################
# Group: Low Level Parsing Functions


#
#   Function: GenericSkip
#
#   Advances the position one place through general code.
#
#   - If the position is on a comment or string, it will skip it completely.
#   - If the position is on an opening symbol, it will skip until the past the closing symbol.
#   - If the position is on a regexp or quote-like operator, it will skip it completely.
#   - If the position is on a backslash, it will skip it and the following token.
#   - If the position is on whitespace (including comments), it will skip it completely.
#   - Otherwise it skips one token.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#       noRegExps - If set, does not test for regular expressions.
#
sub GenericSkip #(indexRef, lineNumberRef, noRegExps)
    {
    my ($self, $indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens) = @_;
    my $tokens = $self->Tokens();

    if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n")
        {  $$indexRef += 2;  }

    # Note that we don't want to count backslashed ()[]{} since they could be in regexps.  Also, ()[] are valid variable names
    # when preceded by a string.

    # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
    elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef))
        {
        $$indexRef++;
        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}', $noRegExps, $allowStringedClosingParens);
        }
    elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
        {
        # Temporarily allow stringed closing parenthesis if it looks like we're in an anonymous function declaration with Perl's
        # cheap version of prototypes, such as "my $_declare = sub($) {}".
        my $tempAllowStringedClosingParens = $allowStringedClosingParens;
        if (!$allowStringedClosingParens)
        	{
        	my $tempIndex = $$indexRef - 1;
        	if ($tempIndex >= 0 && $tokens->[$tempIndex] =~ /^[ \t]/)
        		{  $tempIndex--;  }
        	if ($tempIndex >= 0 && $tokens->[$tempIndex] eq 'sub')
        		{  $tempAllowStringedClosingParens = 1;  }
        	}

        $$indexRef++;

        do
            {  $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')', $noRegExps, $tempAllowStringedClosingParens);  }
        while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1) && !$tempAllowStringedClosingParens);
        }
    elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
        {
        $$indexRef++;

        do
            {  $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']', $noRegExps, $allowStringedClosingParens);  }
        while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
        }

    elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) ||
            $self->TryToSkipString($indexRef, $lineNumberRef) ||
            $self->TryToSkipHereDocDeclaration($indexRef, $lineNumberRef) ||
            (!$noRegExps && $self->TryToSkipRegexp($indexRef, $lineNumberRef) ) )
        {
        }

    else
        {  $$indexRef++;  };
    };


#
#   Function: GenericSkipUntilAfter
#
#   Advances the position via <GenericSkip()> until a specific token is reached and passed.
#
sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token, noRegExps, allowStringedClosingParens)
    {
    my ($self, $indexRef, $lineNumberRef, $token, $noRegExps, $allowStringedClosingParens) = @_;
    my $tokens = $self->Tokens();

    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token)
        {  $self->GenericSkip($indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens);  };

    if ($tokens->[$$indexRef] eq "\n")
        {  $$lineNumberRef++;  };
    $$indexRef++;
    };


#
#   Function: GenericRegexpSkip
#
#   Advances the position one place through regexp code.
#
#   - If the position is on an opening symbol, it will skip until the past the closing symbol.
#   - If the position is on a backslash, it will skip it and the following token.
#   - If the position is on whitespace (not including comments), it will skip it completely.
#   - Otherwise it skips one token.
#
#   Also differs from <GenericSkip()> in that the parenthesis in $( and $) do count against the scope, where they wouldn't
#   normally.
#
#   Parameters:
#
#       indexRef - A reference to the current index.
#       lineNumberRef - A reference to the current line number.
#       inBrackets - Whether we're in brackets or not.  If true, we don't care about matching braces and parenthesis.
#
sub GenericRegexpSkip #(indexRef, lineNumberRef, inBrackets)
    {
    my ($self, $indexRef, $lineNumberRef, $inBrackets) = @_;
    my $tokens = $self->Tokens();

    if ($tokens->[$$indexRef] eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef+1] ne "\n")
        {  $$indexRef += 2;  }

    # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway.
    elsif ($tokens->[$$indexRef] eq '{' && !$self->IsBackslashed($$indexRef) && !$inBrackets)
        {
        $$indexRef++;
        $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, '}');
        }
    elsif ($tokens->[$$indexRef] eq '(' && !$self->IsBackslashed($$indexRef) && !$inBrackets)
        {
        $$indexRef++;
        $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ')');
        }
    elsif ($tokens->[$$indexRef] eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
        {
        $$indexRef++;

        do
            {  $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ']');  }
        while ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
        }

    elsif ($tokens->[$$indexRef] eq "\n")
        {
        $$lineNumberRef++;
        $$indexRef++;
        }

    else
        {  $$indexRef++;  };
    };


#
#   Function: GenericRegexpSkipUntilAfter
#
#   Advances the position via <GenericRegexpSkip()> until a specific token is reached and passed.
#
sub GenericRegexpSkipUntilAfter #(indexRef, lineNumberRef, token)
    {
    my ($self, $indexRef, $lineNumberRef, $token) = @_;
    my $tokens = $self->Tokens();

    my $inBrackets = ( $token eq ']' );

    while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token)
        {  $self->GenericRegexpSkip($indexRef, $lineNumberRef, $inBrackets);  };

    if ($tokens->[$$indexRef] eq "\n")
        {  $$lineNumberRef++;  };
    $$indexRef++;
    };


#
#   Function: SkipRestOfStatement
#
#   Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or
#   a brace group.  Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count.
#
sub SkipRestOfStatement #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    while ($$indexRef < scalar @$tokens &&
             $tokens->[$$indexRef] ne ';' &&
             !($tokens->[$$indexRef] eq '{' && !$self->IsStringed($$indexRef)) )
        {
        $self->GenericSkip($indexRef, $lineNumberRef);
        };

    if ($tokens->[$$indexRef] eq ';')
        {  $$indexRef++;  }
    elsif ($tokens->[$$indexRef] eq '{')
        {  $self->GenericSkip($indexRef, $lineNumberRef);  };
    };


#
#   Function: TryToSkipWhitespace
#
#   If the current position is on whitespace it skips them and returns true.  If there are a number of these in a row, it skips them
#   all.
#
#   Supported Syntax:
#
#       - Whitespace
#       - Line break
#       - All comment forms supported by <TryToSkipComment()>
#       - Here Doc content
#
sub TryToSkipWhitespace #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $result;

    while ($$indexRef < scalar @$tokens)
        {
        if ($self->TryToSkipHereDocContent($indexRef, $lineNumberRef) ||
            $self->TryToSkipComment($indexRef, $lineNumberRef))
            {
            $result = 1;
            }
        elsif ($tokens->[$$indexRef] =~ /^[ \t]/)
            {
            $$indexRef++;
            $result = 1;
            }
        elsif ($tokens->[$$indexRef] eq "\n")
            {
            $$indexRef++;
            $$lineNumberRef++;
            $result = 1;
            }
        else
            {  last;  };
        };

    return $result;
    };


#
#   Function: TryToSkipComment
#   If the current position is on a comment, skip past it and return true.
#
sub TryToSkipComment #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;

    return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) ||
                $self->TryToSkipPODComment($indexRef, $lineNumberRef) );
    };


#
#   Function: TryToSkipLineComment
#   If the current position is on a line comment symbol, skip past it and return true.
#
sub TryToSkipLineComment #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Note that $#var is not a comment.
    if ($tokens->[$$indexRef] eq '#' && !$self->IsStringed($$indexRef))
        {
        $self->SkipRestOfLine($indexRef, $lineNumberRef);
        return 1;
        }
    else
        {  return undef;  };
    };


#
#   Function: TryToSkipPODComment
#   If the current position is on a POD comment symbol, skip past it and return true.
#
sub TryToSkipPODComment #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Note that whitespace is not allowed before the equals sign.  It must directly start a line.
    if ($tokens->[$$indexRef] eq '=' &&
        ( $$indexRef == 0 || $tokens->[$$indexRef - 1] eq "\n" ) &&
        $tokens->[$$indexRef + 1] =~ /^[a-z]/i )
        {
        # Skip until =cut or (NDPODBREAK).  Note that it's theoretically possible for =cut to appear without a prior POD directive.

        do
            {
            if ($tokens->[$$indexRef] eq '=' && lc( $tokens->[$$indexRef + 1] ) eq 'cut')
                {
                $self->SkipRestOfLine($indexRef, $lineNumberRef);
                last;
                }
            elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens &&
                    $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')')
                {
                $$indexRef += 3;
                last;
                }
            else
                {
                $self->SkipRestOfLine($indexRef, $lineNumberRef);
                };
            }
        while ($$indexRef < scalar @$tokens);

        return 1;
        }

    # It's also possible that (NDPODBREAK) will appear without any opening pod statement because "=begin nd" and "=cut" will
    # still result in one.  We need to pick off the stray (NDPODBREAK).
    elsif ($tokens->[$$indexRef] eq '(' && $$indexRef + 2 < scalar @$tokens &&
            $tokens->[$$indexRef+1] eq 'NDPODBREAK' && $tokens->[$$indexRef+2] eq ')')
        {
        $$indexRef += 3;
        return 1;
        }

    else
        {  return undef;  };
    };


#
#   Function: TryToSkipString
#   If the current position is on a string delimiter, skip past the string and return true.
#
#   Parameters:
#
#       indexRef - A reference to the index of the position to start at.
#       lineNumberRef - A reference to the line number of the position.
#       startContentIndexRef - A reference to the variable in which to store the index of the first content token.  May be undef.
#       endContentIndexRef - A reference to the variable in which to store the index of the end of the content, which is one past
#                                        the last content token.  may be undef.
#
#   Returns:
#
#       Whether the position was at a string.  The index, line number, and content index variabls will only be changed if true.
#
#   Syntax Support:
#
#       - Supports quotes, apostrophes, backticks, q(), qq(), qx(), and qw().
#       - All symbols are supported for the letter forms.
#
sub TryToSkipString #(indexRef, lineNumberRef, startContentIndexRef, endContentIndexRef)
    {
    my ($self, $indexRef, $lineNumberRef, $startContentIndexRef, $endContentIndexRef) = @_;
    my $tokens = $self->Tokens();

    # The three string delimiters.  All three are Perl variables when preceded by a dollar sign.
    if (!$self->IsStringed($$indexRef) &&
        ( $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'', '\'', $startContentIndexRef, $endContentIndexRef) ||
          $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"', '"', $startContentIndexRef, $endContentIndexRef) ||
          $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '`', '`', $startContentIndexRef, $endContentIndexRef) ) )
        {
        return 1;
        }
    elsif ($tokens->[$$indexRef] =~ /^(?:q|qq|qx|qw)$/i &&
            ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*]$/))
        {
        $$indexRef++;

        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);

        my $openingSymbol = $tokens->[$$indexRef];
        my $closingSymbol;

        if ($openingSymbol eq '{')
            {  $closingSymbol = '}';  }
        elsif ($openingSymbol eq '(')
            {  $closingSymbol = ')';  }
        elsif ($openingSymbol eq '[')
            {  $closingSymbol = ']';  }
        elsif ($openingSymbol eq '<')
            {  $closingSymbol = '>';  }
        else
            {  $closingSymbol = $openingSymbol;  };

        $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $openingSymbol, $closingSymbol,
                                                      $startContentIndexRef, $endContentIndexRef);

        return 1;
        }
    else
        {  return undef;  };
    };


#
#   Function: TryToSkipHereDocDeclaration
#
#   If the current position is on a Here Doc declaration, add its terminators to <hereDocTerminators> and skip it.
#
#   Syntax Support:
#
#       - Supports <<EOF
#       - Supports << "String" with all string forms supported by <TryToSkipString()>.
#
sub TryToSkipHereDocDeclaration #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $index = $$indexRef;
    my $lineNumber = $$lineNumberRef;

    if ($tokens->[$index] eq '<' && $tokens->[$index + 1] eq '<')
        {
        $index += 2;
        my $success;

        # No whitespace allowed with the bare word.
        if ($tokens->[$index] =~ /^[a-z0-9_]/i)
            {
            push @hereDocTerminators, [ $tokens->[$index] ];
            $index++;
            $success = 1;
            }
        else
            {
            $self->TryToSkipWhitespace(\$index, \$lineNumber);

            my ($contentStart, $contentEnd);
            if ($self->TryToSkipString(\$index, \$lineNumber, \$contentStart, \$contentEnd))
                {
                push @hereDocTerminators, [ @{$tokens}[$contentStart..$contentEnd - 1] ];
                $success = 1;
                };
            };

        if ($success)
            {
            $$indexRef = $index;
            $$lineNumberRef = $lineNumber;

            return 1;
            };
        };

    return 0;
    };


#
#   Function: TryToSkipHereDocContent
#
#   If the current position is at the beginning of a line and there are entries in <hereDocTerminators>, skips lines until all the
#   terminators are exhausted or we reach the end of the file.
#
#   Returns:
#
#       Whether the position was on Here Doc content.
#
sub TryToSkipHereDocContent #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # We don't use IsFirstLineToken() because it really needs to be the first line token.  Whitespace is not allowed.
    if ($$indexRef > 0 && $tokens->[$$indexRef - 1] eq "\n")
        {
        my $success = (scalar @hereDocTerminators > 0);

        while (scalar @hereDocTerminators && $$indexRef < scalar @$tokens)
            {
            my $terminatorIndex = 0;

            while ($hereDocTerminators[0]->[$terminatorIndex] eq $tokens->[$$indexRef])
                {
                $terminatorIndex++;
                $$indexRef++;
                };

            if ($terminatorIndex == scalar @{$hereDocTerminators[0]} &&
                ($tokens->[$$indexRef] eq "\n" || ($tokens->[$$indexRef] =~ /^[ \t]/ && $tokens->[$$indexRef + 1] eq "\n")) )
                {
                shift @hereDocTerminators;
                $$indexRef++;
                $$lineNumberRef++;
                }
            else
                {  $self->SkipRestOfLine($indexRef, $lineNumberRef);  };
            };

        return $success;
        }

    else
        {  return 0;  };
    };


#
#   Function: TryToSkipRegexp
#   If the current position is on a regular expression or a quote-like operator, skip past it and return true.
#
#   Syntax Support:
#
#       - Supports //, ??, m//, qr//, s///, tr///, and y///.
#       - All symbols are supported for the letter forms.
#       - ?? is *not* supported because it could cause problems with ?: statements.  The generic parser has a good chance of
#         successfully stumbling through a regex, whereas the regex code will almost certainly see the rest of the file as part of it.
#
sub TryToSkipRegexp #(indexRef, lineNumberRef)
    {
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $isRegexp;

    # If it's a supported character sequence that's not a variable (ex $qr)...
    if ($tokens->[$$indexRef] =~ /^(?:m|qr|s|tr|y)$/i &&
         ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*\-]$/) )
        {  $isRegexp = 1;  }

    elsif ($tokens->[$$indexRef] eq '/' && !$self->IsStringed($$indexRef))
        {
        # This is a bit of a hack.  If we find a random slash, it could be a divide operator or a bare regexp.  Find the first previous
        # non-whitespace token and if it's text, a closing brace, or a string, assume it's a divide operator.  (Strings don't make
        # much pratical sense there but a regexp would be impossible.)  Otherwise assume it's a regexp.

        # We make a special consideration for split() appearing without parenthesis.  If the previous token is split and it's not a
        # variable, assume it is a regexp even though it fails the above test.

        my $index = $$indexRef - 1;

        while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/)
            {  $index--;  };

        if ($index < 0 || $tokens->[$index] !~ /^[a-zA-Z0-9_\)\]\}\'\"\`]/ ||
            ($tokens->[$index] =~ /^split|grep$/ && $index > 0 && $tokens->[$index-1] !~ /^[\$\%\@\*]$/) )
            {  $isRegexp = 1;  };
        };

    if ($isRegexp)
        {
        my $operator = lc($tokens->[$$indexRef]);
        my $index = $$indexRef;
        my $lineNumber = $$lineNumberRef;

        if ($operator =~ /^[\?\/]/)
            {  $operator = 'm';  }
        else
            {
            $index++;

            # Believe it or not, s#...# is allowed.  We can't pass over number signs here.
            if ($tokens->[$index] ne '#')
                {  $self->TryToSkipWhitespace(\$index, \$lineNumber);  };
            };

        if ($tokens->[$index] =~ /^\w/)
            {  return undef;  };
        if ($tokens->[$index] eq '=' && $tokens->[$index+1] eq '>')
        	{  return undef;  };

        my $openingSymbol = $tokens->[$index];
        my $closingSymbol;

        if ($openingSymbol eq '{')
            {  $closingSymbol = '}';  }
        elsif ($openingSymbol eq '(')
            {  $closingSymbol = ')';  }
        elsif ($openingSymbol eq '[')
            {  $closingSymbol = ']';  }
        elsif ($openingSymbol eq '<')
            {  $closingSymbol = '>';  }
        else
            {  $closingSymbol = $openingSymbol;  };

        $index++;

        $self->GenericRegexpSkipUntilAfter(\$index, \$lineNumber, $closingSymbol);

        $$indexRef = $index;
        $$lineNumberRef = $lineNumber;

        if ($operator =~ /^(?:s|tr|y)$/)
            {
            if ($openingSymbol ne $closingSymbol)
                {
                $self->TryToSkipWhitespace($indexRef, $lineNumberRef);

                $openingSymbol = $tokens->[$index];

                if ($openingSymbol eq '{')
                    {  $closingSymbol = '}';  }
                elsif ($openingSymbol eq '(')
                    {  $closingSymbol = ')';  }
                elsif ($openingSymbol eq '[')
                    {  $closingSymbol = ']';  }
                elsif ($openingSymbol eq '<')
                    {  $closingSymbol = '>';  }
                else
                    {  $closingSymbol = $openingSymbol;  };

                $$indexRef++;
                };

            if ($operator eq 's')
                {
                $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, $closingSymbol, 1);
                }
            else # ($operator eq 'tr' || $operator eq 'y')
                {
                while ($$indexRef < scalar @$tokens &&
                          ($tokens->[$$indexRef] ne $closingSymbol || $self->IsBackslashed($$indexRef)) )
                    {
                    if ($tokens->[$$indexRef] eq "\n")
                        {  $$lineNumberRef++;  };
                    $$indexRef++;
                    };

                $$indexRef++;
                };
            };

        # We want to skip any letters after the regexp.  Otherwise something like tr/a/b/s; could have the trailing s; interpreted
        # as another regexp.  Whitespace is not allowed between the closing symbol and the letters.

        if ($tokens->[$$indexRef] =~ /^[a-z]/i)
            {  $$indexRef++;  };

        return 1;
        };

    return undef;
    };



###############################################################################
# Group: Support Functions


#
#   Function: IsStringed
#
#   Returns whether the position is after a string (dollar sign) character.  Returns false if it's preceded by two dollar signs so
#   "if ($x == $$)" doesn't skip the closing parenthesis as stringed.
#
#   Parameters:
#
#       index - The index of the postition.
#
sub IsStringed #(index)
    {
    my ($self, $index) = @_;
    my $tokens = $self->Tokens();

    if ($index > 0 && $tokens->[$index - 1] eq '$' && !($index > 1 && $tokens->[$index - 2] eq '$'))
        {  return 1;  }
    else
        {  return undef;  };
    };


1;
