#!/usr/bin/env perl
use strict;

# The Perl version of 'strregex'
# Used as a reference.

sub usage()
{
my $msg = <<'USAGE';

Description:
The Perl version of 'strregex'
Used as a reference for testing.

The '-bre' flag is unique to 'strregex.pl' as Perl doesn't know about BRE syntax.
With BRE, the 4 characters '\{' '\}' '\(' '\)' are escaped.
They have to be "unescaped" (removing the backslash) before using them in a Perl regex.

Usage:
./strregex.pl string 'm/match/[i][g][m]' [-c] [-bre | -<ERROR_CODE>]
./strregex.pl string 's/this/that/[i][g][m]' [-c] [-bre | -<ERROR_CODE>]
./strregex.pl string 'match' [-c] [-bre | -<ERROR_CODE>]

# Examples
./strregex.pl 'Hello, world!' 'm/^hello/i'
./strregex.pl 'Hello, world!' 's/^Hello/Hi/'
./strregex.pl 'Hello, world!' 'llo'
./strregex.pl '2323' 's/\(2\)\(3\)\1\2/Nothing Compares ${1} U/' -bre # Regex using BRE syntax, requires the -bre flag.
./strregex.pl '2323' 's/(2)(3)\1\2/Nothing Compares ${1} U/'          # Equivalent to the line above using ERE syntax, without the -bre flag.
./strregex.pl '1313' 'm/(1|2)(3|4))' -c                               # Syntax error in regex causes (expected) runtime error:
                                                                      #   Unmatched ) in regex; marked by <-- HERE in m/(?:m/(1|2)(3|4))) <-- HERE / at ./strregex.pl line 191.
./strregex.pl '1313' 'm/(1|2)(3|4))' -c -8                            # Given an expected error code, only print the error code (the runtime error is avoided):
                                                                      #   -8
./strregex.pl 'hello1)' 'hello(1))' -c 1                              # Also accept positive codes. Used with GNU regex oddities, where a regex (unexpectedly) compiles and matches, even if the Perl regex engine would have complained:
                                                                      #    1

USAGE
print $msg;
}


# isescaped(): (from 'strregex.h')
# Check if a character is escaped or not by checking the number of preceeding backslashes.
# For example, handle '$1' as a substition backreference, '\$1' as a literal string, and '\\$1' as a backreference preceeded by '\'
sub isescaped
{
    my ($str, $pos) = @_;
    my $count = 0;
    # Count preceeding backslashes, return rc = 1 if count is odd
    if (($pos > 0) && ($pos < length $str)) {while (($pos) and (substr($str, --$pos, 1) eq '\\')) {++$count;}}
    return $count % 2;
}

# Handle $1, ${1}, and \1 style substitution backreferences
# Inspired of the Perl Monks post below, which only consider backreferences without braces ($1, but not ${1}).
# Besides, a literal '\$1' (looks like a backreference, but is escaped, and thus a literal string) should NOT be evaluated as a backreference.
# https://www.perlmonks.org/?node_id=503847
###sub dyn_replace
sub eval_backrefs
{
    my $replace = shift;
    my @groups;
    {
        no strict 'refs';
        $groups[$_] = $$_ for 1 .. $#-;      # the size of @- tells us the number of capturing groups
    }
    # Match $1, ${1}, or \1
    while ($replace =~ m/\$(\d+)/g) { if (!isescaped($replace, $-[0])) { substr($replace, $-[0], $+[0] - $-[0]) = $groups[$1]; } }
    while ($replace =~ m/\$\{(\d+)\}/g) { if (!isescaped($replace, $-[0])) { substr($replace, $-[0], $+[0] - $-[0]) = $groups[$1]; } }
    while ($replace =~ m/\\(\d+)/g) { if (!isescaped($replace, $-[0])) { substr($replace, $-[0], $+[0] - $-[0]) = $groups[$1]; } }
    return $replace;
}

# If regex is BRE, unescape PATTERN (remove '\' from '\{' '\}' '\(' '\)')
sub bre_unescape
{
    my $pattern = shift;
    my $bre_flag = shift;
    my $pattern_unescaped = "";
    # Only unescape pattern if BRE
    if ($bre_flag)
    {
        for (my $pos = 0; $pos < length $pattern; ++$pos)
        {
            my $c = substr($pattern, $pos, 1);
            if (($c =~ m/\{|\}|\(|\)/) && isescaped($pattern, $pos))
            {
                # Drop last character ('\')
                chop($pattern_unescaped);
            }
            $pattern_unescaped .= $c;
        }
        $pattern = $pattern_unescaped;
    }
    return $pattern;
}

# MAIN
# ----
if (!@ARGV)
{
    usage();
    exit;
}

my ($str, $regex, $opt1, $opt2) = @ARGV;
my $result = "";
my ($is_subst, $is_match) = ();
my $g = 0;

# Get optional option flags
my $count_flag = ($opt1 eq '-c') || ($opt2 eq '-c') | 0;
my $bre_flag = ($opt1 eq '-bre') || ($opt2 eq '-bre') | 0;

# If expected error code was given, only print that error code and exit
if ($opt1 =~ m/^(-){0,1}\d+$/)
{
    $result = $opt1;
}
elsif ($opt2 =~ m/^(-){0,1}\d+$/)
{
    $result = $opt2;
}
if ($result)
{
    #my $exit_code = -((-$result) >> 8);
    my $exit_code = -((-$result) >> 8);
    print "$result\n";
    exit $exit_code;
}

my ($pattern, $replacement, $modifiers) = ($regex =~ m/^s\/(.*)\/(.*)\/((i|g|m){0,1}(i|g|m){0,1}(i|g|m){0,1})$/);

# Check for SUBSTITION, MACTH, or POSIX
$is_subst = defined($pattern and $replacement);
if (!$is_subst)
{
    ($pattern, $modifiers) = ($regex =~ m/^m\/(.*)\/((i|g|m){0,1}(i|g|m){0,1}(i|g|m){0,1})$/);
    $is_match = defined($pattern);

    if (!($is_match))
    {
        $pattern = $regex;
    }
}

if ($modifiers)
{
    # If 'g' is included in modifiers, it must be dealt with apart, as it is not valid as an inline modifer.
    $g = $modifiers =~ s/g//;
}

# Unescape pattern if BRE
$pattern = bre_unescape($pattern, $bre_flag);


# SUBSTITUTION
if ($is_subst)
{
    if ($count_flag)
    {
        if ($g)
        {
            $result = $str =~ s/(?$modifiers:$pattern)/eval_backrefs($replacement)/eg;
        }
        else
        {
            $result = $str =~ s/(?$modifiers:$pattern)/eval_backrefs($replacement)/e;
        }
    }
    else
    {
        if ($g)
        {
            ($result = $str) =~ s/(?$modifiers:$pattern)/eval_backrefs($replacement)/eg;
        }
        else
        {
            ($result = $str) =~ s/(?$modifiers:$pattern)/eval_backrefs($replacement)/e;
        }
    }
}
# MATCH, POSIX
else
{
    if (!$is_match)
    {
        # POSIX never uses modifiers i,g,m
        $modifiers = '';
    }
    if ($count_flag)
    {
        if ($g and $is_match)
        {
            $result = () = ($str =~ m/(?$modifiers:$pattern)/g);
        }
        else
        {
            $result = () = ($str =~ m/(?$modifiers:$pattern)/);
        }
    }
    else
    {
        if ($g and $is_match)
        {
            $result = ($str =~ m/(?$modifiers:$pattern)/g) ? $str : "";
        }
        else
        {
            $result = ($str =~ m/(?$modifiers:$pattern)/) ? $str : "";
        }
    }
}

print "$result\n";
