diff options
| author | Guillermo Ramos | 2019-08-25 15:45:07 +0200 | 
|---|---|---|
| committer | Guillermo Ramos | 2019-08-25 15:45:19 +0200 | 
| commit | aef5d256d075b4640859dfd6bb7da20aa5da1ab9 (patch) | |
| tree | 53d75b66eb18254ed274c7879d73295f128cb62f /022/ch2.pl | |
| parent | 4325599afe25cf8773d6f9d546cfe64208b9303d (diff) | |
| download | perlweekly-aef5d256d075b4640859dfd6bb7da20aa5da1ab9.tar.gz | |
[022#1]: lzw_encode
Diffstat (limited to '022/ch2.pl')
| -rwxr-xr-x[-rw-r--r--] | 022/ch2.pl | 82 | 
1 files changed, 82 insertions, 0 deletions
| diff --git a/022/ch2.pl b/022/ch2.pl index 6f2cd4a..0124071 100644..100755 --- a/022/ch2.pl +++ b/022/ch2.pl @@ -10,3 +10,85 @@  use strict;  use warnings; +use POSIX qw<ceil>; +use Data::Dumper; + +################################################################################ +# Naïve implementation of a bidirectional map +sub bimap_new { +    return {sym2bin=>{}, bin2sym=>{}}; +} +sub bimap_insert { +    my ($dict, $sym) = @_; +    my ($sym2bin, $bin2sym) = @$dict{'sym2bin', 'bin2sym'}; +    die "Symbol '$sym' already in dict" if exists $dict->{sym2bin}{$sym}; +    my $ord = keys %{$dict->{sym2bin}}; # Ordinal of symbol to insert in decimal +    my $bin = sprintf "%b", $ord; # ... and in binary (string of 1/0s) +    $sym2bin->{$sym} = $bin; # Update symbol -> binary mapping +    $bin2sym->{$bin} = $sym; # Update binary -> symbol mapping + +    # Extend with left zeroes the previously inserted binaries +    my $binlen = length $bin; +    foreach my $sym (keys %$sym2bin) { +        my $bin = $sym2bin->{$sym}; +        delete $bin2sym->{$bin}; +        $bin = "0" x ($binlen - length $bin) . $bin; +        $sym2bin->{$sym} = $bin; +        $bin2sym->{$bin} = $sym; +    } +} + + +################################################################################ +# Default dictionary +my $DEFAULT_DICT = bimap_new(); +bimap_insert($DEFAULT_DICT, $_) foreach ('', 'A' .. 'Z'); +sub dict_print { +    my $bin2sym = shift()->{bin2sym}; +    foreach my $bin (sort keys %$bin2sym) { +        printf "%s -> %s\n", $bin2sym->{$bin}, $bin; +    } +} + +sub lzw_encode { +    my $text = shift; +    my %dict = %$DEFAULT_DICT; +    my ($sym2bin, $bin2sym) = @dict{'sym2bin', 'bin2sym'}; + +    my $out = ''; +    while ($text) { +        # Find largest match in dictionary +        foreach my $sym (sort { length $b cmp length $a } keys %$sym2bin) { +            if ($text =~ /^$sym(.*)$/) { +                # Attach the binary corresponding to the matching symbol +                $out .= $sym2bin->{$sym}; +                # ... and cut the symbol from the input text +                $text = substr($text, length $sym); +                if ($1) { +                    # New dict entry ($sym + next character) +                    bimap_insert(\%dict, $sym . substr($1, 0, 1)); +                } else { +                    # No next character: insert EOM marker and we're finished +                    $out .= $sym2bin->{''}; +                } +                last; +            } +        } +        print "[debug] compressing $text\n"; +    } +    return $out; +} + +sub usage { +    die "Usage: $0 -e | --encode | -d | --decode\n"; +} +my $mode = shift || usage; +if ($mode eq '-e' || $mode eq '--encode') { +    my $text = <>; +    my $bin = lzw_encode($text); +    my $comprate = 100 * length($bin) / (8 * length($text)); +    printf STDERR "Compressed to %.2f%% of original size\n", $comprate; +    print $bin; +} else { +    usage; +} | 
