diff options
author | Guillermo Ramos | 2019-08-25 15:45:07 +0200 |
---|---|---|
committer | Guillermo Ramos | 2019-08-25 15:45:19 +0200 |
commit | aef5d256d075b4640859dfd6bb7da20aa5da1ab9 (patch) | |
tree | 53d75b66eb18254ed274c7879d73295f128cb62f /022/ch2.pl | |
parent | 4325599afe25cf8773d6f9d546cfe64208b9303d (diff) | |
download | perlweekly-aef5d256d075b4640859dfd6bb7da20aa5da1ab9.tar.gz |
[022#1]: lzw_encode
Diffstat (limited to '022/ch2.pl')
-rwxr-xr-x[-rw-r--r--] | 022/ch2.pl | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/022/ch2.pl b/022/ch2.pl index 6f2cd4a..0124071 100644..100755 --- a/022/ch2.pl +++ b/022/ch2.pl @@ -10,3 +10,85 @@ use strict; use warnings; +use POSIX qw<ceil>; +use Data::Dumper; + +################################################################################ +# Naïve implementation of a bidirectional map +sub bimap_new { + return {sym2bin=>{}, bin2sym=>{}}; +} +sub bimap_insert { + my ($dict, $sym) = @_; + my ($sym2bin, $bin2sym) = @$dict{'sym2bin', 'bin2sym'}; + die "Symbol '$sym' already in dict" if exists $dict->{sym2bin}{$sym}; + my $ord = keys %{$dict->{sym2bin}}; # Ordinal of symbol to insert in decimal + my $bin = sprintf "%b", $ord; # ... and in binary (string of 1/0s) + $sym2bin->{$sym} = $bin; # Update symbol -> binary mapping + $bin2sym->{$bin} = $sym; # Update binary -> symbol mapping + + # Extend with left zeroes the previously inserted binaries + my $binlen = length $bin; + foreach my $sym (keys %$sym2bin) { + my $bin = $sym2bin->{$sym}; + delete $bin2sym->{$bin}; + $bin = "0" x ($binlen - length $bin) . $bin; + $sym2bin->{$sym} = $bin; + $bin2sym->{$bin} = $sym; + } +} + + +################################################################################ +# Default dictionary +my $DEFAULT_DICT = bimap_new(); +bimap_insert($DEFAULT_DICT, $_) foreach ('', 'A' .. 'Z'); +sub dict_print { + my $bin2sym = shift()->{bin2sym}; + foreach my $bin (sort keys %$bin2sym) { + printf "%s -> %s\n", $bin2sym->{$bin}, $bin; + } +} + +sub lzw_encode { + my $text = shift; + my %dict = %$DEFAULT_DICT; + my ($sym2bin, $bin2sym) = @dict{'sym2bin', 'bin2sym'}; + + my $out = ''; + while ($text) { + # Find largest match in dictionary + foreach my $sym (sort { length $b cmp length $a } keys %$sym2bin) { + if ($text =~ /^$sym(.*)$/) { + # Attach the binary corresponding to the matching symbol + $out .= $sym2bin->{$sym}; + # ... and cut the symbol from the input text + $text = substr($text, length $sym); + if ($1) { + # New dict entry ($sym + next character) + bimap_insert(\%dict, $sym . substr($1, 0, 1)); + } else { + # No next character: insert EOM marker and we're finished + $out .= $sym2bin->{''}; + } + last; + } + } + print "[debug] compressing $text\n"; + } + return $out; +} + +sub usage { + die "Usage: $0 -e | --encode | -d | --decode\n"; +} +my $mode = shift || usage; +if ($mode eq '-e' || $mode eq '--encode') { + my $text = <>; + my $bin = lzw_encode($text); + my $comprate = 100 * length($bin) / (8 * length($text)); + printf STDERR "Compressed to %.2f%% of original size\n", $comprate; + print $bin; +} else { + usage; +} |