diff options
author | Guillermo Ramos | 2019-08-25 17:22:20 +0200 |
---|---|---|
committer | Guillermo Ramos | 2019-08-25 18:11:47 +0200 |
commit | bcd0195a350fec1b683d7e2a927f930f5a887d27 (patch) | |
tree | 8cfafc34100816a25d0c13daa66c1f1bad196cc3 /022/ch2.pl | |
parent | aef5d256d075b4640859dfd6bb7da20aa5da1ab9 (diff) | |
download | perlweekly-bcd0195a350fec1b683d7e2a927f930f5a887d27.tar.gz |
[022#1]: lzw_decode: working w strings, not binary
Diffstat (limited to '022/ch2.pl')
-rwxr-xr-x | 022/ch2.pl | 88 |
1 files changed, 63 insertions, 25 deletions
@@ -10,13 +10,24 @@ use strict; use warnings; -use POSIX qw<ceil>; -use Data::Dumper; - ################################################################################ # Naïve implementation of a bidirectional map +my $MAX_BIT_WIDTH = 12; sub bimap_new { - return {sym2bin=>{}, bin2sym=>{}}; + return {binwidth=>0, sym2bin=>{}, bin2sym=>{}}; +} +sub bimap_extend { + my ($dict, $binwidth) = @_; + return unless $binwidth > $dict->{binwidth} && $binwidth <= $MAX_BIT_WIDTH; + my ($sym2bin, $bin2sym) = @$dict{'sym2bin', 'bin2sym'}; + foreach my $sym (keys %$sym2bin) { + my $bin = $sym2bin->{$sym}; + delete $bin2sym->{$bin}; + $bin = "0" x ($binwidth - length $bin) . $bin; + $sym2bin->{$sym} = $bin; + $bin2sym->{$bin} = $sym; + } + $dict->{binwidth} = $binwidth; } sub bimap_insert { my ($dict, $sym) = @_; @@ -24,25 +35,22 @@ sub bimap_insert { die "Symbol '$sym' already in dict" if exists $dict->{sym2bin}{$sym}; my $ord = keys %{$dict->{sym2bin}}; # Ordinal of symbol to insert in decimal my $bin = sprintf "%b", $ord; # ... and in binary (string of 1/0s) + return unless length $bin <= $MAX_BIT_WIDTH; $sym2bin->{$sym} = $bin; # Update symbol -> binary mapping $bin2sym->{$bin} = $sym; # Update binary -> symbol mapping # Extend with left zeroes the previously inserted binaries - my $binlen = length $bin; - foreach my $sym (keys %$sym2bin) { - my $bin = $sym2bin->{$sym}; - delete $bin2sym->{$bin}; - $bin = "0" x ($binlen - length $bin) . $bin; - $sym2bin->{$sym} = $bin; - $bin2sym->{$bin} = $sym; - } + bimap_extend($dict, length($bin)); + + return $bin; } ################################################################################ # Default dictionary my $DEFAULT_DICT = bimap_new(); -bimap_insert($DEFAULT_DICT, $_) foreach ('', 'A' .. 'Z'); +my $STOP = ''; +bimap_insert($DEFAULT_DICT, $_) foreach (map(chr, 1..254), $STOP); sub dict_print { my $bin2sym = shift()->{bin2sym}; foreach my $bin (sort keys %$bin2sym) { @@ -51,30 +59,56 @@ sub dict_print { } sub lzw_encode { - my $text = shift; + my $input = shift; my %dict = %$DEFAULT_DICT; my ($sym2bin, $bin2sym) = @dict{'sym2bin', 'bin2sym'}; my $out = ''; - while ($text) { + while ($input) { # Find largest match in dictionary foreach my $sym (sort { length $b cmp length $a } keys %$sym2bin) { - if ($text =~ /^$sym(.*)$/) { + if (rindex($input, $sym, 0) == 0) { # Attach the binary corresponding to the matching symbol $out .= $sym2bin->{$sym}; # ... and cut the symbol from the input text - $text = substr($text, length $sym); - if ($1) { + $input = substr($input, length $sym); + if ($input) { # New dict entry ($sym + next character) - bimap_insert(\%dict, $sym . substr($1, 0, 1)); + bimap_insert(\%dict, $sym . substr($input, 0, 1)); } else { # No next character: insert EOM marker and we're finished - $out .= $sym2bin->{''}; + $out .= $sym2bin->{$STOP}; + } + last; + } + } + } + return $out; +} + +sub lzw_decode { + my $input = shift; + my %dict = %$DEFAULT_DICT; + my ($sym2bin, $bin2sym) = @dict{'sym2bin', 'bin2sym'}; + + my $out = ''; + my $lastsym; + while ($input) { + foreach my $bin (keys %$bin2sym) { + if ($input =~ /^$bin/) { + my $sym = $bin2sym->{$bin}; + $out .= $sym; + $input = substr($input, length $bin); + last if $sym eq $STOP; + if (defined $lastsym) { + my $bin = bimap_insert(\%dict, $lastsym . substr($sym, 0, 1)); + bimap_extend(\%dict, $dict{binwidth}+1) + if $bin =~ /^1+$/; } + $lastsym = $sym; last; } } - print "[debug] compressing $text\n"; } return $out; } @@ -84,11 +118,15 @@ sub usage { } my $mode = shift || usage; if ($mode eq '-e' || $mode eq '--encode') { - my $text = <>; - my $bin = lzw_encode($text); - my $comprate = 100 * length($bin) / (8 * length($text)); + my $input = <>; + my $out = lzw_encode($input); + my $comprate = 100 * length($out) / (8 * length($input)); printf STDERR "Compressed to %.2f%% of original size\n", $comprate; - print $bin; + print $out; +} elsif ($mode eq '-d' || $mode eq '--decode') { + my $input = <>; + my $out = lzw_decode($input); + print $out; } else { usage; } |