summaryrefslogtreecommitdiff
path: root/022/ch2.pl
diff options
context:
space:
mode:
Diffstat (limited to '022/ch2.pl')
-rwxr-xr-x022/ch2.pl88
1 files changed, 63 insertions, 25 deletions
diff --git a/022/ch2.pl b/022/ch2.pl
index 0124071..66953ea 100755
--- a/022/ch2.pl
+++ b/022/ch2.pl
@@ -10,13 +10,24 @@
use strict;
use warnings;
-use POSIX qw<ceil>;
-use Data::Dumper;
-
################################################################################
# Naïve implementation of a bidirectional map
+my $MAX_BIT_WIDTH = 12;
sub bimap_new {
- return {sym2bin=>{}, bin2sym=>{}};
+ return {binwidth=>0, sym2bin=>{}, bin2sym=>{}};
+}
+sub bimap_extend {
+ my ($dict, $binwidth) = @_;
+ return unless $binwidth > $dict->{binwidth} && $binwidth <= $MAX_BIT_WIDTH;
+ my ($sym2bin, $bin2sym) = @$dict{'sym2bin', 'bin2sym'};
+ foreach my $sym (keys %$sym2bin) {
+ my $bin = $sym2bin->{$sym};
+ delete $bin2sym->{$bin};
+ $bin = "0" x ($binwidth - length $bin) . $bin;
+ $sym2bin->{$sym} = $bin;
+ $bin2sym->{$bin} = $sym;
+ }
+ $dict->{binwidth} = $binwidth;
}
sub bimap_insert {
my ($dict, $sym) = @_;
@@ -24,25 +35,22 @@ sub bimap_insert {
die "Symbol '$sym' already in dict" if exists $dict->{sym2bin}{$sym};
my $ord = keys %{$dict->{sym2bin}}; # Ordinal of symbol to insert in decimal
my $bin = sprintf "%b", $ord; # ... and in binary (string of 1/0s)
+ return unless length $bin <= $MAX_BIT_WIDTH;
$sym2bin->{$sym} = $bin; # Update symbol -> binary mapping
$bin2sym->{$bin} = $sym; # Update binary -> symbol mapping
# Extend with left zeroes the previously inserted binaries
- my $binlen = length $bin;
- foreach my $sym (keys %$sym2bin) {
- my $bin = $sym2bin->{$sym};
- delete $bin2sym->{$bin};
- $bin = "0" x ($binlen - length $bin) . $bin;
- $sym2bin->{$sym} = $bin;
- $bin2sym->{$bin} = $sym;
- }
+ bimap_extend($dict, length($bin));
+
+ return $bin;
}
################################################################################
# Default dictionary
my $DEFAULT_DICT = bimap_new();
-bimap_insert($DEFAULT_DICT, $_) foreach ('', 'A' .. 'Z');
+my $STOP = '';
+bimap_insert($DEFAULT_DICT, $_) foreach (map(chr, 1..254), $STOP);
sub dict_print {
my $bin2sym = shift()->{bin2sym};
foreach my $bin (sort keys %$bin2sym) {
@@ -51,30 +59,56 @@ sub dict_print {
}
sub lzw_encode {
- my $text = shift;
+ my $input = shift;
my %dict = %$DEFAULT_DICT;
my ($sym2bin, $bin2sym) = @dict{'sym2bin', 'bin2sym'};
my $out = '';
- while ($text) {
+ while ($input) {
# Find largest match in dictionary
foreach my $sym (sort { length $b cmp length $a } keys %$sym2bin) {
- if ($text =~ /^$sym(.*)$/) {
+ if (rindex($input, $sym, 0) == 0) {
# Attach the binary corresponding to the matching symbol
$out .= $sym2bin->{$sym};
# ... and cut the symbol from the input text
- $text = substr($text, length $sym);
- if ($1) {
+ $input = substr($input, length $sym);
+ if ($input) {
# New dict entry ($sym + next character)
- bimap_insert(\%dict, $sym . substr($1, 0, 1));
+ bimap_insert(\%dict, $sym . substr($input, 0, 1));
} else {
# No next character: insert EOM marker and we're finished
- $out .= $sym2bin->{''};
+ $out .= $sym2bin->{$STOP};
+ }
+ last;
+ }
+ }
+ }
+ return $out;
+}
+
+sub lzw_decode {
+ my $input = shift;
+ my %dict = %$DEFAULT_DICT;
+ my ($sym2bin, $bin2sym) = @dict{'sym2bin', 'bin2sym'};
+
+ my $out = '';
+ my $lastsym;
+ while ($input) {
+ foreach my $bin (keys %$bin2sym) {
+ if ($input =~ /^$bin/) {
+ my $sym = $bin2sym->{$bin};
+ $out .= $sym;
+ $input = substr($input, length $bin);
+ last if $sym eq $STOP;
+ if (defined $lastsym) {
+ my $bin = bimap_insert(\%dict, $lastsym . substr($sym, 0, 1));
+ bimap_extend(\%dict, $dict{binwidth}+1)
+ if $bin =~ /^1+$/;
}
+ $lastsym = $sym;
last;
}
}
- print "[debug] compressing $text\n";
}
return $out;
}
@@ -84,11 +118,15 @@ sub usage {
}
my $mode = shift || usage;
if ($mode eq '-e' || $mode eq '--encode') {
- my $text = <>;
- my $bin = lzw_encode($text);
- my $comprate = 100 * length($bin) / (8 * length($text));
+ my $input = <>;
+ my $out = lzw_encode($input);
+ my $comprate = 100 * length($out) / (8 * length($input));
printf STDERR "Compressed to %.2f%% of original size\n", $comprate;
- print $bin;
+ print $out;
+} elsif ($mode eq '-d' || $mode eq '--decode') {
+ my $input = <>;
+ my $out = lzw_decode($input);
+ print $out;
} else {
usage;
}