summaryrefslogtreecommitdiff
path: root/010/ch2.pl
diff options
context:
space:
mode:
authorGuillermo Ramos2019-05-30 22:12:23 +0200
committerGuillermo Ramos2019-05-30 22:14:39 +0200
commit8fadbab611b74777d020c4cc18ab2b13776c40c1 (patch)
tree77b770b68936e70fb72640739655dd7abcbaea9e /010/ch2.pl
parentb3fb708aaddb6c7cbd374e708bb2c49d91d3de95 (diff)
downloadperlweekly-8fadbab611b74777d020c4cc18ab2b13776c40c1.tar.gz
[010#2]
Diffstat (limited to '010/ch2.pl')
-rwxr-xr-x010/ch2.pl60
1 files changed, 60 insertions, 0 deletions
diff --git a/010/ch2.pl b/010/ch2.pl
new file mode 100755
index 0000000..9e72fad
--- /dev/null
+++ b/010/ch2.pl
@@ -0,0 +1,60 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+use List::Util qw<max min>;
+
+sub jaro {
+ my @s1 = split //, shift;
+ my @s2 = split //, shift;
+
+ # Two chars in s1 and s2 match if they are not farther away than this number
+ my $match_dist = int(max (scalar @s1, scalar @s2) / 2)-1;
+ $match_dist = 0 if $match_dist < 0;
+
+ # Maps indices from s1 chars to matching s2 chars
+ my %matches;
+
+ # Compute matches (fill the %matches hash)
+ S1:
+ foreach my $i (0 .. $#s1) {
+ foreach my $j ($i-min($i, $match_dist) .. min($i+$match_dist, $#s2)) {
+ # Skip s2 char if already matched
+ next if grep(/$j/, values %matches);
+
+ # If characters match, store match and advance to next char in s1
+ if ($s1[$i] eq $s2[$j]) {
+ $matches{$i} = $j;
+ next S1;
+ }
+ }
+ }
+
+ # Count number of matches and exit if 0
+ my $m = keys %matches;
+ return 0 if $m == 0;
+
+ # Count transpositions
+ my $t = 0;
+ for my $k (keys %matches) {
+ $t += 1 if $k != $matches{$k};
+ }
+
+ # Finally compute and return Jaro distance
+ return (($m/@s1) + ($m/@s2) + (($m-$t)/$m))/3;
+}
+
+sub jaro_winkler {
+ my ($s1, $s2) = @_;
+ my $jarosim = jaro $s1, $s2;
+ my $l;
+ for ($l = 0; $l < length $s1; $l++) {
+ last if (substr($s1, $l, 1) ne (substr $s2, $l, 1));
+ }
+ my $p = 0.1;
+ return $jarosim + $l*$p*(1-$jarosim);
+}
+
+die "Usage: $0 <s1> <s2>" unless @ARGV == 2;
+print jaro_winkler(@ARGV), "\n";