1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
#!/usr/bin/env perl
use strict;
use warnings;
use List::Util qw<max min>;
sub jaro {
my @s1 = split //, shift;
my @s2 = split //, shift;
# Two chars in s1 and s2 match if they are not farther away than this number
my $match_dist = int(max (scalar @s1, scalar @s2) / 2)-1;
$match_dist = 0 if $match_dist < 0;
# Maps indices from s1 chars to matching s2 chars
my %matches;
# Compute matches (fill the %matches hash)
S1:
foreach my $i (0 .. $#s1) {
foreach my $j ($i-min($i, $match_dist) .. min($i+$match_dist, $#s2)) {
# Skip s2 char if already matched
next if grep(/$j/, values %matches);
# If characters match, store match and advance to next char in s1
if ($s1[$i] eq $s2[$j]) {
$matches{$i} = $j;
next S1;
}
}
}
# Count number of matches and exit if 0
my $m = keys %matches;
return 0 if $m == 0;
# Count transpositions
my $t = 0;
for my $k (keys %matches) {
$t += 1 if $k != $matches{$k};
}
# Finally compute and return Jaro distance
return (($m/@s1) + ($m/@s2) + (($m-$t)/$m))/3;
}
sub jaro_winkler {
my ($s1, $s2) = @_;
my $jarosim = jaro $s1, $s2;
my $l;
for ($l = 0; $l < length $s1; $l++) {
last if (substr($s1, $l, 1) ne (substr $s2, $l, 1));
}
my $p = 0.1;
return $jarosim + $l*$p*(1-$jarosim);
}
die "Usage: $0 <s1> <s2>" unless @ARGV == 2;
print jaro_winkler(@ARGV), "\n";
|