From 8fadbab611b74777d020c4cc18ab2b13776c40c1 Mon Sep 17 00:00:00 2001 From: Guillermo Ramos Date: Thu, 30 May 2019 22:12:23 +0200 Subject: [010#2] --- 010/ch2.pl | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 010/ch2.pl diff --git a/010/ch2.pl b/010/ch2.pl new file mode 100755 index 0000000..9e72fad --- /dev/null +++ b/010/ch2.pl @@ -0,0 +1,60 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +use List::Util qw; + +sub jaro { + my @s1 = split //, shift; + my @s2 = split //, shift; + + # Two chars in s1 and s2 match if they are not farther away than this number + my $match_dist = int(max (scalar @s1, scalar @s2) / 2)-1; + $match_dist = 0 if $match_dist < 0; + + # Maps indices from s1 chars to matching s2 chars + my %matches; + + # Compute matches (fill the %matches hash) + S1: + foreach my $i (0 .. $#s1) { + foreach my $j ($i-min($i, $match_dist) .. min($i+$match_dist, $#s2)) { + # Skip s2 char if already matched + next if grep(/$j/, values %matches); + + # If characters match, store match and advance to next char in s1 + if ($s1[$i] eq $s2[$j]) { + $matches{$i} = $j; + next S1; + } + } + } + + # Count number of matches and exit if 0 + my $m = keys %matches; + return 0 if $m == 0; + + # Count transpositions + my $t = 0; + for my $k (keys %matches) { + $t += 1 if $k != $matches{$k}; + } + + # Finally compute and return Jaro distance + return (($m/@s1) + ($m/@s2) + (($m-$t)/$m))/3; +} + +sub jaro_winkler { + my ($s1, $s2) = @_; + my $jarosim = jaro $s1, $s2; + my $l; + for ($l = 0; $l < length $s1; $l++) { + last if (substr($s1, $l, 1) ne (substr $s2, $l, 1)); + } + my $p = 0.1; + return $jarosim + $l*$p*(1-$jarosim); +} + +die "Usage: $0 " unless @ARGV == 2; +print jaro_winkler(@ARGV), "\n"; -- cgit v1.2.3