Monday, November 15, 2010

Levestein Metric

CREATE FUNCTION edit_distance(@s1 nvarchar(3999), @s2 nvarchar(3999))
RETURNS int
AS
BEGIN
  DECLARE @s1_len int, @s2_len int, @i int, @j int, @s1_char nchar, @c int, @c_temp int,
    @cv0 varbinary(8000), @cv1 varbinary(8000)
  SELECT @s1_len = LEN(@s1), @s2_len = LEN(@s2), @cv1 = 0x0000, @j = 1, @i = 1, @c = 0
  WHILE @j <= @s2_len
    SELECT @cv1 = @cv1 + CAST(@j AS binary(2)), @j = @j + 1
  WHILE @i <= @s1_len
  BEGIN
    SELECT @s1_char = SUBSTRING(@s1, @i, 1), @c = @i, @cv0 = CAST(@i AS binary(2)), @j = 1
    WHILE @j <= @s2_len
    BEGIN
      SET @c = @c + 1
      SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j-1, 2) AS int) +
        CASE WHEN @s1_char = SUBSTRING(@s2, @j, 1) THEN 0 ELSE 1 END
      IF @c > @c_temp SET @c = @c_temp
      SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j+1, 2) AS int)+1
      IF @c > @c_temp SET @c = @c_temp
      SELECT @cv0 = @cv0 + CAST(@c AS binary(2)), @j = @j + 1
    END
    SELECT @cv1 = @cv0, @i = @i + 1
  END
  RETURN @c
END

No comments:

Post a Comment