Dijkstra's Algorithm in C# with Generics

I recently needed to to implement a shortest-path algorithm (to identify preferred domain controllers using site link costs) and I found Dijkstra's Algorithm

Path class

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace DijkstraAlgorithm {
   public class Path<T> {
      public T Source { get; set; }

      public T Destination { get; set; }

      /// <summary>
      /// Cost of using this path from Source to Destination
      /// </summary>
      /// 
      /// Lower costs are preferable to higher costs
      /// </remarks>
      public int Cost { get; set; }
   }
}

ExtensionMethods class

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace DijkstraAlgorithm {
   public static class ExtensionMethods {
      /// <summary>
      /// Adds or Updates the dictionary to include the destination and its associated cost 
      /// and complete path (and param arrays make paths easier to work with)
      /// </summary>
      public static void Set<T>(this Dictionary<T, KeyValuePair<int, LinkedList<Path<T>>>> Dictionary, 
                                T destination, int Cost, params Path<T>[] paths) {
         var CompletePath = paths == null ? new LinkedList<Path<T>>() : new LinkedList<Path<T>>(paths);
         Dictionary[destination] = new KeyValuePair<int, LinkedList<Path<T>>>(Cost, CompletePath);
      }
   }
}

Engine class

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace DijkstraAlgorithm {
   /// <summary>
   /// Calculates the best route between various paths, using Dijkstra's algorithm
   /// </summary>
   public static class Engine {
      public static LinkedList<Path<T>> CalculateShortestPathBetween<T>(T source, T destination, IEnumerable<Path<T>> Paths) {
         return CalculateFrom(source, Paths)[destination];
      }

      public static Dictionary<T, LinkedList<Path<T>>> CalculateShortestFrom<T>(T source, IEnumerable<Path<T>> Paths) {
         return CalculateFrom(source, Paths);
      }

      private static Dictionary<T, LinkedList<Path<T>>> CalculateFrom<T>(T source, IEnumerable<Path<T>> Paths) {
         // validate the paths
         if (Paths.Any(p => p.Source.Equals(p.Destination)))
            throw new ArgumentException("No path can have the same source and destination");

         // keep track of the shortest paths identified thus far
         Dictionary<T, KeyValuePair<int, LinkedList<Path<T>>>> ShortestPaths = new Dictionary<T, KeyValuePair<int, LinkedList<Path<T>>>>();

         // keep track of the locations which have been completely processed
         List<T> LocationsProcessed = new List<T>();

         // include all possible steps, with Int.MaxValue cost
         Paths.SelectMany(p => new T[] { p.Source, p.Destination })           // union source and destinations
                 .Distinct()                                                  // remove duplicates
                 .ToList()                                                    // ToList exposes ForEach
                 .ForEach(s => ShortestPaths.Set(s, Int32.MaxValue, null));   // add to ShortestPaths with MaxValue cost

         // update cost for self-to-self as 0; no path
         ShortestPaths.Set(source, 0, null);

         // keep this cached
         var LocationCount = ShortestPaths.Keys.Count;

         while (LocationsProcessed.Count < LocationCount) {
            T _locationToProcess = default(T);

            //Search for the nearest location that isn't handled
            foreach (T _location in ShortestPaths.OrderBy(p => p.Value.Key).Select(p => p.Key).ToList()) {
               if (!LocationsProcessed.Contains(_location)) {
                  if (ShortestPaths[_location].Key == Int32.MaxValue)
                     return ShortestPaths.ToDictionary(k => k.Key, v => v.Value.Value); //ShortestPaths[destination].Value;

                  _locationToProcess = _location;
                  break;
               }
            } // foreach

            var _selectedPaths = Paths.Where(p => p.Source.Equals(_locationToProcess));

            foreach (Path<T> path in _selectedPaths) {
               if (ShortestPaths[path.Destination].Key > path.Cost + ShortestPaths[path.Source].Key) {
                  ShortestPaths.Set(
                      path.Destination,
                      path.Cost + ShortestPaths[path.Source].Key,
                      ShortestPaths[path.Source].Value.Union(new Path<T>[] { path }).ToArray());
               }
            }

            //Add the location to the list of processed locations
            LocationsProcessed.Add(_locationToProcess);
         } // while

         return ShortestPaths.ToDictionary(k => k.Key, v => v.Value.Value);
         //return ShortestPaths[destination].Value;
      }
   }
}

Test

using System.Linq;
using DijkstraAlgorithm;
using FluentAssertions;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace UnitTestProject1 {
   [TestClass]
   public class UnitTest1 {
      [TestMethod]
      public void Calculate_A_to_D_given_AB_BC_CD__should_be__ABCD() {
         var Results = Engine.CalculateShortestPathBetween(
             "A",
             "D",
             new Path<string>[] {
                new Path<string>() { Source = "A", Destination = "B", Cost = 3 },
                new Path<string>() { Source = "B", Destination = "C", Cost = 3 },
                new Path<string>() { Source = "C", Destination = "D", Cost = 3 }
             });

         Results.Sum(r => r.Cost).Should().Be(9);
         Results.Count.Should().Be(3);

         Results.First().Cost.Should().Be(3);
         Results.First().Source.Should().Be("A");
         Results.First().Destination.Should().Be("B");

         Results.Skip(1).First().Cost.Should().Be(3);
         Results.Skip(1).First().Source.Should().Be("B");
         Results.Skip(1).First().Destination.Should().Be("C");

         Results.Skip(2).First().Cost.Should().Be(3);
         Results.Skip(2).First().Source.Should().Be("C");
         Results.Skip(2).First().Destination.Should().Be("D");
      }

      [TestMethod]
      public void Calculate_A_to_D_given_AB_BC_CD_DE__should_be__ABCD() {
         var Results = Engine.CalculateShortestPathBetween(
             "A",
             "D",
             new Path<string>[] {
                new Path<string>() { Source = "A", Destination = "B", Cost = 3 },
                new Path<string>() { Source = "B", Destination = "C", Cost = 3 },
                new Path<string>() { Source = "C", Destination = "D", Cost = 3 },
                new Path<string>() { Source = "D", Destination = "E", Cost = 3 }
             });

         Results.Sum(r => r.Cost).Should().Be(9);
         Results.Count.Should().Be(3);

         Results.First().Cost.Should().Be(3);
         Results.First().Source.Should().Be("A");
         Results.First().Destination.Should().Be("B");

         Results.Skip(1).First().Cost.Should().Be(3);
         Results.Skip(1).First().Source.Should().Be("B");
         Results.Skip(1).First().Destination.Should().Be("C");

         Results.Skip(2).First().Cost.Should().Be(3);
         Results.Skip(2).First().Source.Should().Be("C");
         Results.Skip(2).First().Destination.Should().Be("D");
      }

      [TestMethod]
      public void Calculate_A_to_D_given_AB_AC_AD_AE_BC_CD_DE__should_be__ACD() {
         var Results = Engine.CalculateShortestPathBetween(
             "A",
             "D",
             new Path<string>[] {
                new Path<string>() { Source = "A", Destination = "B", Cost = 3 },
                new Path<string>() { Source = "A", Destination = "C", Cost = 3 },
                new Path<string>() { Source = "A", Destination = "D", Cost = 7 }, // set this just above ABC (3+3=6)
                new Path<string>() { Source = "A", Destination = "E", Cost = 3 },
                new Path<string>() { Source = "B", Destination = "C", Cost = 3 },
                new Path<string>() { Source = "C", Destination = "D", Cost = 3 },
                new Path<string>() { Source = "D", Destination = "E", Cost = 3 }
             });

         Results.Sum(r => r.Cost).Should().Be(6);
         Results.Count.Should().Be(2);

         Results.First().Cost.Should().Be(3);
         Results.First().Source.Should().Be("A");
         Results.First().Destination.Should().Be("C");

         Results.Skip(1).First().Cost.Should().Be(3);
         Results.Skip(1).First().Source.Should().Be("C");
         Results.Skip(1).First().Destination.Should().Be("D");
      }

      [TestMethod]
      public void Calculate_A_to_D_given_AB_AC_AD_AE_BC_CD_DE__should_be__AD() {
         var Results = Engine.CalculateShortestPathBetween(
             "A",
             "D",
             new Path<string>[] {
                new Path<string>() { Source = "A", Destination = "B", Cost = 3 },
                new Path<string>() { Source = "A", Destination = "C", Cost = 3 },
                new Path<string>() { Source = "A", Destination = "D", Cost = 5 }, // set this just below ABC (3+3=6)
                new Path<string>() { Source = "A", Destination = "E", Cost = 3 },
                new Path<string>() { Source = "B", Destination = "C", Cost = 3 },
                new Path<string>() { Source = "C", Destination = "D", Cost = 3 },
                new Path<string>() { Source = "D", Destination = "E", Cost = 3 }
             });

         Results.Sum(r => r.Cost).Should().Be(5);
         Results.Count.Should().Be(1);

         Results.Single().Cost.Should().Be(5);
         Results.Single().Source.Should().Be("A");
         Results.Single().Destination.Should().Be("D");
      }
   }
}

You can find the code on GitHub.

Happy coding!

PSC.Search: a new implementation of Levenshtein distance algorithm

This article describes a simple implementation of the string search. It can be used for approximate string matching (for more information, see http://en.wikipedia.org/wiki/Fuzzy_string_searching).

Other algorithms for approximate string searching exist (e.g., Soundex), but those aren't as easy to implement. The algorithm in this article is easy to implement, and can be used for tasks where approximate string searching is used in an easy way.

The algorithm used the Levenshtein-distance for determining how exact a string from a word list matches the word to be found. Information about the Levenshtein-distance can be found at http://en.wikipedia.org/wiki/Levenshtein_distance.

C# implementation

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace PSC.Search.Demo
{
    class Program
    {
        static void Main(string[] args)
        {
            string word = "Pure Source Code";
            List<string> wordList = new List<string>
            {
                "Code Project",
                "Pure SourceCOde",
                "sourcecode",
                "puresourcecode",
                "Source Kode",
                "Kode Project",
                "Other Source"
            };

            List<wordrank> foundWords = 
                           FuzzySearch.Search(word, wordList, 0.30);

            foundWords.ForEach(i => Console.WriteLine(i.Word + 
                                          " (" + i.Rank.ToString() + 
                                          ")"));
            Console.ReadKey();
        }
    }
}

Output:

FuzzySearch

A basic approach is shown. Instead of the Levenshtein-distance, a more optimized algorithm could be used - but here, a quite simple implementation is given for clarity reasons.

public static int LevenshteinDistance(string src, string dest)
{
    int[,] d = new int[src.Length + 1, dest.Length + 1];
    int i, j, cost;
    char[] str1 = src.ToCharArray();
    char[] str2 = dest.ToCharArray();

    for (i = 0; i <= str1.Length; i++)
    {
        d[i, 0] = i;
    }

    for (j = 0; j <= str2.Length; j++)
    {
        d[0, j] = j;
    }

    for (i = 1; i <= str1.Length; i++)
    {
        for (j = 1; j <= str2.Length; j++)
        {
            if (str1[i - 1] == str2[j - 1])
                cost = 0;
            else
                cost = 1;

            d[i, j] =
                Math.Min(
                    d[i - 1, j] + 1,                    // Deletion
                    Math.Min(
                        d[i, j - 1] + 1,                // Insertion
                        d[i - 1, j - 1] + cost));       // Substitution

            if ((i > 1) && (j > 1) && 
                (str1[i - 1] == str2[j - 2]) && 
                (str1[i - 2] == str2[j - 1]))
            {
                d[i, j] = Math.Min(d[i, j], d[i - 2, j - 2] + cost);
            }
        }
    }

    return d[str1.Length, str2.Length];
}

In the search process, for each word in the wordlist, the Levenshtein-distance is computed, and with this distance, a score. This score represents how good the strings match. The input argument fuzzyness determines how much the strings can differ.

public static List<wordrank> Search(string word, List wordList, 
                                    double fuzzyness)
{
    List<wordrank> foundWords = new List<wordrank>();

    foreach (string s in wordList)
    {
        // Calculate the Levenshtein-distance:
        int levenshteinDistance = LevenshteinDistance(word, s);

        // Length of the longer string:
        int length = Math.Max(word.Length, s.Length);

        // Calculate the score:
        double score = 1.0 - (double)levenshteinDistance / length;

        // Match?
        if (score > fuzzyness)
            foundWords.Add(new WordRank() { Rank = score, Word = s });
    }

    return foundWords;
}

SQL implementation

Also I've implemented a version for SQL.

CREATE FUNCTION edit_distance(@s1 nvarchar(3999), @s2 nvarchar(3999))
RETURNS int
AS
BEGIN
 DECLARE @s1_len int, @s2_len int
 DECLARE @i int, @j int, @s1_char nchar, @c int, @c_temp int
 DECLARE @cv0 varbinary(8000), @cv1 varbinary(8000)

 SELECT
  @s1_len = LEN(@s1),
  @s2_len = LEN(@s2),
  @cv1 = 0x0000,
  @j = 1, @i = 1, @c = 0

 WHILE @j <= @s2_len
  SELECT @cv1 = @cv1 + CAST(@j AS binary(2)), @j = @j + 1

 WHILE @i <= @s1_len
 BEGIN
  SELECT
   @s1_char = SUBSTRING(@s1, @i, 1),
   @c = @i,
   @cv0 = CAST(@i AS binary(2)),
   @j = 1

  WHILE @j <= @s2_len
  BEGIN
   SET @c = @c + 1
   SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j-1, 2) AS int) +
    CASE WHEN @s1_char = SUBSTRING(@s2, @j, 1) THEN 0 ELSE 1 END
   IF @c > @c_temp SET @c = @c_temp
   SET @c_temp = CAST(SUBSTRING(@cv1, @j+@j+1, 2) AS int)+1
   IF @c > @c_temp SET @c = @c_temp
   SELECT @cv0 = @cv0 + CAST(@c AS binary(2)), @j = @j + 1
 END

 SELECT @cv1 = @cv0, @i = @i + 1
 END

 RETURN @c
END

Usage:

select
 dbo.edit_distance('Fuzzy String Match','fuzzy string match'),
 dbo.edit_distance('fuzzy','fuzy'),
 dbo.edit_distance('Fuzzy String Match','fuzy string match'),
 dbo.edit_distance('levenshtein distance sql','levenshtein sql server'),
 dbo.edit_distance('distance','server')



Happy coding!

Advertsing

125X125_06

TagCloud

MonthList

CommentList