The Algorithms logo
算法
关于我们捐赠

Soundex

using System;
using System.Collections.Generic;
using System.Linq;

namespace Algorithms.Encoders;

/// <summary>
///     Class for Soundex encoding strings.
/// </summary>
public class SoundexEncoder
{
    /// <summary>
    ///     Number of digits that follow the leading letter in a Soundex code.
    /// </summary>
    private const int DigitCount = 3;

    /// <summary>
    ///     Soundex code for every lowercase English letter.
    ///     Vowels (and 'y') map to 0, which acts as a separator and is never emitted.
    ///     'h' and 'w' map to 8, a placeholder that matches no real code: both letters
    ///     are removed before coding, so 8 is only ever used when comparing against
    ///     the code of a leading 'h' or 'w'.
    /// </summary>
    private static readonly Dictionary<char, int> LetterCodes = new()
    {
        ['a'] = 0,
        ['e'] = 0,
        ['i'] = 0,
        ['o'] = 0,
        ['u'] = 0,
        ['y'] = 0,
        ['h'] = 8,
        ['w'] = 8,
        ['b'] = 1,
        ['f'] = 1,
        ['p'] = 1,
        ['v'] = 1,
        ['c'] = 2,
        ['g'] = 2,
        ['j'] = 2,
        ['k'] = 2,
        ['q'] = 2,
        ['s'] = 2,
        ['x'] = 2,
        ['z'] = 2,
        ['d'] = 3,
        ['t'] = 3,
        ['l'] = 4,
        ['m'] = 5,
        ['n'] = 5,
        ['r'] = 6,
    };

    /// <summary>
    ///     Encodes a string using the Soundex Algorithm.
    ///     Characters that are not letters (whitespace, digits, punctuation) are ignored.
    /// </summary>
    /// <param name="text">The string to encode. Must contain at least one letter A-Z (any case).</param>
    /// <returns>The Soundex encoded string (one uppercase character and three digits).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="text" /> is null.</exception>
    /// <exception cref="ArgumentException">
    ///     <paramref name="text" /> contains no letters, or contains a letter outside A-Z.
    /// </exception>
    public string Encode(string text)
    {
        if (text is null)
        {
            throw new ArgumentNullException(nameof(text));
        }

        // Keep letters only, so that names such as "O'Brien" or "Smith-Jones" can be encoded.
        var letters = text.ToLowerInvariant().Where(c => char.IsLetter(c)).ToArray();
        if (letters.Length == 0)
        {
            throw new ArgumentException("Text must contain at least one letter.", nameof(text));
        }

        // Fail early with a descriptive error instead of a KeyNotFoundException deep in the pipeline.
        foreach (var letter in letters)
        {
            if (!LetterCodes.ContainsKey(letter))
            {
                throw new ArgumentException(
                    $"Unsupported letter '{letter}': Soundex is only defined for the letters A-Z.",
                    nameof(text));
            }
        }

        var firstLetter = letters[0];

        // The order of the steps matters: 'h'/'w' are dropped first so that equal codes
        // on either side of them collapse, while vowels are dropped only after collapsing
        // so that they keep equal codes apart.
        var numbers = ProduceNumberCoding(OmitHAndW(letters));
        numbers = CollapseDoubles(numbers);
        numbers = OmitVowels(numbers);
        numbers = CollapseLeadingDigit(numbers, firstLetter);
        numbers = PadTo3Numbers(numbers.Take(DigitCount));

        var final = numbers.ToArray();
        return $"{char.ToUpperInvariant(firstLetter)}{final[0]}{final[1]}{final[2]}";
    }

    /// <summary>
    ///     Drops the first number when it is the code of the leading letter,
    ///     because that letter is already emitted verbatim at the start of the result.
    /// </summary>
    private static IEnumerable<int> CollapseLeadingDigit(IEnumerable<int> numbers, char c)
    {
        var leadingCode = MapToNumber(c);
        var isFirst = true;
        foreach (var number in numbers)
        {
            if (isFirst)
            {
                isFirst = false;
                if (number == leadingCode)
                {
                    continue;
                }
            }

            yield return number;
        }
    }

    /// <summary>
    ///     Yields exactly three numbers: the leading elements of the input, then zeros as padding.
    /// </summary>
    private static IEnumerable<int> PadTo3Numbers(IEnumerable<int> numbers)
    {
        using var enumerator = numbers.GetEnumerator();
        for (var i = 0; i < DigitCount; i++)
        {
            yield return enumerator.MoveNext()
                ? enumerator.Current
                : 0;
        }
    }

    /// <summary>
    ///     Removes the zero codes produced by vowels.
    /// </summary>
    private static IEnumerable<int> OmitVowels(IEnumerable<int> numbers) => numbers.Where(i => i != 0);

    /// <summary>
    ///     Removes every 'h' and 'w' from the (lowercase) letters.
    /// </summary>
    private static IEnumerable<char> OmitHAndW(IEnumerable<char> text) => text.Where(c => c != 'h' && c != 'w');

    /// <summary>
    ///     Collapses each run of equal adjacent numbers into a single number.
    /// </summary>
    private static IEnumerable<int> CollapseDoubles(IEnumerable<int> numbers)
    {
        // int.MinValue is never a valid code, so the first element is always emitted.
        var previous = int.MinValue;
        foreach (var i in numbers)
        {
            if (previous != i)
            {
                yield return i;
                previous = i;
            }
        }
    }

    /// <summary>
    ///     Maps every letter to its Soundex code.
    /// </summary>
    private static IEnumerable<int> ProduceNumberCoding(IEnumerable<char> text) => text.Select(MapToNumber);

    /// <summary>
    ///     Looks up the Soundex code of a lowercase letter a-z.
    /// </summary>
    private static int MapToNumber(char ch) => LetterCodes[ch];
}