c# - GroupByUntilChanged : is there a LINQ GroupBy operator that groups by SUCCESSIVE equal keys?

标签 c# linq group-by ienumerable linq-to-objects

据我了解,LINQ(Enumerable)运算符 GroupBy 会在返回第一个分组之前枚举整个源序列。这对大数据不太友好;而且如果我知道键已经排好序,或者我并不在乎分组键是否互不相同,这样做也没有必要。

有没有我错过的运算符,它只对同一键的连续项目进行分组,并在新键出现时立即返回分组?

因为我没有找到如何使用现有功能实现此目的,所以我编写了自己的运算符并将其命名为 GroupByUntilChanged。不过,我仍然更愿意使用框架自带的代码。

public static class MyEnumerable
{
    /// <summary>
    /// Groups SUCCESSIVE elements of a sequence that share the same key. A grouping is
    /// handed to the caller as soon as an element with a different key is read, so the
    /// source is consumed only one run at a time.
    /// </summary>
    /// <param name="source">Sequence to split into runs.</param>
    /// <param name="keySelector">Extracts the key of each source element.</param>
    /// <param name="elementSelector">Projects each source element into the grouping.</param>
    /// <param name="comparer">Key comparer; <c>null</c> selects <see cref="EqualityComparer{T}.Default"/>.</param>
    /// <returns>One grouping per run of adjacent elements with equal keys.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/>, <paramref name="keySelector"/> or
    /// <paramref name="elementSelector"/> is <c>null</c>.
    /// </exception>
    public static IEnumerable<IGrouping<TKey, TElement>> GroupByUntilChanged<TSource, TKey, TElement>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        Func<TSource, TElement> elementSelector,
        IEqualityComparer<TKey> comparer)
    {
        // Arguments are validated here, outside the iterator, so a bad call fails immediately.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        if (keySelector == null)
        {
            throw new ArgumentNullException(nameof(keySelector));
        }

        if (elementSelector == null)
        {
            throw new ArgumentNullException(nameof(elementSelector));
        }

        var effectiveComparer = comparer ?? EqualityComparer<TKey>.Default;
        return GroupByUntilChangedEnumerator(source, keySelector, elementSelector, effectiveComparer);
    }

    // omitted: 7 more overloads returning GroupByUntilChangedEnumerator

    /// <summary>
    /// Iterator behind <c>GroupByUntilChanged</c>: walks the source once and yields each
    /// run of equal-keyed elements when the next key differs or the source ends.
    /// </summary>
    private static IEnumerable<IGrouping<TKey, TElement>> GroupByUntilChangedEnumerator<TSource, TKey, TElement>(
        IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        Func<TSource, TElement> elementSelector,
        IEqualityComparer<TKey> comparer)
    {
        using (var cursor = source.GetEnumerator())
        {
            // An empty source produces no groupings at all.
            if (!cursor.MoveNext())
            {
                yield break;
            }

            var run = new Grouping<TKey, TElement>(keySelector(cursor.Current), elementSelector(cursor.Current));

            while (cursor.MoveNext())
            {
                var nextKey = keySelector(cursor.Current);

                if (!comparer.Equals(run.Key, nextKey))
                {
                    // Key changed: publish the finished run, then start a new one.
                    yield return run;
                    run = new Grouping<TKey, TElement>(nextKey, elementSelector(cursor.Current));
                    continue;
                }

                run.Add(elementSelector(cursor.Current));
            }

            // The last run has no following key change to trigger it, so emit it here.
            yield return run;
        }
    }

    /// <summary>
    /// List-backed <see cref="IGrouping{TKey, TElement}"/> that is filled while a run is being read.
    /// </summary>
    private sealed class Grouping<TKey, TElement> : IGrouping<TKey, TElement>
    {
        private readonly IList<TElement> _items;

        /// <summary>Creates a grouping for <paramref name="key"/> holding its first element.</summary>
        public Grouping(TKey key, TElement first)
        {
            _items = new List<TElement> { first };
            Key = key;
        }

        /// <summary>Key shared by every element of this grouping.</summary>
        public TKey Key { get; }

        /// <summary>Appends another element to the grouping.</summary>
        public void Add(TElement next) => _items.Add(next);

        public IEnumerator<TElement> GetEnumerator() => _items.GetEnumerator();

        IEnumerator IEnumerable.GetEnumerator() => _items.GetEnumerator();
    }
}

最佳答案

这不是标准的 System.Linq 扩展。不过,我确实在 MoreLinq 的 MoreEnumerable.GroupAdjacent 中找到了它。特别是,它的源码与您所写的非常接近(为了留存备查,复制粘贴如下):

    /// <summary>
    /// Lazily splits <paramref name="source"/> into runs of adjacent elements whose keys
    /// compare equal, yielding one grouping per run.
    /// </summary>
    /// <param name="source">Sequence to split; asserted non-null.</param>
    /// <param name="keySelector">Extracts the key of each element; asserted non-null.</param>
    /// <param name="elementSelector">Projects each element into its grouping; asserted non-null.</param>
    /// <param name="comparer">Compares consecutive keys; asserted non-null.</param>
    private static IEnumerable<IGrouping<TKey, TElement>> GroupAdjacentImpl<TSource, TKey, TElement>(
        this IEnumerable<TSource> source,
        Func<TSource, TKey> keySelector,
        Func<TSource, TElement> elementSelector,
        IEqualityComparer<TKey> comparer)
    {
        Debug.Assert(source != null);
        Debug.Assert(keySelector != null);
        Debug.Assert(elementSelector != null);
        Debug.Assert(comparer != null);

        // Pair every element with its key up front so the loop below only deals with (key, value).
        var projected = source.Select(item => new KeyValuePair<TKey, TElement>(keySelector(item), elementSelector(item)));

        using (var iterator = projected.GetEnumerator())
        {
            // Nothing to group when the source is empty.
            if (!iterator.MoveNext())
                yield break;

            var head = iterator.Current;
            var runKey = head.Key;
            var run = new List<TElement> { head.Value };

            while (iterator.MoveNext())
            {
                var pair = iterator.Current;
                if (comparer.Equals(runKey, pair.Key))
                {
                    run.Add(pair.Value);
                    continue;
                }

                // Key changed: hand out the completed run, then open a new one.
                yield return CreateGroupAdjacentGrouping(runKey, run);
                runKey = pair.Key;
                run = new List<TElement> { pair.Value };
            }

            // Flush the final run, which has no key change after it.
            yield return CreateGroupAdjacentGrouping(runKey, run);
        }
    }

    /// <summary>
    /// Builds the grouping for one run. A list that is not already read-only is wrapped in a
    /// <see cref="ReadOnlyCollection{T}"/> before being passed on.
    /// </summary>
    /// <param name="key">Key shared by the run.</param>
    /// <param name="members">Elements of the run; asserted non-null.</param>
    private static Grouping<TKey, TElement> CreateGroupAdjacentGrouping<TKey, TElement>(TKey key, IList<TElement> members)
    {
        Debug.Assert(members != null);

        IList<TElement> exposed;
        if (members.IsReadOnly)
        {
            exposed = members;
        }
        else
        {
            exposed = new ReadOnlyCollection<TElement>(members);
        }

        return Grouping.Create(key, exposed);
    }

    /// <summary>
    /// Non-generic factory for <see cref="Grouping{TKey, TElement}"/>. Going through a generic
    /// method lets callers omit the type arguments, which the compiler infers from the arguments.
    /// </summary>
    static class Grouping
    {
        /// <summary>Creates a grouping for <paramref name="key"/> over <paramref name="members"/>.</summary>
        public static Grouping<TKey, TElement> Create<TKey, TElement>(TKey key, IEnumerable<TElement> members)
            => new Grouping<TKey, TElement>(key, members);
    }

    /// <summary>
    /// <see cref="IGrouping{TKey, TElement}"/> implementation that pairs a key with a member
    /// sequence it is given and enumerates that sequence directly.
    /// </summary>
    #if !NO_SERIALIZATION_ATTRIBUTES
    [Serializable]
    #endif
    private sealed class Grouping<TKey, TElement> : IGrouping<TKey, TElement>
    {
        private readonly IEnumerable<TElement> _members;

        /// <summary>Creates a grouping for <paramref name="key"/> over <paramref name="members"/> (asserted non-null).</summary>
        public Grouping(TKey key, IEnumerable<TElement> members)
        {
            Debug.Assert(members != null);

            _members = members;
            Key = key;
        }

        /// <summary>Key shared by all members of this grouping.</summary>
        public TKey Key { get; private set; }

        /// <summary>Enumerates the members by delegating to the wrapped sequence.</summary>
        public IEnumerator<TElement> GetEnumerator() => _members.GetEnumerator();

        // The non-generic enumerator simply reuses the generic one.
        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    }

虽然这不是框架代码,但它仍然是编写良好且经过测试的代码( tests ),并且来自一个我至少见过我所尊重的 C# 开发人员使用过的库。

关于c# - GroupByUntilChanged : is there a LINQ GroupBy operator that groups by SUCCESSIVE equal keys?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34706379/

相关文章:

c# - 我们可以在代码隐藏中使用 WPF 绑定(bind)中的 Where 子句吗?

c# - T4 - Entity Framework 错误 : Method not found: 'System.Data.Entity.DbSet` 1

c# - 从 asp.net MVC 中的函数返回对象的问题

c# - ASP.NET CORE 2.2 使用 dotConnect for MySQL 作为提供程序和 Entity Framework 扩展 : The Provider could not be resolved

c# - 使用 Linq 选择具有特定属性的节点

sql - SELECT SQL语法,用于WHERE子句中的计数

vb.net - LINQ to Entities 无法识别方法 [Type] GetValue [Type]

c# - 以编程方式构建 LINQ 查询而不用局部变量欺骗我

mysql - SQL查询GROUP BY,返回符合LEAST、COALESCE条件的分组

MySQL - 动态数据透视表分组问题