c# - 使用另一个字节列表/数组计算字节列表/数组中的出现次数

标签 c# .net vb.net compression

我想统计一个字节序列在另一个字节序列中出现的总次数,但已经被计入某次匹配的字节不能再次使用。例如,给定字符串 k.k.k.k.k.k.,假设要查找的字节序列是 k.k,那么应该只找到 3 次而不是 5 次,因为匹配应当被划分为 [k.k].[k.k].[k.k].,而不是像 [k.[k].[k].[k].[k].k] 那样相互重叠(即每次只向右移动 2 个字节)。

理想情况下,我想借此了解压缩字典或游程编码(run-length encoding)大致是什么样子。所以目标是把
k.k.k.k.k.k. 缩减为只有 2 个部分,因为 (k.k.k.) 是所能得到的最大、也是最好的符号。

这是迄今为止的来源:

using System;
using System.Collections.Generic;
using System.Collections;
using System.Linq;
using System.Text;
using System.IO;


    /// <summary>
    /// Experimental console tool that tries every contiguous byte sequence of
    /// "ok.txt" as a candidate dictionary symbol, counts its non-overlapping
    /// occurrences, removes every occurrence after the first from the working
    /// buffer, and prints per-candidate statistics.
    /// </summary>
    static class Compression
    {
        /// <summary>
        /// Entry point. Reads "ok.txt", evaluates candidate patterns from the
        /// longest length down to a single byte, prints the collected statistics
        /// and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <returns>Always 0.</returns>
        static int Main(string[] args)
        {
            List<byte> bytes = File.ReadAllBytes("ok.txt").ToList();
            List<List<int>> list = new List<List<int>>();

            // Starting pattern length - this can be changed manually.
            int startingNumBytes = bytes.Count;
            for (int length = startingNumBytes; length > 0; length--)
            {
                Console.WriteLine("i: " + length);

                // The bound is re-evaluated on every pass because the buffer
                // may shrink while a candidate is being processed.
                for (int start = 0; start < bytes.Count - length; start++)
                {
                    // Log the start offset (the original printed the length here).
                    Console.WriteLine("ii: " + start);

                    // Each candidate is an independent copy of the buffer slice,
                    // so removing bytes below never alters the pattern itself.
                    List<byte> pattern = bytes.GetRange(start, length);

                    DisplayBinary(bytes, "red");
                    DisplayBinary(pattern, "green");

                    // NOTE(review): the buffer is shared between candidates, so
                    // bytes removed for one pattern stay removed for all later
                    // ones - confirm this is the intended experiment.
                    int matches = CollapseRepeats(bytes, pattern);

                    // Row layout: match count, pattern length, and a rough score
                    // of how good the symbol was (computed on the shrunk buffer).
                    list.Add(new List<int>
                    {
                        matches,
                        pattern.Count,
                        bytes.Count - ((matches * pattern.Count) - matches)
                    });
                }
            }

            Display(list);
            Console.Read();
            return 0;
        }

        /// <summary>
        /// Counts the non-overlapping occurrences of <paramref name="pattern"/>
        /// in <paramref name="bytes"/>, keeping the first occurrence and
        /// removing every later one from <paramref name="bytes"/> in place.
        /// </summary>
        /// <param name="bytes">Working buffer; it is modified.</param>
        /// <param name="pattern">Sequence to look for.</param>
        /// <returns>The number of occurrences found (kept one included).</returns>
        private static int CollapseRepeats(List<byte> bytes, List<byte> pattern)
        {
            // An empty pattern would match everywhere and never make progress.
            if (pattern.Count == 0)
            {
                return 0;
            }

            int matches = 0;
            int position = 0;
            while (position + pattern.Count <= bytes.Count)
            {
                if (!MatchesAt(bytes, position, pattern))
                {
                    position++;
                    continue;
                }

                // If it made it this far, it has found a match.
                matches++;
                Console.WriteLine("Matches: " + matches + " Orig Count: " + bytes.Count + " POS: " + position);

                if (matches == 1)
                {
                    // The first occurrence is kept. Jump past it so that a later
                    // match cannot reuse (and then delete) any of its bytes.
                    position += pattern.Count;
                    continue;
                }

                bytes.RemoveRange(position, pattern.Count);
                DisplayBinary(bytes, "red");
                Console.WriteLine(pattern.Count + " Bytes removed.");

                // The buffer shifted left, so the same position now holds new
                // data and must be examined again: do not advance.
            }

            return matches;
        }

        /// <summary>
        /// Tells whether <paramref name="pattern"/> occurs in
        /// <paramref name="bytes"/> starting exactly at <paramref name="position"/>.
        /// The caller guarantees that the pattern fits.
        /// </summary>
        private static bool MatchesAt(List<byte> bytes, int position, List<byte> pattern)
        {
            for (int offset = 0; offset < pattern.Count; offset++)
            {
                if (bytes[position + offset] != pattern[offset])
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Writes the bytes to the console as 8-digit binary groups, eight
        /// groups per line, in the requested colour.
        /// </summary>
        /// <param name="bytes">Bytes to print.</param>
        /// <param name="color">"green" or "red"; any other value keeps the current colour.</param>
        static void DisplayBinary(List<byte> bytes, string color = "white")
        {
            switch (color)
            {
                case "green":
                    Console.ForegroundColor = ConsoleColor.Green;
                    break;
                case "red":
                    Console.ForegroundColor = ConsoleColor.Red;
                    break;
                default:
                    break;
            }

            for (int index = 0; index < bytes.Count; index++)
            {
                // Start a new row before every eighth byte (including the first).
                if (index % 8 == 0)
                {
                    Console.WriteLine();
                }

                Console.Write(GetIntBinaryString(bytes[index]) + " ");
            }

            Console.WriteLine();
            Console.ResetColor();
        }

        /// <summary>
        /// Formats the low eight bits of <paramref name="n"/> as an 8-character
        /// binary string, most significant bit first.
        /// </summary>
        static string GetIntBinaryString(int n)
        {
            return Convert.ToString(n & 0xFF, 2).PadLeft(8, '0');
        }

        /// <summary>
        /// Prints every row of <paramref name="list"/> with right-aligned,
        /// 4-wide columns, followed by the total number of values in all rows.
        /// </summary>
        static void Display(List<List<int>> list)
        {
            Console.WriteLine("Elements:");
            foreach (List<int> row in list)
            {
                foreach (int value in row)
                {
                    Console.Write("{0,4}", value);
                }

                Console.WriteLine();
            }

            int count = list.Sum(row => row.Count);
            Console.WriteLine("Count:");
            Console.WriteLine(count);
        }

        /// <summary>
        /// Counts the non-overlapping occurrences of <paramref name="pattern"/>
        /// in <paramref name="bytes"/>, scanning left to right.
        /// </summary>
        /// <param name="pattern">Sequence to look for.</param>
        /// <param name="bytes">Buffer to search; it is not modified.</param>
        /// <returns>
        /// The number of occurrences; 0 when the pattern is empty or longer
        /// than the buffer.
        /// </returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        public static int SearchBytePattern(byte[] pattern, byte[] bytes)
        {
            if (pattern == null)
            {
                throw new ArgumentNullException("pattern");
            }

            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }

            // An empty pattern has no first byte to compare against.
            if (pattern.Length == 0)
            {
                return 0;
            }

            int matches = 0;
            // Last index at which the pattern still fits; it must be included,
            // otherwise a match flush with the end of the buffer is missed.
            int lastStart = bytes.Length - pattern.Length;
            for (int i = 0; i <= lastStart; i++)
            {
                if (pattern[0] != bytes[i])
                {
                    continue;
                }

                bool isMatch = true;
                for (int j = 1; j < pattern.Length; j++)
                {
                    if (bytes[i + j] != pattern[j])
                    {
                        isMatch = false;
                        break;
                    }
                }

                if (isMatch)
                {
                    matches++;
                    // Skip the matched bytes (the loop adds the final +1) so
                    // that occurrences never overlap.
                    i += pattern.Length - 1;
                }
            }

            return matches;
        }
    }

文件的非二进制(文本)内容请参考上文;下面是对应的二进制数据:011010110010111001101011001011100110101100101110011010110010111001101011001011100110101100101110。我希望处理之后的数据比最初更小。

最佳答案

/// <summary>
/// Counts the non-overlapping occurrences of <paramref name="pattern"/> in
/// <paramref name="target"/>, scanning left to right and resuming the search
/// immediately after each match.
/// </summary>
/// <param name="target">Buffer to search; it is not modified.</param>
/// <param name="pattern">Byte sequence to look for.</param>
/// <returns>
/// The number of occurrences; 0 when the pattern is empty or longer than the
/// target.
/// </returns>
/// <exception cref="ArgumentNullException">Either argument is null.</exception>
private static int CountOccurences(byte[] target, byte[] pattern)
{
    if (target == null)
    {
        throw new ArgumentNullException("target");
    }

    if (pattern == null)
    {
        throw new ArgumentNullException("pattern");
    }

    // An empty pattern would "match" at every position; report no occurrences.
    if (pattern.Length == 0)
    {
        return 0;
    }

    int count = 0;
    int start = 0;
    // Compare bytes directly: no hex-string conversion and no regex needed.
    while (start <= target.Length - pattern.Length)
    {
        int offset = 0;
        while (offset < pattern.Length && target[start + offset] == pattern[offset])
        {
            offset++;
        }

        if (offset == pattern.Length)
        {
            count++;
            // Consume the whole match so its bytes cannot be counted again.
            start += pattern.Length;
        }
        else
        {
            start++;
        }
    }

    return count;
}

关于c# - 使用另一个字节列表/数组计算字节列表/数组中的出现次数,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/6406022/

相关文章:

c# - 按名称访问 excel 单元格

c# - 按位归零在结构构造中意味着什么?

c# - 是什么导致 PIA API 在 Release 和 Debug 版本之间有所不同?

c# - 当 DataGridView 为空时显示文本

c# - 带有 nunit 的 Microsoft Fakes 和 Shims

c# - 当程序崩溃而没有异常时如何调试程序?

c# - 计算日期时间列表中的总分钟数

.net - 有人可以澄清我对模拟的验证概念的理解吗?

C# 可移植 IDE 和编译器 - 它需要的一切都需要在 USB 闪存驱动器上

c# - 使用 LINQ 对 DataTable 进行 LIKE 查询