c++ - 使用具有重复项的 int 二进制文件进行快速排序。递归有问题

标签 c++ recursion binaryfiles quicksort

首先,我想声明这是作业。

通常我们会有作业,我会在完成后发布问题,我想继续使用它来提高我的能力,但在这种情况下我只是被难住了!

我们的任务是使用二进制文件实现快速排序,仅使用 file.seek 、 file.read 和 file.write 命令。这是整数的二进制文件。

我遇到的问题是递归部分,在枢轴的“子树”上调用函数。我一直在关注这个网站上提出的算法:http://xoax.net/comp_sci/crs/algorithms/lessons/Lesson4/并一直使用代码示例作为我的二进制文件实现的伪代码。

这是我的代码:

//Algorithm and code example used:
void manage( fstream & file , int lindex , int fSize ){


    //Chunks of 1 or 0 do not need sorting
    if( fSize <= 1 )
        return;

    //Choose point in file as "pivot." Pivot is a value.
    //pp is the index of "pivot"
    int pp = (rand() % fSize) + lindex;
    file.seekp( pp * sizeof( int ) , file.beg );
    int pivot;
    file.read( (char*)&pivot , sizeof( int ) );

    //Choose "indices" to be swapped. These will be used in seekp
    int leftIndex = lindex;
    int rightIndex = fSize - 1;

    //Swap val , swap val, temp storage, swap index 1 , swap index 2
    int leftSwap , rightSwap , temp , tempI1 , tempI2 = 0;

    //Dummy indecies to help with tracking partitions.
    int dumL = leftIndex;
    int dumR = rightIndex;

    while( dumL < dumR ){

        //Move to left index from the file beginning.
        file.seekp( dumL * sizeof( int ) , file.beg );

        do{

            tempI1 = file.tellp() / sizeof( int );
            dumL = tempI1;
            file.read( (char*)&temp , sizeof( int ) );
        }

        while( temp < pivot );

        leftSwap = temp;

        //Move to right index.
        file.seekp( dumR * sizeof( int ) , file.beg );
        int n = 1;

        do{

            tempI2 = file.tellp() / sizeof( int );
            dumR = tempI2;
            file.read( (char*)&temp , sizeof( int ) );
            file.seekp( ( rightIndex - n ) * sizeof( int ) , file.beg );
            n++;
        }           

        while( temp > pivot );

        rightSwap = temp;

        //Swap values
        int hold = 0;

        if( leftSwap == rightSwap && rightSwap == pivot )
            break;

        file.seekp( tempI1 * sizeof( int ) , file.beg );
        file.read( (char*)&hold , sizeof( int ) );

        file.seekp( tempI1 * sizeof( int ) , file.beg );
        file.write( (char*)&rightSwap , sizeof( int ) );

        file.seekp( tempI2 * sizeof( int ) , file.beg );
        file.write( (char*)&leftSwap , sizeof( int ) );
    }

    cout << "pp: " << pp << endl;
    cout << "dumL: " << dumL << endl;
    cout << "dumR: " << dumR << endl << endl;

    //cout << "Lmanage: \n\t Lindex: 0\n\tSize: " << dumL << endl;
    manage( file , 0 , dumL );
    //cout << "Rmanage: \n\t Lindex: " << dumL + 1 << "\n\tSize: " << fSize - dumL - 1 << endl;
    manage( file , dumL + 1 , fSize - (dumL - leftIndex) - 1 );
}


void quicksort( fstream & file , int iSize ){

    srand( ( unsigned int ) time( 0 ) );
    manage( file , 0 , iSize );
}


int main( int argc, char* argv[] ){

    if( argc != 2 ){

        cout << "Invalid number of arguments.\n";
        return -1;
    }

    fstream file;
    int x = 0;

    file.open( argv[1] , ios::binary | ios::out | ios::in );

    //Calculate number of ints in file.
    file.seekg( 0 , file.end );
    int size = file.tellg() / sizeof( int );

    cout << "size: " << size << endl;

    quicksort( file , size );

    return 0;
}

“arg”是一个包含 100 个整数的二进制文件,可能有重复项。问题是它似乎对前 1/4 进行了排序,但索引混淆了并且“大小”参数变为负数。


编辑:添加了我的主要内容并更新了评论中的更改。

最佳答案

我意识到就地 qsort 算法可能不是一个流行的选择,但它使这个特定实例更容易理解。请参阅以下内容:

#include <iostream>
#include <fstream>
#include <vector>
#include <cstdlib>
#include <random>
using namespace std;

static void quicksort_stream(std::iostream& st,
                             std::ios::off_type left,  // inclusive
                             std::ios::off_type right, // exclusive
                             unsigned int indent=0)
{
    std::ios::off_type len = (right - left);
    if (len <= 1)
        return;

    // an interesting way of looking at the sort.
    std::string sIndent(indent,' ');
    cerr << sIndent << left << ',' << right << endl;

    // choose a random pivot index form our range:
    std::ios::off_type pivot_idx = left + std::rand() % (len-1);

    // get the pivot value, then swap the *last* value in our
    //  range into the pivot slot. it will be putting the pivot
    //  value in when were done.
    int pivot = 0, val = 0;
    st.seekg(pivot_idx * sizeof(int), ios::beg);
    st.read((char*)&pivot, sizeof(int));
    st.seekg((right-1) * sizeof(int), ios::beg);
    st.read((char*)&val, sizeof(int));
    st.seekg(pivot_idx * sizeof(int), ios::beg);
    st.write((char*)&val, sizeof(int));

    // now start the partition scan.
    pivot_idx = left;
    for (std::ios::off_type i=left; i<(right-1);++i)
    {
        st.seekg(i * sizeof(int), ios::beg);
        st.read((char*)&val, sizeof(int));
        if (val < pivot)
        {
            // swap the current selection with whatever is at
            //  the curent pivot index.
            int val2 = 0;
            st.seekg(pivot_idx * sizeof(int), ios::beg);
            st.read((char*)&val2, sizeof(int));
            st.seekg(i * sizeof(int), ios::beg);
            st.write((char*)&val2, sizeof(int));
            st.seekg(pivot_idx * sizeof(int), ios::beg);
            st.write((char*)&val, sizeof(int));
            ++pivot_idx;
        }
    }

    // store the pivot value back at the pivot index,
    //  placing that value in the last slot of the partition.
    st.seekg(pivot_idx * sizeof(int), ios::beg);
    st.read((char*)&val, sizeof(int));
    st.seekg((right-1) * sizeof(int), ios::beg);
    st.write((char*)&val, sizeof(int));
    st.seekg(pivot_idx * sizeof(int), ios::beg);
    st.write((char*)&pivot, sizeof(int));

    // and finally,invoke on subsequences. skipping the pivot index
    quicksort_stream(st, left, pivot_idx, indent+1);
    quicksort_stream(st, pivot_idx+1, right, indent+1);
}

int main(int argc, char *argv[])
{
    if (argc < 2)
        return EXIT_FAILURE;

    // create a vector of 100 random int values in [1,1000]
    std::vector<int> vals;

    // build generator
    std::random_device rd;
    std::mt19937 rgen(rd());
    std::uniform_int_distribution<> dist(0, 999);
    std::generate_n(std::back_inserter(vals), 100, [&dist,&rgen](){ return dist(rgen); });

    // open the output file and dump this to that location.
    std::fstream fs(argv[1], ios::out|ios::binary);
    fs.write((const char*)vals.data(), vals.size() * sizeof(vals[0]));
    fs.flush();
    fs.close();

    // now open the stream and sort it
    fs.open(argv[1], ios::in|ios::out|ios::binary);
    quicksort_stream(fs, 0, 100);
    fs.flush();
    fs.close();

    // clear the content of the exiting vector.
    std::fill(vals.begin(), vals.end(), 0);
    fs.open(argv[1], ios::in|ios::binary);
    fs.read((char *)vals.data(), vals.size() * sizeof(vals[0]));
    cout << endl << "Sorted" << endl;
    std::copy(vals.begin(), vals.end(), std::ostream_iterator<int>(cout, "\n"));

    return 0;
}

我在其中包含了递归缩进“打印”机制,以演示快速排序的本质及其工作原理。示例运行的输出如下所示。希望对您有所帮助。

示例输出

0,100
 0,66
  2,66
   2,23
    2,20
     2,5
     6,20
      6,15
       6,11
        7,11
         9,11
       12,15
        12,14
      16,20
       17,20
        17,19
    21,23
   24,66
    24,62
     24,44
      24,29
       24,26
       27,29
      30,44
       30,42
        30,32
        33,42
         33,39
          33,37
           33,35
         40,42
     45,62
      45,60
       46,60
        46,52
         46,48
         49,52
          50,52
        53,60
         53,59
          53,57
           55,57
    63,66
 67,100
  67,72
   67,69
   70,72
  73,100
   73,94
    73,85
     73,80
      73,75
      76,80
       76,78
     81,85
      81,84
       82,84
    86,94
     86,89
     90,94
      90,92
   95,100
    95,99
     97,99

Sorted
3
8
23
27
40
54
68
78
90
97
118
120
127
130
139
149
153
155
158
168
201
210
221
235
240
241
247
260
271
274
285
292
311
317
325
327
330
332
334
362
371
410
415
427
444
478
481
487
492
499
499
513
540
542
543
543
556
567
575
578
621
624
634
634
635
661
676
676
690
693
694
706
739
777
780
793
793
798
822
834
835
836
836
850
865
871
883
884
900
903
907
917
924
924
935
943
945
946
983
996

关于c++ - 使用具有重复项的 int 二进制文件进行快速排序。递归有问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/16137850/

相关文章:

c++ - 二进制模式+格式化文本操作还是文本模式+二进制数据操作——有意义吗?

c# - 使用 BinaryFormatter 附加到文件

c++ - 使用二进制模式将数据结构写入文件

c++ - QT根据串行接口(interface)上​​的输入显示不同的图像

c++ - 算法在计算时很快 - 将计算数据写入输出缓冲区时速度很慢

c++ - 如何通过 C++ 程序运行 Windows 命令?

c++ - 如何以允许传递临时对象的方式将 std::istream 传递给函数?

exception - 不知道如何从 : clojure. lang.PersistentList$1 创建 ISeq

对C中的递归感到困惑

javascript - 高效地走上 DOM 树,检查 nodeType 和属性?