c - 使用二分搜索比较两个文件的一些快速帮助

标签 c string binary-search

所以我为一个项目编写了这段代码,我认为它可以工作,但它只是对我的一个文件(IkeaWords.txt 文件)中的第一个术语进行比较。 我哪里做错了? 所以这就是我写的,希望这已经足够了。

  /*Display each IKEA product name that can be found in the English     dictionary.
The full list of the 1764 unique IKEA product words is in IKEAwords.txt
To see if words exist in English, use the 40,444 word English dictionary dictionary.txt, 
where the longest word has 21 characters.  To lookup a word in the dictionary consider 
using binary search, 
Print out each word that is found.
*/

#define _CRT_SECURE_NO_WARNINGS
#define NumberOfWordsInDictionary 40437
#define MaxWordSize 21+1
#define NumberOfWordsInIkea 1764
#include <stdio.h>
#include <string.h>   // for string length
#include <stdlib.h>   // for exit()

                 // Maximum size of any word in the dictionary, + 1 for null
const char DictionaryFileName[] = "dictionary.txt";  // File name for where dictionary words are found
const char IkeaFileName[] = "IKEAwords.txt";

                                        //--------------------------------------------------------------------------------------
                                        // Use binary search to look up the word from the .txt file in the dictionary array, 
                                            //returning index if found, -1 otherwise
int binarySearch(const char ikeaWord[][MaxWordSize],    // word to be looked up
    const char dictionary[][MaxWordSize],     // the dictionary of words
    int numberOfDictionaryWords         //number of words in the dictionary
    )             
{
    int low, mid, high;     // array indices for binary search
    int searchResult = -1;  // Stores index of word if search succeeded, else -1

                            // Binary search for word
    low = 0;
    high = numberOfDictionaryWords - 1;
    int i = 0;
    while (i < MaxWordSize)
    {
        while (low <= high)
        {
            mid = (low + high) / 2;
            // searchResult negative value means word is to the left, positive value means
            // word is to the right, value of 0 means word was found
            searchResult = strcmp(ikeaWord[i], dictionary[mid]);
            if (searchResult == 0) {
                // Word IS in dictionary, so return the index where the word was found
                return mid;
            }
            else if (searchResult < 0)
            {
                high = mid - 1; // word should be located prior to mid location
            }
            else
            {
                low = mid + 1; // word should be located after mid location
            }
        }
        i++;
    }


    // Word was not found
    return -1;
}//end binarySearch()


 //--------------------------------------------------------------------------------------
 // Read in the words from the dictionary file
void readWordsInFromDictionaryFile(FILE *pInputFile, char dictionary[][MaxWordSize])
{
    int index = 0;      // index of dictionary word being read
    int maxWordLength = 0;

    // Associate the actual file name with file pointer and try to open it
    pInputFile = fopen(DictionaryFileName, "r");
    // verify that file open worked
    if (pInputFile == NULL) {
        printf("Can't open %s. Verify it is in correct location\n", DictionaryFileName);
        exit(-1);
    }

    // Keep reading words while there are any
    while (fscanf(pInputFile, "%s", dictionary[index]) != EOF) {
        int tempLength = (int)strlen(dictionary[index]);
        if (tempLength > maxWordLength) {
            maxWordLength = tempLength;
        }
        index++;
    }
    // uncomment out code test array dictionary[][]
    //printf("There were %d words in the dictionary, with max length %d. \n", index, maxWordLength);
    fclose(pInputFile);   // close the dictionary file
    printf("There were %d words read from the dictionary with max length %d.\n", index, maxWordLength);
}//end readInputFile()

void readWordsInFromIkeaFile(FILE *pInputFile2, char ikeaWord[][MaxWordSize])
{
    int index2 = 0;      // index of dictionary word being read
    int maxIkeaWordLength = 0;

    // Associate the actual file name with file pointer and try to open it
    pInputFile2 = fopen(IkeaFileName, "r");
    // verify that file open worked
    if (pInputFile2 == NULL)
    {
        printf("Can't open %s. Verify it is in correct location\n", IkeaFileName);
        exit(-1);
    }

    // Keep reading words while there are any
    while (fscanf(pInputFile2, "%s", ikeaWord[index2]) != EOF)
    {
        int tempLength2 = (int)strlen(ikeaWord[index2]);
        if (tempLength2 > maxIkeaWordLength)
        {
            maxIkeaWordLength = tempLength2;
        }
        index2++;
    }
    printf("There were %d words read from the Ikea file with max length %d.\n", index2,maxIkeaWordLength);
}
 //--------------------------------------------------------------------------------------
int main()
{


    char dictionary[NumberOfWordsInDictionary][MaxWordSize];
    char ikeaWord[NumberOfWordsInIkea][MaxWordSize];


    FILE *pInputFile = fopen(DictionaryFileName, "r");     // file pointer
    FILE *pInputFile2 = fopen(IkeaFileName, "r");
    readWordsInFromDictionaryFile(pInputFile, dictionary);
    readWordsInFromIkeaFile(pInputFile2, ikeaWord); // used as input




                               // Find index of word in dictionary
    int index = -1;
    int j = 0; // counter
    while(j<NumberOfWordsInIkea)
    {
    index = binarySearch(ikeaWord[j], dictionary, NumberOfWordsInDictionary);

    // Display results
        if (index != -1)
        {
                // word was found, so display it
            printf("The word \"%s\" was found.\n", dictionary[index]);
        }
    j++;
    }


    system("pause");
    return 0;
}

如果您也需要了解的话,我是在 Visual Studio 2015 中编写的。

感谢您的帮助!

最佳答案

您的代码中有几个错误和不必要的东西。我冒昧地更改了一些内容以使其正常工作(如果您按照注释中的提示进行操作,您可能已经找到它们),并更改了一些内容以使其更清晰(来自 GCC 的非编译器警告)。由于缺少 MSVS,因此未与 MSVS 进行检查。

#define _CRT_SECURE_NO_WARNINGS
// changed values to accomodate different data-files sizes
#define NumberOfWordsInDictionary 99172
#define MaxWordSize 64
#define NumberOfWordsInIkea 1393
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// from /usr/dict/words (put to lower case)
const char DictionaryFileName[] = "words.txt";
// scraped from http://lar5.com/ikea/ (put to lower case)
const char IkeaFileName[] = "ikea_names.txt";

// ripped 'const' and changed ikeaWord[][] to take a the single entry
int binarySearch(char *ikeaWord, char dictionary[][MaxWordSize],
                 int numberOfDictionaryWords)
{
  int low, mid, high;
  int searchResult = -1;
  low = 0;
  high = numberOfDictionaryWords - 1;

  // ripped outer loop because we search for Ikea names one by one
  while (low <= high) {
    mid = (low + high) / 2;
    searchResult = strcmp(ikeaWord, dictionary[mid]);
    if (searchResult == 0) {
      return mid;
    } else if (searchResult < 0) {
      high = mid - 1;
    } else {
      low = mid + 1;
    }
  }
  return -1;
}

int readWordsInFromDictionaryFile(FILE * pInputFile,
                                  char dictionary[][MaxWordSize])
{
  int index = 0;
  int maxWordLength = 0;
  // ripped fopen() because that happened already in main()

  // Changed from fscanf to fgets because the *scanf() family is a 
  // never ending source of problems, see stackoverflow et al. for endless examples
  while (fgets(dictionary[index], MaxWordSize - 1, pInputFile)) {
    int tempLength = (int) strlen(dictionary[index]);
    // Because of the change from fscanf to fgets we need to snip the newline off
    // (for "\r\n" endings snipp two)
    dictionary[index][tempLength - 1] = '\0';
    if (tempLength > maxWordLength) {
      maxWordLength = tempLength;
    }
    index++;
  }
  // If fgets returns NULL it is either EOF or an error
  if (ferror(pInputFile)) {
    fprintf(stderr, "something bad happend while reading dictionary\n");
    return 0;
  }
  fclose(pInputFile);
  printf("There were %d words read from the dictionary with max length %d.\n",
         index, maxWordLength);
  return 1;
}

// snipped off the addition of "2" to the variable names, no need for that
int readWordsInFromIkeaFile(FILE * pInputFile, char ikeaWord[][MaxWordSize])
{
  int index = 0;
  int maxIkeaWordLength = 0;

  while (fgets(ikeaWord[index], MaxWordSize - 1, pInputFile)) {

    int tempLength = (int) strlen(ikeaWord[index]);
    ikeaWord[index][tempLength - 1] = '\0';
    if (tempLength > maxIkeaWordLength) {
      maxIkeaWordLength = tempLength;
    }
    index++;
  }
  if (ferror(pInputFile)) {
    fprintf(stderr, "something bad happend while reading ikeawords\n");
    return 0;
  }
  printf("There were %d words read from the Ikea file with max length %d.\n",
         index, maxIkeaWordLength);
  return 1;
}

 //--------------------------------------------------------------------------------------
int main()
{
  char dictionary[NumberOfWordsInDictionary][MaxWordSize];
  char ikeaWord[NumberOfWordsInIkea][MaxWordSize];

  int res;
  // added error-checks
  FILE *pInputFile = fopen(DictionaryFileName, "r");
  if (pInputFile == NULL) {
    fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
            DictionaryFileName);
    exit(EXIT_FAILURE);
  }
  FILE *pInputFile2 = fopen(IkeaFileName, "r");
  if (pInputFile2 == NULL) {
    fprintf(stderr, "Can't open %s. Verify it is in correct location\n",
            IkeaFileName);
    exit(EXIT_FAILURE);
  }
  if ((res = readWordsInFromDictionaryFile(pInputFile, dictionary)) == 0) {
    fprintf(stderr, "Error in reading dictionary\n");
    exit(EXIT_FAILURE);
  }
  if ((res = readWordsInFromIkeaFile(pInputFile2, ikeaWord)) == 0) {
    fprintf(stderr, "Error in reading ikea-file\n");
    exit(EXIT_FAILURE);
  }

  int index = -1;
  int j = 0;
  while (j < NumberOfWordsInIkea) {
    index = binarySearch(ikeaWord[j], dictionary, NumberOfWordsInDictionary);

    if (index != -1) {
      printf("The word \"%s\" was found.\n", dictionary[index]);
    }
    j++;
  }
// Seems to be useful when run in MS-Windows
#if defined _WIN32 ||  defined WIN32 || defined WIN64 || defined _WIN64
   sytem("pause");
#endif
  exit(EXIT_SUCCESS);
}

我没有打磨每个角落,还需要一些工作。例如:读取两个文件的两个函数实际上在做相同的事情,只是针对不同的文件和不同的字典。这可以通过单个函数来完成。文件的名称、文件的长度以及这些文件的条目的长度是固定的,它们可以是动态的,以便能够使用不同的输入而无需重新编译。

但一切都结束了:开始还不错!

关于c - 使用二分搜索比较两个文件的一些快速帮助,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39780898/

相关文章:

c - EXC_BAD_ACCESS错误C初学者程序

当我插入变量时 C 程序崩溃

c - 你如何解读这个集会?

javascript - 如何用 RegEx 找到的变量名替换 RegEx?

c++ - 如何用空格替换 std::string 中的所有非字母字符(数字和特殊字符)

c++ - std::binary_search 的自定义比较函数

char * buf = malloc(sizeof (char *) * 16) vs char buf[ sizeof (char *) * 16]

java - 用于从文件中读取矩阵的单行 Java 扫描器

java - 二进制搜索算法的问题

list - 类型错误:列表索引必须是整数,而不是 float