c++ - 字数统计程序 C++

标签 c++ vector

我目前正在尝试用 C++ 编写一个单词计数程序,但在解析字符串并把各个单词分隔开时遇到了困难。除此之外,我也没能让每个不同单词的计数在该单词重复出现时递增。据我判断,问题很可能出在我的 findWord() 和 DistinctWords() 函数上。也许您还能在其他函数中发现我没有注意到的问题;至于上面这两个函数,我不知道它们到底错在哪里。以下是我的导师给出的要求:


创建一个程序,计算并报告文本文件中不同的、不区分大小写的单词的出现次数。

程序应该有一个循环:

1.提示用户输入文件名。如果用户仅按下 Enter 键,则终止循环和程序。

2.验证输入名称的文件是否存在。如果文件不存在,则显示相应的消息并返回到步骤 1。

3.读取并显示文件内容。

4.显示文件中不同单词的计数。

5.显示文件中每个不同单词的排序列表以及每个单词的出现次数。列表按出现次数降序排列,出现次数相同时按单词升序排列。


我现在被难住了,而我的作业在午夜就要截止。如果能得到帮助,我将不胜感激。感谢您的时间。下面是我的代码,代码之后我还会附上示例测试文本文件的内容:


#include <algorithm>    // Needed for sort from standard libraries
#include <cctype>       // Needed for tolower / ispunct
#include <fstream>      // Needed to use files
#include <iomanip>
#include <iostream>
#include <sstream>      // Needed for istringstream
#include <string>
#include <vector>
using namespace std;

// One distinct word together with the number of times it has been seen.
struct WordCount{
    std::string word;   // The word itself
    int count;          // Number of occurrences recorded so far

    // Start tracking s; creating the entry counts as its first occurrence.
    WordCount(std::string s) : word(s), count(1) {}

    // Record one more occurrence of the word.
    void iCount(){ ++count; }
};

// Function prototypes (the definitions follow main)
string InputText();         // Prompt for a file name and return the text read from that file
string Normalize(string);   // Return a lowercase copy of the string with punctuation removed
vector<WordCount> DistinctWords(string);    // Build the vector of WordCount entries for the words in the text
bool findWord(string, vector<WordCount>);   // Linear search for a word; note the vector is received by value (a copy)
void DisplayResults(vector<WordCount>);     // Print each word and its count as a two-column table

// Main
int main(int argc, char** argv) {
    // Program Title
    cout << "Lab 9 - Text File Word Counter\n";
    cout << "-------------------------------\n\n";

    // Input text from file
    string buffer = InputText();

    while (buffer != ""){
        // Title for text file reading
        cout << "\nThis is the text string read from the file\n";
        cout << "-------------------------------------------\n";
        cout << buffer << endl << endl;

        // Build vector of words and counts
        vector<WordCount> words = DistinctWords(buffer);

        // Display results
        cout << "There are " << words.size() << " unique words in the above text." << endl;
        cout << "--------------------------------------------" << endl << endl;
        DisplayResults(words);
        buffer = InputText();
    }
    return 0;
}

/***********************************************
InputText() -
Prompts for a file name until a file can be opened,
then returns that file's text with its lines joined
by single spaces.
Returns an empty string when the user just presses
Enter (or when input on cin ends); the caller treats
an empty string as the signal to stop.
************************************************/
std::string InputText(){
    std::string fileName;

    while (true){
        std::cout << "File name? ";

        // An empty line, or no more input at all, means the user is done.
        if (!std::getline(std::cin, fileName) || fileName.empty()){ return ""; }

        // The stream closes itself when it goes out of scope.
        std::ifstream inputFile(fileName);
        if (!inputFile){
            // Report the problem and go back to the prompt.
            std::cout << "Error opening data file\n";
            continue;
        }

        std::string text;   // Accumulated file contents
        std::string line;   // Current line of the file

        // Looping on getline itself stops exactly when no further line can be read.
        while (std::getline(inputFile, line)){
            // getline drops the newline, so put a space between lines; without
            // it the last word of one line would fuse with the first of the next.
            if (!text.empty()){ text += ' '; }
            text += line;
        }
        return text;
    }
}

/****************************************************
Normalize(string) -
Returns a copy of s in which every punctuation
character has been removed and every remaining
character has been converted to lowercase.
All other characters (letters, digits, whitespace)
are kept in their original order.
*****************************************************/
std::string Normalize(std::string s){
    std::string nString;
    nString.reserve(s.length());

    // Build the result in a single pass. Erasing from the string while
    // indexing through it would skip the character that slides into the
    // erased position, leaving every second mark of a run such as "...".
    for (const char raw : s){
        // The <cctype> functions require a value representable as unsigned char.
        const unsigned char c = static_cast<unsigned char>(raw);
        if (ispunct(c)){ continue; }
        nString += static_cast<char>(tolower(c));
    }
    return nString;
}

/******************************************
vector<WordCount> DistinctWords(string) - 
Sorts vector of word count structures.
*******************************************/
vector<WordCount> DistinctWords(string s){
    vector<WordCount> words; // Initialize vector for words
    string nString = Normalize(s); // Convert passed string to lowercase and remove punctuation

    // Parse string
    istringstream iss(nString);

    while(iss >> nString){
        string n;   // Intialize temporary string

        iss >> n;   // Put word in n
        if (findWord(n, words) == true){ continue; }        // Check to verify that there is no preexisting occurence of the word passed
        else{
            WordCount tempO(n);     // Make structure object with n
            words.push_back(tempO);     // Push structure object into words vector
        }
    }
    return words;
}

/*********************************************
bool findWord(string, vector<WordCount>) -
Linear search for s among the entries of words.
Returns true if some entry's word equals s,
false otherwise (including for an empty vector).
NOTE: words is received by value, so the count
increment below is applied to this function's
copy only and is not visible to the caller.
Making it visible requires a reference parameter,
which also means changing the prototype.
**********************************************/
bool findWord(std::string s, std::vector<WordCount> words){
    // Look at every entry; only give up after the whole vector has been checked.
    for (auto& r : words){
        if (r.word == s){
            r.iCount();     // Affects the local copy only (see NOTE above)
            return true;
        }
    }
    // Reached for an empty vector or when no entry matched.
    return false;
}

/***********************************************
void DisplayResults(vector<WordCount>) -
Displays results.
************************************************/
void DisplayResults(vector<WordCount> words){
    // TROUBLESHOOT FIRST ERASE THIS AFTER!!!!!
    cout << "Word"  << setw(20) << "Count\n";
    cout << "-----------------------\n";
    for (auto &r : words){
        cout << setw(6) << left << r.word;
        cout << setw(15) << right << r.count << endl;
    }
}

这是最好的时代,这是最坏的时代,这是智慧的时代,这是愚蠢的时代,这是信仰的时代,这是怀疑的时代,这是不信任的时代,这是光明的季节,那是黑暗的季节,那是希望的春天,那是绝望的冬天,我们拥有一切,我们一无所有,我们都直奔天堂,我们都直奔另一个方向 - 简而言之,那个时期与现在的时期非常相似,以至于一些最喧闹的权威坚持认为,无论是好是坏,它都只能用最高级的比较来评价。

This is the example display he provided for this particular test file

最佳答案

你差一点就成功了!你只是忘了按引用(而不是按值复制)传递 `words` 这个 vector。我还在末尾加入了一个用于排序的自定义比较器。

#include <iostream>
#include <sstream>
#include <iomanip>
#include <string>
#include <fstream>      // Needed to use files
#include <vector>
#include <algorithm>    // Needed for sort from standard libraries
using namespace std;

// A distinct word paired with its running occurrence count.
struct WordCount{
    std::string word;   // The word being counted
    int count;          // Occurrences recorded so far

    // A new entry represents a word that has just been seen once.
    WordCount(std::string s) : word(s), count(1) {}

    // Add one occurrence.
    void iCount(){ count += 1; }
};

// Ordering used when sorting WordCount entries: larger counts come first,
// and entries with equal counts are ordered by word, ascending.
struct {
    // Returns true when a must be placed before b.
    bool operator()(const WordCount& a, const WordCount& b)
    {
        // Different counts decide the order on their own.
        if (a.count != b.count)
            return a.count > b.count;

        // Same count: fall back to comparing the words.
        return a.word < b.word;
    }
} CompareWordCount;

// Function prototypes (the definitions follow main)
string InputText();         // Prompt for a file name and return the text read from that file
string Normalize(string);   // Return a lowercase copy of the string keeping only letters and blanks
vector<WordCount> DistinctWords(string);    // Build the vector of WordCount entries for the words in the text
bool findWord(string, vector<WordCount>&);   // Linear search for a word; increments its count in the caller's vector when found
void DisplayResults(vector<WordCount>);     // Sort a copy of the entries and print them as a two-column table

// Main: report the distinct words of one file per pass. The loop keeps going
// for as long as InputText() returns non-empty text, so several files can be
// processed in a single run.
int main(int argc, char** argv) {
    // Program Title
    std::cout << "Lab 9 - Text File Word Counter\n";
    std::cout << "-------------------------------\n\n";

    // Input text from the first file
    std::string buffer = InputText();

    while (!buffer.empty()){
        // Title for text file reading
        std::cout << "\nThis is the text string read from the file\n";
        std::cout << "-------------------------------------------\n";
        std::cout << buffer << std::endl << std::endl;

        // Build vector of words and counts
        std::vector<WordCount> words = DistinctWords(buffer);

        // Display results
        std::cout << "There are " << words.size() << " unique words in the above text." << std::endl;
        std::cout << "--------------------------------------------" << std::endl << std::endl;
        DisplayResults(words);

        // Ask for the next file. The text returned here is what the next
        // pass works on, so it must be kept as-is in buffer.
        buffer = InputText();
    }
    return 0;
}

/***********************************************
InputText() -
Prompts for a file name until a file can be opened,
then returns that file's text with its lines joined
by single spaces.
Returns an empty string when the user just presses
Enter (or when input on cin ends); the caller treats
an empty string as the signal to stop.
************************************************/
std::string InputText(){
    std::string fileName;

    while (true){
        std::cout << "File name? ";

        // An empty line, or no more input at all, means the user is done.
        if (!std::getline(std::cin, fileName) || fileName.empty()){ return ""; }

        // The stream closes itself when it goes out of scope.
        std::ifstream inputFile(fileName);
        if (!inputFile){
            // Report the problem and go back to the prompt.
            std::cout << "Error opening data file\n";
            continue;
        }

        std::string text;   // Accumulated file contents
        std::string line;   // Current line of the file

        // Looping on getline itself stops exactly when no further line can be read.
        while (std::getline(inputFile, line)){
            // getline drops the newline, so put a space between lines; without
            // it the last word of one line would fuse with the first of the next.
            if (!text.empty()){ text += ' '; }
            text += line;
        }
        return text;
    }
}

/****************************************************
Normalize(string) -
Returns a copy of s reduced to lowercase letters and
whitespace: alphabetic characters are lowercased,
whitespace characters are kept unchanged, and every
other character (punctuation, digits, ...) is dropped.
*****************************************************/
std::string Normalize(std::string s){
    std::string nString;
    nString.reserve(s.length());

    for (const char raw : s){
        // The <cctype> functions require a value representable as unsigned char.
        const unsigned char c = static_cast<unsigned char>(raw);

        if (isalpha(c)){
            nString += static_cast<char>(tolower(c));
        }
        else if (isspace(c)){
            // Keep every kind of whitespace (not only space and tab) so that
            // words separated by a newline are not fused into one.
            nString += raw;
        }
    }

    // Return converted string
    return nString;
}

/******************************************
vector<WordCount> DistinctWords(string) - 
Sorts vector of word count structures.
*******************************************/
vector<WordCount> DistinctWords(string s){
    vector<WordCount> words; // Initialize vector for words
    string nString = Normalize(s); // Convert passed string to lowercase and remove punctuation

    // Parse string
    istringstream iss(nString);

    string n;   // Intialize temporary string
    while(iss >> n){

        if (findWord(n, words) == true){ continue; }        // Check to verify that there is no preexisting occurence of the word passed
        else{
            WordCount tempO(n);     // Make structure object with n
            words.push_back(tempO);     // Push structure object into words vector
        }
    }
    return words;
}

/*********************************************
bool findWord(string, vector<WordCount>&) -
Linear search for s among the entries of words.
If an entry's word equals s, that entry's count
is incremented in the caller's vector and true
is returned. Returns false when no entry matches
(including for an empty vector).
**********************************************/
bool findWord(std::string s, std::vector<WordCount>& words){
    // Search through vector
    for (auto& r : words){
        if (r.word == s){   // Increment count of object if found again
            r.iCount();
            return true;
        }
    }
    // Every path must return a value: flowing off the end of a non-void
    // function is undefined behavior, and the caller relies on "false"
    // to know that a new entry has to be added.
    return false;
}

/***********************************************
void DisplayResults(vector<WordCount>) -
Displays results.
************************************************/
void DisplayResults(vector<WordCount> words){
    // TROUBLESHOOT FIRST ERASE THIS AFTER!!!!!
    cout << "Word"  << setw(20) << "Count\n";
    cout << "-----------------------\n";

    sort(words.begin(), words.end(),CompareWordCount);
    for (auto &r : words){
        cout << setw(6) << left << r.word;
        cout << setw(15) << right << r.count << endl;
    }
}

关于c++ - 字数统计程序 C++,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43685816/

相关文章:

c++ - 如何在 C++ 中比较两个 vector 的字符串内容

java - 矩形对象抛出错误,说它在java中找不到.contains()方法

c++ - 指向对象的指针的私有(private) vector 。如何使用 getter 方法访问这些对象?

c++ - 无法在 mfc 应用程序中创建 GroupBox

c++ - 了解 `std::unordered_set`的用法

c++ - GetQueuedCompletionStatus 停止读取串口

c++ - 为什么我不能使用指针和长度创建 vector

c++ - 计算顶点法线时出现问题(在 C++ 中,用于 openGL 项目)

C++ STL 映射异常。 (不可变树的根)

c++ - C++ 数组中的多字节 UTF-8