c++ - (C++) K-Means 聚类问题

标签 c++ algorithm machine-learning computer-vision k-means

我在使用 K 均值聚类算法时遇到了一些问题。输入文件如下所示:

4

60 60

23 45

25 11

30 11

...

...

...

在一个 60x60 的图像网格中,总共有 4 个簇。该算法似乎有效,但是当它重新开始重新计算质心并更改标签时,网格中的标签慢慢开始变为仅 1。大约 5 次迭代后,所有标签都变为 1。据我所知,我一遍又一遍地检查了代码,但我无法弄清楚为什么所有标签都只变为 1。感谢您的帮助!

#include <iostream>
#include <fstream>
#include <cmath>
using namespace std;

class Node {
public:
    int x;
    int y;
    int label;
    double distace;
    Node* next;

    void printNode() {
        cout << "X:\t" << x << endl;
        cout << "Y:\t" << y << endl;
        cout << "Label:\t" << label << endl;
    }

    Node(int i, int j) {
        x = i;
        y = j;
        next = nullptr;
    }
};

class LinkedList {
public:
    Node* head;
    int length;
    Node* scanner;

    LinkedList() {
        Node* n = new Node(-999, -999);
        head = n;
        scanner = head;
        length = 0;
    }

    void insert(Node* n) {
        if (!head->next) {
            head->next = n;
            length++;
            return;
        }
        n->next = head->next;
        head->next = n;
        length++;
    }

    void deleteNode(Node* n) {
        Node* prev = head;
        Node* current = head->next;
        while (current) {
            if (n->x == current->x && 
                n->y == current->y &&
                n->label == current->label) {
                    prev->next = current->next;
                    return;
                }
                prev = prev->next;
                current = current->next;
        }
        length--;
    }

    void printList() {
        Node* current = head->next;
        while (current) {
            current->printNode();
            cout << endl;
            current = current->next;
        }
    }

    Node* scan() {
        scanner = scanner->next;
        if (!scanner) return 0;
        return scanner;
    }

    void resetScanner() {
        scanner = head;
    }

    void changeLabelTo(int x, int y, int newLabel) {
        Node* current = head->next;
        while (current) {
            if (current->x == x &&
                current->y == y) {
                    current->label = newLabel;
                }

            current = current->next;
        }
    }
};

class KMeans {
public:
    struct xycoord {
      int x;
      int y;
    };

    int k;
    xycoord* kcentroids;
    LinkedList ll;
    int row;
    int col;
    int** image;
    int tracker;

    int getLabel() {
        int x = tracker++;
        if (tracker > k) { tracker = 1; }
        return x;
    }

    KMeans(int clusters, int r, int c) { 
        k = clusters;
        tracker = 1;
        kcentroids = new xycoord[k+1];
        row = r;
        col = c;
        image = new int*[row];
        for (int i = 0; i < row; i++)
            image[i] = new int[col];

    }

    ~KMeans() {
        delete [] kcentroids;
        for (int i = 0; i < row; i++)
            delete [] image[i];
        delete [] image;

        cout << "Called!" << endl;
    }

    void displayImage() {
        for (int i = 0; i < row; i++) {
            for (int j = 0; j < col; j++) {
                if (image[i][j] == 0) cout << " ";
                else { cout << image[i][j]; }
            }
            cout << endl;
        }
    }

    void imageOutput() {
        for (int i = 0; i < ll.length; i++) {
            Node* n = ll.scan();
            if (n) {
                image[n->x][n->y] = n->label;
            }
        }
        displayImage();
    }

    void computeCentroids() {
        for (int i = 1; i <= k; i++) {
            kcentroids[i].x = 0;
            kcentroids[i].y = 0;
        }

        int* count = new int[k+1];
        for (int i  = 0; i < ll.length; i++) {
            Node* n = ll.scan();
            if (n) {
                kcentroids[n->label].x += n->x;
                kcentroids[n->label].y += n->y;
                count[n->label]++;
            }
        }

        cout << endl;

        for (int i = 1; i <= k; i++) {
            kcentroids[i].x = kcentroids[i].x / count[i];
            kcentroids[i].y = kcentroids[i].y / count[i];

            /*
             * i - label
             * 6 - 4
             * 7 - 3
             * 8 - 2
             * 9 - 1
             */
            //image[kcentroids[i].x][kcentroids[i].y] = 10-i;
        }


        delete [] count;
    }

    void computeDistanceAndSetLabels() {
        for (int i = 0; i < ll.length; i++) {
            int minLabel = 0;
            double min = 99999.0;
            Node* n = ll.scan();
            for (int j = 1; j <= k; j++) {
                double m = 0.0;
                // distance formula
                m = sqrt(pow(n->x-kcentroids[j].x, 2) + pow(n->y-kcentroids[j].y,2));
                if (m < min) {
                    min = m;
                    minLabel = j;
                }
            }
            cout << i << " " << n->x << " " << n->y << " " << n->label << " ";
            n->label = minLabel;
            cout << minLabel << " " << n->label <<endl;
        }
    }

    void startClustering() {
        // more than 2 starts showing 1 take over all labels, 
        // this is to be changed to something better however
        for (int i = 0; i < 4; i++) {
            ll.resetScanner();
            imageOutput();
            ll.resetScanner();
            computeCentroids();
            ll.resetScanner();
            computeDistanceAndSetLabels();
            ll.resetScanner();
            imageOutput();
       }
    }
};

Node* createNode(int x, int y, int k) {
    Node* n = new Node(x, y);
    n->label = k;
    return n;
}

int main(int argc, char* argv[]) {
    if (argc < 4) {
        cout << "Please start program with: program in.txt out1.txt out2.txt" << endl;
        return -1;
    }

    ifstream in(argv[1]);
    if (!in) {
        cout << "File: " << argv[1] << " could not be read" << endl;
        return -2;
    }

    int k, rows, cols;
    in >> k;
    in >> rows;
    in >> cols;
    cout << k << " " << rows << " " << cols << endl;

    KMeans km(k, rows, cols);
    km.displayImage();

    LinkedList ll;
    int num;
    while (in >> num) {
        int num2;
        in >> num2;
        ll.insert(createNode(num, num2, km.getLabel()));
    }
    km.ll = ll;
    km.ll.printList();
    km.startClustering();
    km.displayImage();

    return 0;
}

最佳答案

您没有在 computeCentroids() 中将计数初始化为零,这会导致未定义的行为。将它们与质心一起归零:

int* count = new int[k+1];
for (int i = 1; i <= k; i++) {
    kcentroids[i].x = 0;
    kcentroids[i].y = 0;
    count[i] = 0;
}

当你像这样新建它们时,你也可以将它们归零:

int* count = new int[k+1]();

关于c++ - (C++) K-Means 聚类问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33862005/

相关文章:

python - 训练数据中的文档属于 LDA 中的特定主题

machine-learning - 用于分类和回归的递归特征消除的模型

machine-learning - 是否可以在 sklearn 管道中更改 pandas 列数据类型?

algorithm - 借助堆栈确定该单词来自语言

python - 计算交叉点的更有效方法?

c++ - 链接问题 C++、glew 和 SDL

c++ - QLayout 与 QStackedWidget 的问题

algorithm - 从树中选择 K 个节点的方法数,如果选择节点,则必须选择节点的父节点

c++ - 从 C++ 动态数组中添加/删除元素

c++ - Node.js、SQLite 和 C++