c - 如何检查一个点是否在点簇内

标签 c algorithm geometry cluster-analysis quantization

我问这个问题是为了做 vector 量化的作业。

我实现了一种相当经典的算法来检测点簇的中心。然而,在输入数据中,有几个簇(簇的数量和总输入已知),我需要找到每个簇的中心,但我不知道哪些点构成一个簇。因此,如果我设法在集群内部或附近的某个位置(比任何其他初始化中心更接近)初始化 future 的中心点,我的算法可以迭代并转到正确的中心。

但是我不知道如何正确初始化。我正在随机初始化并检查两个中心是否彼此太近以及中心是否距离任何输入点太远,但此方法不容易参数化,即花费大量“计算”时间或无法获得正确的中心.

我的想法很简单,随机初始化并检查该点是否在簇内。有人知道我该怎么做吗?我无法构造多边形,因为我不知道簇的限制。 我更喜欢用 C 语言实现,但我也只接受这些想法!

编辑:输入数据示例:

Example Data: The red  points are what I should get as a result

我的代码:

#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>
#include <float.h>

#define TRAINING_CYCLE 10000
#define LEARNING_RATE  0.001
#define CENTROID_DISTANCE_SCALE 0.7  //used for setting a minimal distance between centroids
#define CENTROID_POINT_SCALE 0.1
#define CLUSTER_SIZE_PERCENTAGE 0.3

//User:      REMOVED
//Password:  REMOVED

typedef enum { false, true } bool;

typedef struct point{
    double x;
    double y;
} point;

int nbOfClusters;

double in1[1000];
double in2[1000];


point centers[100]; //later it is limited by number of clusters

int dataSize=0;
double maxX1, maxY1, maxX2, maxY2=0; //maximums of each data set
double deltaX, deltaY=0; //error toleration of each axis

double getAbs(double n){
    if (n>=0){
        return n;
    } else {
        return (-1)*n;
    }
}

int findNearestCentroid(point p1){ //returns the location in the table of the nearest centroid to the argument point
    double distance=DBL_MAX;
    int nearest=0;
    for (int i=0; i<nbOfClusters; i++){
        double distance_temp = (p1.x-centers[i].x)*(p1.x-centers[i].x)+(p1.y-centers[i].y)*(p1.y-centers[i].y);
        if ( distance_temp < distance){
            distance=distance_temp;
            nearest=i;
        }
    }
    return nearest;
}

double getDistance(point p1, point p2){
    return sqrt((p1.x-p2.x)*(p1.x-p2.x)+(p1.y-p2.y)*(p1.y-p2.y));
}

bool isCentroidsNear(double minDistance){

    for (int i=0;i<nbOfClusters;i++){
        for (int j=0; j<nbOfClusters; j++){
            if (i != j){
                double temp_distance=getDistance(centers[i],centers[j]);
                if (temp_distance<minDistance){ // the distance shouldn't be small
                    return true;
                }
            }
        }
    }
    return false; //if nothing hit the condition, there is no centroid too close to another
}
point findNearestInput(int centroid){ //returns the location in the table of the nearest centroid to the argument point
    double distance=DBL_MAX;
    point returnPoint;
    int nearest=0;
    for (int i=0; i<nbOfClusters; i++){
        double distance_temp = (in1[i]-centers[centroid].x)*(in1[i]-centers[centroid].x)+(in2[i]-centers[centroid].y)*(in2[i]-centers[centroid].y);
        if ( distance_temp < distance){
            distance=distance_temp;
            nearest=i;
        }
    }
    returnPoint.x=in1[nearest];
    returnPoint.y=in2[nearest];
    return returnPoint;
}

bool isPointNear(double minDistance){
    for(int i=0; i<nbOfClusters; i++){
        double distance=getDistance(findNearestInput(i),centers[i]); //the distance to the nearest point
        if(distance>minDistance){
            return true;
        }
    }
    return false;
}

bool isCountNearPoints(double distance){
    int counter=0;
    for(int i=0;i<nbOfClusters;i++){
        point p;
        for(int j=0; j<dataSize; j++){
            p.x=in1[j];
            p.y=in2[j];
            double tempDistance=getDistance(p,centers[i]);
            if (tempDistance<distance){
                counter++;
            }
        }
        //this is the number of points that the centroid should be near to
        int minNearPoints = dataSize/nbOfClusters*CLUSTER_SIZE_PERCENTAGE;
        if (counter<minNearPoints){
            return true;
        }
    }
    return false;
}

int main()
{
    char dummy[1];
    scanf("%c",&dummy[0]);
    nbOfClusters=dummy[0]-'0';



    while ( scanf("%lf,%lf", &in1[dataSize], &in2[dataSize]) != EOF){
        dataSize++;
    }


   //finding the maximums to determine the error toleration delta

    for(int i =0; i< dataSize; i++){
        if(in1[i]>0 && in1[i] > maxX1){
            maxX1=in1[i];
        }
        if(in2[i]>0 && in2[i]>maxY1){
            maxY1=in2[i];
        }
        if(in1[i]<0 && in1[i] < maxX1){
            maxX2=in1[i];
        }
        if(in2[i]<0 && in2[i] < maxY1){
            maxY2=in2[i];
        }
    }

    //double minDistance = CENTROID_DISTANCE_SCALE*sqrt((maxX1-maxX2)*(maxX1-maxX2)+(maxY1-maxY2)*(maxY1-maxY2));
    double minDistance = 1/nbOfClusters*sqrt((maxX1-maxX2)*(maxX1-maxX2)+(maxY1-maxY2)*(maxY1-maxY2));
    double pointMinDistance = CENTROID_POINT_SCALE*sqrt((maxX1-maxX2)*(maxX1-maxX2)+(maxY1-maxY2)*(maxY1-maxY2));

/*
    do { //randomly generate centroids but have finally nothing near
        for(int i=0; i<nbOfClusters; i++){
            centers[i].x=(double)rand()/RAND_MAX*2*(maxX1-maxX2)-(maxX1-maxX2);
            centers[i].y=(double)rand()/RAND_MAX*2*(maxY1-maxY2)-(maxY1-maxY2);
        }
    //} while(isCentroidsNear(minDistance) || isCountNearPoints(pointMinDistance));
    } while(isCentroidsNear(minDistance) || isPointNear(pointMinDistance));
    //} while(isCentroidsNear(minDistance));
    */
    int randomInputs[50];
    bool isSame;
    //generating nbOfClusters amount of random numbers from dataSize range that will later used to pick inputs
    do {
        do{
            //generate random numbers
            for(int i=0; i<nbOfClusters; i++){
                randomInputs[i]=(int)((double)rand()/RAND_MAX*dataSize);
            }
            isSame = false;
            //checking if the generated numbers are the same
            for(int i=0; i<nbOfClusters-1; i++){
                for(int j=i+1; j<nbOfClusters; j++){
                    if(randomInputs[i]==randomInputs[j] ){
                        isSame=true;
                        break;
                    }
                }
                if(isSame){
                    break;
                }
            }

        }while(isSame);
        //assign centroids to the generated numbers
        for (int i =0;i<nbOfClusters;i++){
            centers[i].x=in1[randomInputs[i]];
            centers[i].y=in2[randomInputs[i]];
        }
    }while(isCentroidsNear(minDistance)); //if the centroids are too close, i.e. in the same cluster
    //learning
    point p1;//point for iteration

    for (int ii=0; ii<TRAINING_CYCLE; ii++){
        for (int i=0; i<dataSize; i++){

            //construct a point
            p1.x=in1[i];
            p1.y=in2[i];

            //find the nearest point and the distance to it
            int nearPt=findNearestCentroid(p1);
            double distance=getDistance(p1,centers[nearPt]);

            //the distance that I want to move it
            double deltaDistance=LEARNING_RATE*distance;

            //moving the center on the DIRECTION of the other point
            //the slope of the line passing through both
            double slope=(in2[i]-centers[nearPt].y)/(in1[i]-centers[nearPt].x);

            double dx,dy;
            // finding how much the x needs to change => totalchange^2=dx^2+dy^2 but I know dy from dx
            dx=sqrt(deltaDistance*deltaDistance/(1+slope*slope)); //dx=(totaldist^2/(1+slope^2)


            //dx is always positive till now, so it should be neg. if the center is to the right of the point
            if(centers[nearPt].x>in1[i]){
                dx=(-1)*dx;
            }
            dy=slope*dx;
            //updating the center value
            centers[nearPt].x += dx;
            centers[nearPt].y += dy;

        }

    }

    //printing the results

    for (int i=0; i<nbOfClusters; i++){
        printf("%lf,%lf\n",centers[i].x,centers[i].y);
    }

    return 0;
}

最佳答案

通常的方法是从现有数据中选择点,而不是均匀随机。

由于在您的数据模型中,每个点都属于一个簇,因此选择现有点可以解决您的(模糊)问题,不是吗?

关于c - 如何检查一个点是否在点簇内,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/41696135/

相关文章:

javascript - 如何求圆弧的第二个 Angular

python - 通过算法生成 STL(3-D 几何文件)的最佳工具?

c - Net-SNMP 一次只运行一个处理程序吗?

c - 带十六进制负值的 sscanf

将指针转换为二维数组

相关项目分组算法

c# - 将 int 转换为 double 并四舍五入到最后一个整数

algorithm - 矩形可以放在另一个矩形里面吗?

C程序子进程在父进程之前再次摩擦

java - 创建一个函数,该函数返回重新排列为回文的数组字符串