Board logo

标题: [分享] K均值算法的实现,c++版本 [打印本页]

作者: mqzcxx    时间: 2012-2-20 12:48     标题: K均值算法的实现,c++版本

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <conio.h>
#include <math.h>
#include <iostream.h>
#define SUCCESS   1
#define FAILURE   0
#define TRUE   1
#define FALSE   0
#define MAXVECTDIM 20
#define MAXPATTERN 20
#define MAXCLUSTER 10
#define SIZEVECTOR 2  
#define ResultsFile "Results.txt"

char *f2a(double x, int width)
{
char cbuf[255];
char *cp;
int i,k;
int d,s;
cp=fcvt(x,width,&d,&s);
if (s)
{
strcpy(cbuf,"-");
}
else
{
strcpy(cbuf," ");
}
if (d>0)
{
for (i=0; i<d; i++)
{
cbuf[i+1]=cp[i];
}
cbuf[d+1]=0;
cp+=d;
strcat(cbuf,".");
strcat(cbuf,cp);
}
else
{
if (d==0)
{
strcat(cbuf,".");
strcat(cbuf,cp);
}
else
{
k=-d;
strcat(cbuf,".");
for (i=0; i<k; i++)
{
   strcat(cbuf,"0");
}
strcat(cbuf,cp);
}
}
cp=&cbuf[0];
return cp;
}


struct aCluster
{
double       Center[MAXVECTDIM];
int          Member[MAXPATTERN];
int          NumMembers;
};

struct aVector
{
double       Center[MAXVECTDIM];
int          Size;
};

class System
{
private:
double Pattern[MAXPATTERN][MAXVECTDIM+1];
aCluster Cluster[MAXCLUSTER];
FILE *OutFile;     //存放结果文件
int   NumPatterns;          // 点(向量)的个数
int   NumClusters;          // 簇的个数
void DistributeSamples();
int   CalcNewClustCenters();
double EucNorm(int, int);    //计算E准则
int   FindClosestCluster(int);
                                       
public:
system();
int LoadPatterns(char *fname);     
void InitClusters();               
void RunKMeans();               
void ShowClusters();            
void ShowCenters();
void WriteResults();
void CloseResults();
};

void System::ShowCenters()
{
int i;
fprintf(OutFile,"\n求出新的聚类点为:\n");
for (i=0; i<NumClusters; i++)
{
Cluster[i].Member[0]=i;
fprintf(OutFile,"最合适的第%d个点为:ClusterCenter[%d]=(%f,%f)\n",i+1,i,Cluster[i].Center[0],Cluster[i].Center[1]);
}
fprintf(OutFile,"\n");
}

void System::WriteResults()
{
if( ( OutFile = fopen(ResultsFile, "w" ) ) == NULL )
{
printf( "输出结果文件打开失败!\npress any key to continue..." );
getchar();
exit(0);
}
}

void System::CloseResults()
{
fclose(OutFile);
}

int System:oadPatterns(char *fname)
{
FILE *InFilePtr;
int    i,j;
double x;
if((InFilePtr = fopen(fname, "r")) == NULL)
{
getchar();
return FAILURE;
}
fscanf(InFilePtr, "%d", &NumPatterns);
fscanf(InFilePtr, "%d", &NumClusters);
for (i=0; i<NumPatterns; i++)        
{
for (j=0; j<SIZEVECTOR; j++)      
{
   fscanf(InFilePtr,"%lg",&x);   
   Pattern[i][j]=x;
}
}
fprintf(OutFile,"从文件中读取的%d个点的坐标数据如下:\n",NumPatterns);
for (i=0; i<NumPatterns; i++)
{
fprintf(OutFile,"attern[%d]=(%2.3f,%2.3f)\n",i,Pattern[i][0],Pattern[i][1]);
}
fprintf(OutFile,"\n--------------------\n");
return SUCCESS;
}

void System::InitClusters()
{
int i,j;
fprintf(OutFile,"假设前%d个点为聚类点:\n",NumClusters);
for (i=0; i<NumClusters; i++)
{
Cluster[i].Member[0]=i;
for (j=0; j<SIZEVECTOR; j++)
{
   Cluster[i].Center[j]=Pattern[i][j];
}
}
for (i=0; i<NumClusters; i++)
{
fprintf(OutFile,"ClusterCenter[%d]=(%f,%f)\n",i,Cluster[i].Center[0],Cluster[i].Center[1]);
}
fprintf(OutFile,"\n");
}

void System::RunKMeans()
{
int converged;
int pass;
pass=1;
converged=FALSE;
while (converged==FALSE)
{
fprintf(OutFile,"\nPASS=%d\n\n",pass++);
cout<<"第"<<pass++<<"次计算:"<<endl;
DistributeSamples();
converged=CalcNewClustCenters();
ShowCenters();
cout<<endl;
}
}

double System::EucNorm(int p, int c)   
{
double dist,x;                     
int i;                             
char zout[128];
char znum[40];
char *pnum;

pnum=&znum[0];
strcpy(zout,"d=sqrt(");
fprintf(OutFile,"聚类点ClusterCenter[%d]与Pattern[%d]的距离为:",c,p);
dist=0;
for (i=0; i<SIZEVECTOR ;i++)
{
x=(Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
strcat(zout,f2a(x,4));
if (i==0)
   strcat(zout,"+");
dist += (Cluster[c].Center[i]-Pattern[p][i])*(Cluster[c].Center[i]-Pattern[p][i]);
}
fprintf(OutFile,"%s)",zout);
return dist;
}

int System::FindClosestCluster(int pat)
{
int i, ClustID;
double MinDist, d;
MinDist =9.9e+99;
ClustID=-1;
for (i=0; i<NumClusters; i++)
{
d=EucNorm(pat,i);
fprintf(OutFile," = %f\n",sqrt(d));
if (d<MinDist)
{
   MinDist=d;
   ClustID=i;
}
}
if (ClustID<0)
{
fprintf(OutFile,"Aaargh");
exit(0);
}
return ClustID;
}

void System:istributeSamples()
{
int i,pat,Clustid,MemberIndex;
for (i=0; i<NumClusters;i++)
{
Cluster[i].NumMembers=0;
}
for (pat=0; pat<NumPatterns; pat++)
{
Clustid= FindClosestCluster(pat);
fprintf(OutFile,"发现点Patern[%d] 更接近于聚类点ClusterCenter[%d]\n\n",pat,Clustid);
MemberIndex=Cluster[Clustid].NumMembers;
Cluster[Clustid].Member[MemberIndex]=pat;
Cluster[Clustid].NumMembers++;
}
}

int System::CalcNewClustCenters()
{
int ConvFlag,VectID,i,j,k;
double tmp[MAXVECTDIM];
char xs[255];
char ys[255];
char nc1[20];
char nc2[20];
char *pnc1;
char *pnc2;

pnc1=&nc1[0];
pnc2=&nc2[0];
ConvFlag=TRUE;
fprintf(OutFile,"新的聚类点计算公式为:\n");
for (i=0; i<NumClusters; i++)            
{
pnc1=itoa(Cluster[i].NumMembers,nc1,10);
pnc2=itoa(i,nc2,10);
strcpy(xs,"Cluster Center");
strcat(xs,nc2);
strcat(xs,"(1/");
strcpy(ys,"(1/");
strcat(xs,nc1);
strcat(ys,nc1);
strcat(xs,")(");
strcat(ys,")(");
for (j=0; j<SIZEVECTOR; j++)        
{
   tmp[j]=0.0;
}
for (j=0; j<Cluster[i].NumMembers; j++)
{
   VectID=Cluster[i].Member[j];
   for (k=0; k<SIZEVECTOR; k++)     
   {
    tmp[k] += Pattern[VectID][k];  
    if (k==0)
    {
     strcat(xs,f2a(Pattern[VectID][k],3));
    }
    else
    {
     strcat(ys,f2a(Pattern[VectID][k],3));
    }
   }
   if(j<Cluster[i].NumMembers-1)
   {
    strcat(xs,"+");
    strcat(ys,"+");
   }
   else
   {
    strcat(xs,")");
    strcat(ys,")");
   }
}
for (k=0; k<SIZEVECTOR; k++)           
{
   tmp[k]=tmp[k]/Cluster[i].NumMembers;
   if (tmp[k] != Cluster[i].Center[k])
    ConvFlag=FALSE;
   Cluster[i].Center[k]=tmp[k];
}
fprintf(OutFile,"%s,\n",xs);
fprintf(OutFile,"%s\n",ys);
}
fprintf(OutFile,"\n\n");
for (i=0; i<NumClusters; i++)
{
fprintf(OutFile,"在第%d个聚类内的点有:\n",i+1);
cout<<"在第"<<i+1<<"个聚类内的点有:";
for (j=0; j<Cluster[i].NumMembers; j++)
{
   fprintf(OutFile," %d %s ",Cluster[i].Member[j],",");
   cout<<Cluster[i].Member[j]<<",";
}
fprintf(OutFile,"\n");
cout<<endl;
}
return ConvFlag;
}

void System::ShowClusters()
{
int cl;
for (cl=0; cl<NumClusters; cl++)
{
fprintf(OutFile,"\n聚类点%d坐标为:==>[%f,%f]\n", cl+1,Cluster[cl].Center[0],Cluster[cl].Center[1]);
}
}

void main()
{
System kmeans;
char ReadFileName;
kmeans.WriteResults();
printf("请输入存放数据的文件名:");
scanf("%s",&ReadFileName);
if (kmeans.LoadPatterns(&ReadFileName)==FAILURE)
{
printf("对不起,文件%s不存在。\n",&ReadFileName);
getchar();
exit(0);
}
kmeans.InitClusters();
kmeans.RunKMeans();
kmeans.ShowClusters();
kmeans.CloseResults();
}
/*the source of data

13// the number of data
3 //the number of cluster

//the detail about data
1.0 1.0
1.2 1.2
0.8 1.2
0.9 0.7
1.3 0.9
1.0 1.4
3.0 3.0
3.1 2.8
3.2 3.4
2.7 3.3
2.6 2.9
2.8 2.9
3.0 3.1
*/




欢迎光临 IT家园 (http://bbs.it998.com/) Powered by Discuz! 7.2