具體算法步驟如下: 1 創建節點N; 2 如果訓練集為空,則返回節點N,並標記為Failure; 3 如果訓練集中的所有記錄都屬於同一個類別,則以該類別標記節點N; 4 如果候選屬性為空,則返回N作為葉節點,標記為訓練集中最普通的類; 5 for each 候選屬性 attribute_list; 6 if 候選屬性是連續的 then; 7 對該屬性進行離散化; 8 選擇候選屬性attribute_list中具有最高信息增益率的屬性D; 9 標記節點N為屬性D; 10 for each 屬性D的已知值d; 11 由節點N長出一個條件為D=d的分支; 12 設s是訓練集中D=d的訓練樣本的集合; 13 if s為空; 14 加上一個樹葉,標記為訓練集中最普通的類; 15 else 加上一個由C4.5(R - {D},C,s)返回的節點
創(chuàng)新互聯-專業(yè)網站定制、快速模板網站建設、高性價比龍州網站開發(fā)、企業(yè)建站全套包干低至880元,成熟完善的模板庫,直接使用。一站式龍州網站制作公司更省心,省錢,快速模板網站建設找我們,業(yè)務覆蓋龍州地區(qū)。費用合理售后完善,十多年實體公司更值得信賴。
C++代碼你可以參考下
C4.5算法源代碼(C++)
// C4.5_test.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <math.h>
#include "malloc.h"
#include <stdlib.h>
/* Size used for the fixed per-attribute arrays declared as [MAX] in this file. */
const int MAX = 10;
/* Input table indexed as iInput[row][column]; allocated and filled in _tmain. */
int** iInput;
int i = 0;// number of columns
int j = 0;// number of rows
void build_tree(FILE *fp, int* iSamples, int* iAttribute,int ilevel);// write out the rules
int choose_attribute(int* iSamples, int* iAttribute);// pick test_attribute by computing the information gain ratio
double info(double dTrue,double dFalse);// compute the expected information
double entropy(double dTrue, double dFalse, double dAll);// compute the entropy
double splitinfo(int* list,double dAll);
int check_samples(int *iSamples);// check whether all samples are in the same class
int check_ordinary(int *iSamples);// find the most common class
int check_attribute_null(int *iAttribute);// check whether the attribute list is empty
void get_attributes(int *iSamples,int *iAttributeValue,int iAttribute);
/*
 * Program entry point.
 *
 * Reads c:\input.txt, a comma-separated table in which every value is a
 * single decimal digit and the last column is the class label, builds the
 * decision tree with build_tree() and writes the rules to d:\output.txt.
 *
 * The globals i (column count), j (row count) and iInput (the table) are
 * set here and read by every other function in the file.
 *
 * Returns 0 on success and when the input file cannot be opened (the
 * original behaviour); returns 1 on any other failure.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    FILE *fp = NULL;
    FILE *fp1 = NULL;
    int iGet = 0;        /* int rather than char so EOF stays distinguishable */
    int iLast = '\n';    /* last character read; tells whether the file ends in '\n' */
    int a = 0;
    int b = 0;           /* a, b: row / column cursors */
    int iRows = 0;       /* number of row buffers actually allocated */
    int iStatus = 1;
    int *iSamples = NULL;
    int *iAttribute = NULL;

    (void)argc;
    (void)argv;

    fp = fopen("c:\\input.txt","r");
    if (NULL == fp)
    {
        printf("error\n");
        return 0;
    }

    /* Columns: number of commas on the first line, plus one. */
    iGet = getc(fp);
    while (('\n' != iGet) && (EOF != iGet))
    {
        if (',' == iGet)
        {
            i++;
        }
        iLast = iGet;
        iGet = getc(fp);
    }
    i++;

    /* Rows: number of newlines, plus one only if the last line is unterminated. */
    while (EOF != iGet)
    {
        if ('\n' == iGet)
        {
            j++;
        }
        iLast = iGet;
        iGet = getc(fp);
    }
    if ('\n' != iLast)
    {
        j++;
    }
    if (j <= 0)
    {
        printf("error\n");
        goto cleanup;
    }
    iRows = j;

    /* The casts are kept because the file is built as C++ (C4.5_test.cpp). */
    iAttribute = (int *)malloc(sizeof(int) * i);
    iSamples = (int *)malloc(sizeof(int) * (j + 1));
    iInput = (int **)calloc(j, sizeof(int *));
    if ((NULL == iAttribute) || (NULL == iSamples) || (NULL == iInput))
    {
        printf("error\n");
        goto cleanup;
    }
    for (b = 0; b < i; b++)
    {
        iAttribute[b] = 1;   /* 1 = still available, -1 = already used */
    }
    for (a = 0; a < j; a++)
    {
        iInput[a] = (int *)calloc(i, sizeof(int));
        if (NULL == iInput[a])
        {
            printf("error\n");
            goto cleanup;
        }
        iSamples[a] = a;
    }
    iSamples[j] = -1;        /* terminator expected by the sample-list scans */

    /* Second pass: store the digits row by row. */
    rewind(fp);
    a = 0;
    b = 0;
    iGet = getc(fp);
    while ((EOF != iGet) && (a < j))
    {
        /* Only digits are data; this also skips '\r' and stray blanks. */
        if ((iGet >= '0') && (iGet <= '9'))
        {
            iInput[a][b] = iGet - '0';
            b++;
        }
        if (b == i)
        {
            a++;
            b = 0;
        }
        iGet = getc(fp);
    }
    if (a < j)
    {
        /* Fewer complete rows than counted lines: use only the complete ones. */
        j = a;
        iSamples[j] = -1;
    }
    if (j <= 0)
    {
        printf("error\n");
        goto cleanup;
    }

    fp1 = fopen("d:\\output.txt","w");
    if (NULL == fp1)
    {
        printf("error\n");
        goto cleanup;
    }
    build_tree(fp1,iSamples,iAttribute,0);
    iStatus = 0;

cleanup:
    if (NULL != fp1)
    {
        /* fclose flushes; a failure here means the rules were not fully written. */
        if (0 != fclose(fp1))
        {
            iStatus = 1;
        }
    }
    fclose(fp);
    if (NULL != iInput)
    {
        for (a = 0; a < iRows; a++)
        {
            free(iInput[a]);
        }
        free(iInput);
        iInput = NULL;
    }
    free(iSamples);
    free(iAttribute);
    return iStatus;
}
void build_tree(FILE * fp, int* iSamples, int* iAttribute,int level)//
{
int iTest_Attribute = 0;
int iAttributeValue[MAX];
int k = 0;
int l = 0;
int m = 0;
int *iSamples1;
for (k = 0; kMAX; k++)
{
iAttributeValue[k] = -1;
}
if (0 == check_samples(iSamples))
{
fprintf(fp,"result: %d\n",iInput[iSamples[0]][i-1]);
return;
}
if (1 == check_attribute_null(iAttribute))
{
fprintf(fp,"result: %d\n",check_ordinary(iSamples));
return;
}
iTest_Attribute = choose_attribute(iSamples,iAttribute);
iAttribute[iTest_Attribute] = -1;
get_attributes(iSamples,iAttributeValue,iTest_Attribute);
k = 0;
while ((-1 != iAttributeValue[k])(k MAX))
{
l = 0;
m = 0;
while ((-1 != iSamples[l])(l j))
{
if (iInput[iSamples[l]][iTest_Attribute] == iAttributeValue[k])
{
m++;
}
l++;
}
iSamples1 = (int *)malloc(sizeof(int)*(m+1));
l = 0;
m = 0;
while ((-1 != iSamples[l])(l j))
{
if (iInput[iSamples[l]][iTest_Attribute] == iAttributeValue[k])
{
iSamples1[m] = iSamples[l];
m++;
}
l++;
}
iSamples1[m] = -1;
if (-1 == iSamples1[0])
{
fprintf(fp,"result: %d\n",check_ordinary(iSamples));
return;
}
fprintf(fp,"level%d: %d = %d\n",level,iTest_Attribute,iAttributeValue[k]);
build_tree(fp,iSamples1,iAttribute,level+1);
k++;
}
}
int choose_attribute(int* iSamples, int* iAttribute)
{
int iTestAttribute = -1;
int k = 0;
int l = 0;
int m = 0;
int n = 0;
int iTrue = 0;
int iFalse = 0;
int iTrue1 = 0;
int iFalse1 = 0;
int iDepart[MAX];
int iRecord[MAX];
double dEntropy = 0.0;
double dGainratio = 0.0;
double test = 0.0;
for (k = 0;kMAX;k++)
{
iDepart[k] = -1;
iRecord[k] = 0;
}
k = 0;
while ((l!=2)(k(i - 1)))
{
if (iAttribute[k] == -1)
{
l++;
}
k++;
}
if (l == 1)
{
for (k = 0;k(k-1);k++)
{
if (iAttribute[k] == -1)
{
return iAttribute[k];
}
}
}
for (k = 0;k (i-1);k++)
{
l = 0;
iTrue = 0;
iFalse = 0;
if (iAttribute[k] != -1)
{
while ((-1 != iSamples[l])(l j))
{
if (0 == iInput[iSamples[l]][i-1])
{
iFalse++;
}
if (1 == iInput[iSamples[l]][i-1])
{
iTrue++;
}
l++;
}
for (n = 0;nl;n++)//計算該屬性有多少不同的值并記錄
{
m = 0;
while((iDepart[m]!=-1)(m!=MAX))
{
if (iInput[iSamples[n]][iAttribute[k]] == iDepart[m])
{
break;
}
m++;
}
if (-1 == iDepart[m])
{
iDepart[m] = iInput[iSamples[n]][iAttribute[k]];
}
}
while ((iDepart[m] != -1)(m!=MAX))
{
for (n = 0;nl;n++)
{
if (iInput[iSamples[n]][iAttribute[k]] == iDepart[m])
{
if (1 == iInput[iSamples[n]][i-1])
{
iTrue1++;
}
if (0 == iInput[iSamples[n]][i-1])
{
iFalse1++;
}
iRecord[m]++;
}
}
dEntropy += entropy((double)iTrue1,(double)iFalse1,(double)l);
iTrue1 = 0;
iFalse1 = 0;
m++;
}
double dSplitinfo = splitinfo(iRecord,(double)l);
if (-1 == iTestAttribute)
{
iTestAttribute = k;
dGainratio = (info((double)iTrue,(double)iFalse)-dEntropy)/dSplitinfo;
}
else
{
test = (info((double)iTrue,(double)iFalse)-dEntropy)/dSplitinfo;
if (dGainratio test)
{
iTestAttribute = k;
dGainratio = test;
}
}
}
}
return iTestAttribute;
}
double info(double dTrue,double dFalse)
{
double dInfo = 0.0;
dInfo = ((dTrue/(dTrue+dFalse))*(log(dTrue/(dTrue+dFalse))/log(2.0))+(dFalse/(dTrue+dFalse))*(log(dFalse/(dTrue+dFalse))/log(2.0)))*(-1);
return dInfo;
}
/*
 * Contribution of one partition to the expected information after a split:
 * info(dTrue, dFalse) multiplied by the partition's sample count
 * (dTrue + dFalse) and divided by dAll.
 */
double entropy(double dTrue, double dFalse, double dAll)
{
    const double dPartitionSize = dTrue + dFalse;
    const double dWeighted = dPartitionSize*info(dTrue,dFalse);

    return dWeighted/dAll;
}
double splitinfo(int* list,double dAll)
{
int k = 0;
double dSplitinfo = 0.0;
while (0!=list[k])
{
dSplitinfo -= ((double)list[k]/(double)dAll)*(log((double)list[k]/(double)dAll));
k++;
}
return dSplitinfo;
}
int check_samples(int *iSamples)
{
int k = 0;
int b = 0;
while ((-1 != iSamples[k])(k j-1))
{
if (iInput[k][i-1] != iInput[k+1][i-1])
{
b = 1;
break;
}
k++;
}
return b;
}
int check_ordinary(int *iSamples)
{
int k = 0;
int iTrue = 0;
int iFalse = 0;
while ((-1 != iSamples[k])(k i))
{
if (0 == iInput[iSamples[k]][i-1])
{
iFalse++;
}
else
{
iTrue++;
}
k++;
}
if (iTrue = iFalse)
{
return 1;
}
else
{
return 0;
}
}
/*
 * Checks whether any candidate attribute is still available.
 *
 * iAttribute - one flag per column; -1 marks a used attribute.
 *
 * Only columns 0 .. i-2 are examined (column i-1 is the class label).
 * Returns 1 when all of them are flagged -1, otherwise 0.
 */
int check_attribute_null(int *iAttribute)
{
    int k = 0;

    for (k = 0; k < (i-1); k++)
    {
        if (-1 != iAttribute[k])
        {
            return 0;
        }
    }
    return 1;
}
void get_attributes(int *iSamples,int *iAttributeValue,int iAttribute)
{
int k = 0;
int l = 0;
while ((-1 != iSamples[k])(k j))
{
l = 0;
while (-1 != iAttributeValue[l])
{
if (iInput[iSamples[k]][iAttribute] == iAttributeValue[l])
{
break;
}
l++;
}
if (-1 == iAttributeValue[l])
{
iAttributeValue[l] = iInput[iSamples[k]][iAttribute];
}
k++;
}
}
1、熵是描述區域的隨機程度的,$H=-\sum_i p_i \log p_i$,其中 $p_i$ 是第 $i$ 個灰度值的概率;當圖像均勻時,各灰度值的概率基本相等,熵可以達到最大
2、例程:
#include <iostream>
#include <math.h>
// Shared loop counters declared at file scope by the original program.
int i,j;
// Accumulates each row's marginal probability in place.
//
// table - matrix with 4 columns; on return table[r][0] holds the sum of
//         row r (its previous value plus columns 1..3). Other cells are
//         left unchanged.
// nrow  - number of rows to process.
//
// Always returns 0. Local loop counters are used so the call does not
// disturb the file-scope i and j.
double rowsum(double table[][4],int nrow)
{
    for (int r = 0; r < nrow; r++)
    {
        for (int c = 1; c < 4; c++)
        {
            table[r][0] += table[r][c];
        }
    }
    return 0;
}
// Accumulates each column's marginal probability in place.
//
// table - 4x4 matrix; on return table[0][c] holds the sum of column c
//         (its previous value plus rows 1..3). Other cells are left
//         unchanged.
// nlie  - number of columns to process.
//
// Always returns 0. Local loop counters are used so the call does not
// disturb the file-scope i and j.
double liesum(double table[4][4],int nlie)
{
    for (int c = 0; c < nlie; c++)
    {
        for (int r = 1; r < 4; r++)
        {
            table[0][c] += table[r][c];
        }
    }
    return 0;
}
void?main()
{?
double?p[4][4]={{1.0/8.0,1.0/16.0,1.0/32.0,1.0/32.0},{1.0/16.0,1.0/8.0,1.0/32.0,1.0/32.0},
{1.0/16.0,1.0/16.0,1.0/16.0,1.0/16.0},{1.0/4.0,0.0,0.0,0.0}};
for?(?i=0;i4;i++)//輸出概率矩陣
{
for?(?j=0;j4;j++)
coutp[i][j]"?";
coutendl;??????
}coutendl;?
rowsum(p,4);//調用函數輸出第i行的邊際概率?
for?(i?=0;i4;i++)
{cout"第"i"行的邊際概率p""["i"]""是"p[i][0]endl;}coutendl;
liesum(p,4);//調用函數輸出第j列的邊際概率
for?(?j?=0;j4;j++)
{cout"第"j"列的條件概率p""["j"]""是"p[0][j]endl;}coutendl;
//?double?p[4][4];
double?H1=0.0;
for(?i=0;i4;i++)
{H1+=p[i][0]*(log((1.0/p[i][0])/log(2.0)));}
double?H2=0.0;
for(?j=0;j4;j++)
{H2+=p[0][j]*(log((1.0/p[0][j])/log(2.0)));}
double?H3=0.0;?
for(i=0;i3;i++)
for(j=0;j4;j++)?
{H3+=p[i][j]*(log(1.0/p[i][j])/log(2.0));}
H3+=p[4][1]*(log(1.0/p[4][1])/log(2.0));
cout"X的熵:H(X)="H1endl;
cout"Y的熵:H(Y)="H2endl;
cout"(X,Y)的熵:H(X,Y)="H3endl;
coutendl;
cout"條件熵:H(X|Y)="H3-H2endl;
cout"條件熵:H(Y|X)="H3-H1endl;
cout"互信息:I(X;Y)="H1+H2-H3endl;
int?size=4;//定義聯合概率p為維數組
double?*p;
p=new?double[size];?
for?(?i=0;i4;i++)//聯合概率計算
{
for?(?j=0;j4;j++)
{
/*int?nSize;?
scanf(?"%d",?nSize?);?
int?*p?=?(?int*?)malloc(?sizeof(?int?)?*?nSize?);?
for(?int?i?=?0;?i??nSize;?i++?)?
p[?i?]?=?0;
double?table[4][4];
p[i]=pp[0][i]*table[i][j];
cout"聯合概率""p""["i"]""["j"]""是"p[i]endl;
}
}
for?(?i=0;i4;i++)//聯合熵的計算
{
for?(?j=0;j4;j++)
{???
//?H+=p[i][j]*log(1.0/p[i][j]);
H+=p[i]*(log((1.0/p[i])/log(2.0)));
}
}
cout"聯合H(x,y)熵為"Hendl;
delete?[]p;?*/
}
#include <stdio.h>
#include <string.h>
#include <math.h>
/*
 * Reads a symbol count n (0..100) followed by n probabilities from
 * standard input and prints the entropy of the source in bits.
 *
 * A probability of 0 contributes nothing to the sum. Returns 0 on
 * success and 1 when the input is missing, malformed or out of range.
 */
int main()
{
    double p[100];   /* probability of each source symbol */
    int n;           /* number of source symbols */
    int i;
    double sum=0;

    /* n must fit the fixed-size buffer p. */
    if (scanf("%d",&n) != 1 || n < 0 || n > 100)
    {
        return 1;
    }
    for(i=0;i<n;i++)
    {
        if (scanf("%lf",&p[i]) != 1 || p[i] < 0.0)
        {
            return 1;
        }
        /* 0*log(0) is taken as 0; calling log(0) would yield NaN. */
        if (p[i] > 0.0)
        {
            sum+=-p[i]*(log(p[i])/log(2.0));
        }
    }
    printf("%lf\n",sum);
    return 0;
}
當前文章:c語言熵函數 c的熵是多少
網站地址:http://chinadenli.net/article40/hpiseo.html
成都網站建設公司_創(chuàng)新互聯,為您提供網站營銷、品牌網站設計、品牌網站建設、移動網站建設、服務器托管、網站維護
聲明:本網站發(fā)布的內容(圖片、視頻和文字)以用戶投稿、用戶轉載內容為主,如果涉及侵權請盡快告知,我們將會在第一時間刪除。文章觀點不代表本網站立場,如需處理請聯系客服。電話:028-86922220;郵箱:631063699@qq.com。內容未經允許不得轉載,或轉載時需注明來源: 創(chuàng)新互聯