/*---------------------------------------------------*/ /* Induction of Decision Trees (Sicstus Prolog V2+) */ /*---------------------------------------------------*/ /* (C) 1994 Zdravko Markov */ /*---------------------------------------------------*/ ?- op(100,fx,if). ?- op(99,xfy,then). /*---------------------------------------------------*/ id3 :- retractall(node(_,_,_)), retractall(if _ then _), findall(N,example(N,_,_),E), example(_,_,L), !, get_attributes(L,A), idt(E,root,A), assert_rules. idt([E],Parent,_) :- example(E,C,_), write(node(leaf,C,Parent)),nl, assertz(node(leaf,C,Parent)), !. idt(E,Parent,_) :- single_class(E,C), !, write(node(leaf,C,Parent)),nl, assertz(node(leaf,C,Parent)). idt(Es,Parent,As) :- choose_attribute(Es,As,A,Values,Rest), !, partition(Values,A,Es,Parent,Rest). idt(E,_,Attr) :- write('Error in data' - E/Attr),nl. single_class([],_) :- !. single_class([E|T],C) :- example(E,C,_), !, single_class(T,C). get_attributes([],[]) :- !. get_attributes([A=_|T],[A|W]) :- get_attributes(T,W). partition([],_,_,_,_) :- !. partition([V|Vs],A,Es,Parent,Rest) :- get_subset(Es,A=V,Ei), !, gen_name(Node), write(node(Node,A=V,Parent)),nl, assertz(node(Node,A=V,Parent)), idt(Ei,Node,Rest), !, partition(Vs,A,Es,Parent,Rest). choose_attribute(Es,As,A,Values,Rest) :- length(Es,LenEs), information_content(Es,LenEs,I), !, findall(A/Values/Gain, (member(A,As), get_values(Es,A,[],Values), split_into_subsets(Values,Es,A,Ess), residual_information(Ess,LenEs,R), Gain is I - R), All), max(All,A/Values/_), efface(A,As,Rest), !. split_into_subsets([],_,_,[]) :- !. split_into_subsets([V|Vs],Es,A,[Ei|Rest]) :- get_subset(Es,A=V,Ei), !, split_into_subsets(Vs,Es,A,Rest). residual_information([],_,0) :- !. residual_information([Ei|Es],Len,Res) :- length(Ei,LenEi), information_content(Ei,LenEi,I), !, residual_information(Es,Len,R), Res is R + I*LenEi/Len. information_content(Es,Len,I) :- setof(C,E^(member(E,Es),example(E,C,_)),Classes), !, sum_terms(Classes,Es,Len,I). sum_terms([],_,_,0) :- !. sum_terms([C|Cs],Es,Len,Info) :- bagof(E,(member(E,Es),example(E,C,_)),InC), length(InC,N), sum_terms(Cs,Es,Len,I), Info is I - (N/Len)*(log(N/Len)/log(2)). get_values([],_,Values,Values) :- !. get_values([E|Es],A,Vs,Values) :- example(E,_,L), member(A=V,L), !, (member(V,Vs), !, get_values(Es,A,Vs,Values); get_values(Es,A,[V|Vs],Values) ). get_subset([],_,[]) :- !. get_subset([E|Es],A,[E|W]) :- example(E,_,L), member(A,L), !, get_subset(Es,A,W). get_subset([_|Es],A,W) :- get_subset(Es,A,W). assert_rules :- path(root,Path,Conclusion), assertz(if Path then Conclusion), fail. assert_rules. path(Parent,[],Class) :- node(leaf,Class,Parent), !. path(Parent,[A|Path],Leaf) :- node(Son,A,Parent), path(Son,Path,Leaf). /*------------------- Auxiliary --------------------------*/ gen_name(M) :- retract(nam(N)), M is N+1, assert(nam(M)), !. gen_name(1) :- assert(nam(1)). efface(X,[X|T],T) :- !. efface(X,[Y|T],[Y|Z]) :- efface(X,T,Z). subset([],_) :- !. subset([X|T],L) :- member(X,L), !, subset(T,L). max([X],X) :- !. max([X/M|T],Y/N) :- max(T,Z/K), (M>K,Y/N=X/M;Y/N=Z/K), !. member(X,[X|_]). member(X,[_|T]) :- member(X,T).