class node:
    """Simple container whose only state is a list of subtrees."""

    def __init__(self):
        # Each instance gets its own, initially empty, list.
        self.subtree = list()
class decisionTree:
    """Decision tree built over comma-separated rows held in memory.

    Rows are stored as lists of strings in ``raw_data``. Tree nodes are plain
    dicts with the keys ``"D"`` (row indices), ``"A"`` (remaining attribute
    column indices), ``"AS"`` (attribute value that leads to the node),
    ``"C"`` (class label), ``"child"`` (list of child nodes) and ``"leaf"``
    (1 for a leaf, 0 otherwise).
    """

    def __init__(self, label_col=7):
        """Create an empty tree builder.

        Args:
            label_col: index of the column in each row that holds the class
                label. Defaults to 7, the column this class always used.
        """
        self.raw_data = []
        self.raw_data_lines = 0
        self.a = []
        self.a_num = 0
        self.label_col = label_col

    def load_data(self, path):
        """Load a comma-separated file, replacing any previously loaded data.

        The first line is treated as a header: its first and last fields are
        dropped and the remaining ones become the attribute names in
        ``self.a``. Every following non-blank line is split on commas and
        stored in ``self.raw_data``.

        Args:
            path: path of the file to read.
        """
        rows = []
        # The context manager closes the file even if reading fails.
        with open(path, "r") as fp:
            # Strip only line terminators, so a final line without a trailing
            # newline keeps its last character.
            header = fp.readline().rstrip("\r\n")
            names = header.split(",")[1:-1]
            for raw in fp:
                raw = raw.rstrip("\r\n")
                if not raw:
                    # A blank line would become a one-field row and break
                    # column lookups later on.
                    continue
                rows.append(raw.split(","))
        self.a = names
        self.a_num = len(names)
        self.raw_data = rows
        # Derived from the stored rows so the two can never disagree.
        self.raw_data_lines = len(rows)

    def get_lines(self):
        """Return the number of data rows loaded by ``load_data``."""
        return self.raw_data_lines

    def _majority_label(self, labels):
        """Return the most frequent label; ties go to the first one seen."""
        counts = {}
        for label in labels:
            counts[label] = counts.get(label, 0) + 1
        return max(counts, key=counts.get)

    def _same_on_all(self, D, A):
        """Return True if every row in D has equal values on every column in A."""
        first = self.raw_data[D[0]]
        return all(self.raw_data[i][j] == first[j] for j in A for i in D)

    def tree_generate(self, D, A, node):
        """Recursively grow the subtree below ``node``, updating it in place.

        The split attribute is always the last entry of ``A``; one child is
        created per distinct value of that attribute among the rows in ``D``,
        in order of first appearance.

        Args:
            D: list of row indices into ``self.raw_data``.
            A: list of column indices still available for splitting.
            node: node dict to fill in; its ``"C"``, ``"leaf"`` and
                ``"child"`` entries are modified.
        """
        if not D:
            # No rows to vote on or split: close the node, leaving "C" as is.
            node["leaf"] = 1
            return

        labels = [self.raw_data[i][self.label_col] for i in D]

        # Every row carries the same label: the node is a pure leaf.
        if all(label == labels[0] for label in labels):
            node["C"] = labels[0]
            node["leaf"] = 1
            return

        # Nothing left to split on, or the rows cannot be told apart by the
        # remaining attributes: fall back to the majority label.
        if not A or self._same_on_all(D, A):
            node["C"] = self._majority_label(labels)
            node["leaf"] = 1
            return

        # Copy so the caller's attribute list is not mutated by pop().
        AV = list(A)
        a_star = AV.pop()

        # Group rows by their value on the split attribute. Values come from
        # D itself, so every group is non-empty; dict order keeps the values
        # in order of first appearance.
        groups = {}
        for i in D:
            groups.setdefault(self.raw_data[i][a_star], []).append(i)

        for value, DV in groups.items():
            new_node = {"D": DV, "A": AV, "AS": value, "C": 0, "child": [], "leaf": 0}
            node["child"].append(new_node)
            self.tree_generate(DV, AV, new_node)

    def show_tree(self, node, tab, col):
        """Print ``node`` and its descendants, one line per node.

        Args:
            node: node dict to print.
            tab: prefix printed before the node; each level below appends a
                tab character to it.
            col: selects the detail shown next to the class: 0 prints the
                node's ``"AS"`` value, 1 its ``"A"`` list, 2 its ``"D"`` list.
                Any other value prints nothing for the node itself.
        """
        star_leaf = "*" if node["leaf"] == 1 else ""
        if col == 0:
            print(tab, "Class:", node["C"], node["AS"], " ", star_leaf)
        elif col == 1:
            print(tab, "Class:", node["C"], " A[", node["A"], "]", star_leaf)
        elif col == 2:
            print(tab, "Class:", node["C"], " D[", node["D"], "]", star_leaf)
        for child in node["child"]:
            self.show_tree(child, tab + "\t", col)
# Driver: load the dataset, grow a tree over every loaded row and every
# attribute column, then print the tree once per display mode.
dt = decisionTree()
dt.load_data("../dataset/data3.0.csv")

# All row indices, and attribute columns 1..a_num (column 0 is not used as an
# attribute).
D = list(range(dt.raw_data_lines))
A = list(range(1, dt.a_num + 1))

# NOTE: this rebinds the module-level name `node` (also the name of a class
# defined earlier in the file) to the root node dict.
node = dict(D=D, A=A, AS="", C=0, child=[], leaf=0)
dt.tree_generate(D, A, node)

dt.show_tree(node, "", 0)  # attribute value leading to each node
dt.show_tree(node, "", 1)  # remaining attribute columns per node
dt.show_tree(node, "", 2)  # row indices per node