# 苗火 Nicholas
# [ML]Decision Tree
# 2022-1-25 萧


class node:
    """Bare tree-node container holding a list of subtrees.

    NOTE(review): nothing in the visible code instantiates this class; the
    tree built by ``decisionTree.tree_generate`` uses plain dicts instead.
    """

    def __init__(self):
        # Fresh list per instance so nodes never share children.
        self.subtree = []

class decisionTree:
    """Minimal decision-tree builder over rows loaded from a CSV file.

    Rows are stored as lists of strings in ``raw_data``. Tree nodes are plain
    dicts with the keys ``"D"`` (row indices covered by the node), ``"A"``
    (attribute column indices still available), ``"AS"`` (attribute value
    that led to the node), ``"C"`` (class label, assigned on leaves),
    ``"child"`` (list of child nodes) and ``"leaf"`` (1 for a leaf, else 0).
    """

    # Index of the field in each data row that holds the class label.
    LABEL_COLUMN = 7

    def __init__(self):
        self.raw_data = []        # data rows, each a list of string fields
        self.raw_data_lines = 0   # rows read by the most recent load_data()
        self.a = []               # attribute names taken from the header
        self.a_num = 0            # number of attribute names in self.a

    def load_data(self, path):
        """Read a comma-separated file into ``raw_data``.

        The first line is treated as a header: its first and last fields are
        dropped (presumably a row id and the class label - confirm against
        the dataset) and the remaining names are appended to ``self.a``.
        Every following non-empty line is split on commas and appended to
        ``self.raw_data``.

        Args:
            path: Path of the file to read.
        """
        # The context manager guarantees the handle is closed even on error.
        with open(path, "r") as fp:
            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose its last character.
            header = fp.readline().rstrip("\r\n").split(",")
            self.a.extend(header[1:-1])
            self.a_num = len(self.a)

            lines = 0
            for line in fp:
                line = line.rstrip("\r\n")
                if not line:
                    # A blank line carries no fields and would break the
                    # label lookup later on.
                    continue
                self.raw_data.append(line.split(","))
                lines += 1

        self.raw_data_lines = lines

    def get_lines(self):
        """Return the number of data rows read by the last ``load_data``."""
        return self.raw_data_lines

    def tree_generate(self, D, A, node):
        """Recursively grow the subtree rooted at ``node`` in place.

        The split attribute is simply the last entry of ``A``; no purity or
        information-gain criterion is evaluated.

        Args:
            D: List of row indices into ``self.raw_data`` covered by ``node``.
            A: List of attribute column indices still available for splits.
            node: Node dict to fill in; children are appended to
                ``node["child"]`` and leaves get ``node["C"]`` and
                ``node["leaf"] = 1``.
        """
        rows = self.raw_data
        label_col = self.LABEL_COLUMN

        if not D:
            # No samples: nothing to count or split, so close the branch.
            node["leaf"] = 1
            return

        # Tally class labels; on ties the label seen first wins.
        counts = {}
        for i in D:
            label = rows[i][label_col]
            counts[label] = counts.get(label, 0) + 1
        majority = max(counts, key=counts.get)

        # Case 1: every sample has the same class.
        # Case 2: no attribute is left, or the samples cannot be told apart
        #         on the remaining attributes.
        # Both end in a leaf labelled with the (majority) class label.
        first_row = rows[D[0]]
        indistinguishable = all(
            rows[i][j] == first_row[j] for j in A for i in D
        )
        if len(counts) == 1 or not A or indistinguishable:
            node["C"] = majority
            node["leaf"] = 1
            return

        # Case 3: split on the last available attribute.
        remaining = list(A)
        a_star = remaining.pop()

        # Group row indices by their value of a_star in one pass, keeping
        # first-seen value order. Every group is non-empty by construction.
        partitions = {}
        for i in D:
            partitions.setdefault(rows[i][a_star], []).append(i)

        for value, subset in partitions.items():
            child = {
                "D": subset,
                "A": list(remaining),
                "AS": value,
                "C": 0,
                "child": [],
                "leaf": 0,
            }
            node["child"].append(child)
            self.tree_generate(subset, child["A"], child)

    def show_tree(self, node, tab, col):
        """Print the subtree rooted at ``node``, one node per line.

        Args:
            node: Node dict to print.
            tab: Prefix printed before the node; one tab is appended per
                level of depth.
            col: Selects the detail shown next to the class: 0 prints the
                node's ``"AS"`` value, 1 its ``"A"`` list, 2 its ``"D"``
                list. Any other value prints no line for the node.
        """
        # Leaves are flagged with a trailing asterisk.
        star_leaf = "*" if node["leaf"] == 1 else ""

        if col == 0:
            print(tab, "Class:", node["C"], node["AS"], " ", star_leaf)
        elif col == 1:
            print(tab, "Class:", node["C"], " A[", node["A"], "]", star_leaf)
        elif col == 2:
            print(tab, "Class:", node["C"], " D[", node["D"], "]", star_leaf)

        for child in node["child"]:
            self.show_tree(child, tab + "\t", col)

# Load the sample dataset, grow a tree over all of it, and print the tree
# once per display mode.
dt = decisionTree()
dt.load_data("../dataset/data3.0.csv")

# Every loaded row takes part; attribute columns are numbered 1..a_num.
D = list(range(dt.raw_data_lines))
A = list(range(1, dt.a_num + 1))

# Root node: covers all rows, has all attributes available, no children yet.
node = dict(D=D, A=A, AS="", C=0, child=[], leaf=0)
dt.tree_generate(D, A, node)

# Mode 0 shows the branch value, 1 the remaining attributes, 2 the row indices.
for col in range(3):
    dt.show_tree(node, "", col)
# 发表评论:
# 昵称
#
# 邮件地址 (选填)
#
# 个人主页 (选填)
#
# 内容