[ML]Decision Tree

2022-1-25 写技术


class node:
    """Bare tree node that starts out with an empty ``subtree`` list."""

    def __init__(self):
        # A new list is created per instance, so nodes never share children.
        self.subtree = list()

class decisionTree:
    """Simple decision tree built over rows loaded from a comma-separated file.

    Tree nodes are plain dicts with these keys:

    * ``"D"``     - indices into ``raw_data`` of the samples reaching the node
    * ``"A"``     - attribute column indices still available below the node
    * ``"AS"``    - the attribute value that selected this branch
    * ``"C"``     - class label, written by ``tree_generate`` for leaves only
    * ``"child"`` - list of child node dicts
    * ``"leaf"``  - ``1`` for a leaf, ``0`` otherwise
    """

    # Column of every row that holds the class label.  ``-1`` (the last
    # column) agrees with the header parsing in ``load_data`` and is the same
    # column as the formerly hard-coded index 7 for eight-column rows.
    label_index = -1

    def __init__(self):
        self.raw_data = []        # data rows, each a list of string fields
        self.raw_data_lines = 0   # number of rows held in raw_data
        self.a = []               # attribute names taken from the header line
        self.a_num = 0            # number of attribute names in self.a

    def load_data(self, path):
        """Load a comma-separated file, replacing any previously loaded data.

        The first line is treated as a header: its first and last fields are
        skipped and the fields in between become the attribute names
        (``self.a``).  Every following non-blank line is split on ``","`` and
        stored unchanged, all fields included, in ``self.raw_data``.

        Args:
            path: location of the file to read.

        Raises:
            OSError: if the file cannot be opened.
        """
        attributes = []
        rows = []
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open(path, "r") as fp:
            # Strip only the newline; slicing off the last character would eat
            # real data when the final line has no trailing newline.
            header = fp.readline().rstrip("\n")
            attributes.extend(header.split(",")[1:-1])
            for line in fp:
                line = line.rstrip("\n")
                if not line.strip():
                    # Blank lines would otherwise become one-field rows.
                    continue
                rows.append(line.split(","))

        # Assign only after a successful read so a failed load leaves the
        # previous state intact, and repeated loads do not accumulate.
        self.a = attributes
        self.a_num = len(attributes)
        self.raw_data = rows
        self.raw_data_lines = len(rows)

    def get_lines(self):
        """Return the number of data rows loaded by ``load_data``."""
        return self.raw_data_lines

    def _same_on_attributes(self, D, A):
        """Return True when every sample in ``D`` has equal values on all of ``A``."""
        first = self.raw_data[D[0]]
        return all(self.raw_data[i][j] == first[j] for i in D for j in A)

    def tree_generate(self, D, A, node):
        """Recursively grow the subtree rooted at ``node`` in place.

        Args:
            D: indices into ``raw_data`` of the samples reaching this node.
            A: column indices of the attributes still available for splitting.
            node: dict describing this node; ``"C"`` and ``"leaf"`` are
                written and children are appended to ``"child"``.

        The node becomes a leaf labelled with the majority class of ``D``
        when all samples share one class, when ``A`` is empty, or when all
        samples carry identical values on every attribute in ``A``.  An empty
        ``D`` yields a leaf whose ``"C"`` is left as supplied by the caller.

        Otherwise the split uses the last attribute in ``A`` (no selection
        heuristic is applied) and one child is created per distinct value
        observed in ``D``, in order of first appearance.
        """
        if not D:
            # Nothing to count; keep whatever class the caller preset.
            node["leaf"] = 1
            return

        rows = self.raw_data
        label_col = self.label_index

        label_counts = {}
        for i in D:
            label = rows[i][label_col]
            label_counts[label] = label_counts.get(label, 0) + 1
        # The label itself (not its count) is what a leaf must store.  On
        # ties max() keeps the label encountered first.
        majority = max(label_counts, key=label_counts.get)

        # With a single class present, ``majority`` is that class.
        if len(label_counts) == 1 or not A or self._same_on_attributes(D, A):
            node["C"] = majority
            node["leaf"] = 1
            return

        a_star = A[-1]
        remaining = list(A[:-1])

        # Partition D by the value of the chosen attribute in one pass.
        groups = {}
        for i in D:
            groups.setdefault(rows[i][a_star], []).append(i)

        # Every value comes from D itself, so no subset can be empty.
        for value, subset in groups.items():
            child = {
                "D": subset,
                "A": list(remaining),  # own copy: siblings must not alias
                "AS": value,
                "C": 0,
                "child": [],
                "leaf": 0,
            }
            node["child"].append(child)
            self.tree_generate(subset, remaining, child)

    def show_tree(self, node, tab, col):
        """Print the subtree rooted at ``node``, one line per node.

        Args:
            node: node dict as produced by ``tree_generate``.
            tab: prefix printed before the node; children get one more tab.
            col: selects the extra column: ``0`` prints ``"AS"``, ``1``
                prints ``"A"``, ``2`` prints ``"D"``.  Any other value prints
                nothing for the node but still descends into its children.

        Leaves are marked with a trailing ``*``.
        """
        star_leaf = "*" if node["leaf"] == 1 else ""

        if col == 0:
            print(tab, "Class:", node["C"], node["AS"], " ", star_leaf)
        elif col == 1:
            print(tab, "Class:", node["C"], " A[", node["A"], "]", star_leaf)
        elif col == 2:
            print(tab, "Class:", node["C"], " D[", node["D"], "]", star_leaf)

        for child in node["child"]:
            self.show_tree(child, tab + "\t", col)

# Driver: load the dataset, grow a tree over all rows, then print it three ways.
dt = decisionTree()
dt.load_data("../dataset/data3.0.csv")

# D holds every loaded row index; A holds attribute column indices, which
# start at 1 because column 0 is not used as an attribute.
D = list(range(dt.raw_data_lines))
A = list(range(1, dt.a_num + 1))

# Root node of the tree.  NOTE: this rebinds the module-level name ``node``.
node = dict(D=D, A=A, AS="", C=0, child=[], leaf=0)
dt.tree_generate(D, A, node)

# Modes 0, 1 and 2 print the branch value, the attribute list and the row
# indices respectively.
for _col in range(3):
    dt.show_tree(node, "", _col)

标签: machine_learning

发表评论:

Powered by anycle 湘ICP备15001973号-1