# Chat GPT gg
## Decision tree
- Decision tree classifier in pure Python, no external libraries
```python
class TreeNode:
    def __init__(self, data, target):
        self.data = data            # Data points at this node
        self.target = target        # Target values (or the predicted class at a leaf)
        self.feature_index = None   # Index of feature to split on
        self.threshold = None       # Threshold value for splitting
        self.left = None            # Left subtree
        self.right = None           # Right subtree


def calculate_gini(targets):
    # Calculate Gini impurity for a set of target values
    total_samples = len(targets)
    if total_samples == 0:
        return 0.0
    class_counts = {}
    for target in targets:
        class_counts[target] = class_counts.get(target, 0) + 1
    gini = 1.0
    for class_count in class_counts.values():
        class_probability = class_count / total_samples
        gini -= class_probability ** 2
    return gini


def split_data(data, targets, feature_index, threshold):
    # Partition the samples by comparing one feature against the threshold
    left_data, left_targets, right_data, right_targets = [], [], [], []
    for i in range(len(data)):
        if data[i][feature_index] <= threshold:
            left_data.append(data[i])
            left_targets.append(targets[i])
        else:
            right_data.append(data[i])
            right_targets.append(targets[i])
    return left_data, left_targets, right_data, right_targets


def find_best_split(data, targets):
    # Search every feature and candidate threshold for the lowest weighted Gini impurity
    best_gini = 1.0
    best_feature_index = None
    best_threshold = None
    for feature_index in range(len(data[0])):
        unique_values = sorted(set(point[feature_index] for point in data))
        for i in range(len(unique_values) - 1):
            threshold = (unique_values[i] + unique_values[i + 1]) / 2
            left_data, left_targets, right_data, right_targets = split_data(
                data, targets, feature_index, threshold)
            gini = (len(left_data) / len(data)) * calculate_gini(left_targets) + \
                   (len(right_data) / len(data)) * calculate_gini(right_targets)
            if gini < best_gini:
                best_gini = gini
                best_feature_index = feature_index
                best_threshold = threshold
    return best_feature_index, best_threshold


def build_tree(data, targets):
    # Pure node: stop and store the single class as the leaf prediction
    if len(set(targets)) == 1:
        return TreeNode(data, targets[0])
    best_feature_index, best_threshold = find_best_split(data, targets)
    # No usable split (e.g. all feature values identical): fall back to a majority-class leaf
    if best_feature_index is None:
        majority_class = max(set(targets), key=targets.count)
        return TreeNode(data, majority_class)
    left_data, left_targets, right_data, right_targets = split_data(
        data, targets, best_feature_index, best_threshold)
    left_subtree = build_tree(left_data, left_targets)
    right_subtree = build_tree(right_data, right_targets)
    root = TreeNode(data, targets)
    root.feature_index = best_feature_index
    root.threshold = best_threshold
    root.left = left_subtree
    root.right = right_subtree
    return root


def predict(tree, point):
    # Leaf node: return its stored class
    if tree.left is None and tree.right is None:
        return tree.target
    if point[tree.feature_index] <= tree.threshold:
        return predict(tree.left, point)
    else:
        return predict(tree.right, point)


# Example usage
if __name__ == "__main__":
    # Sample data and targets
    data = [
        [2, 3],
        [3, 6],
        [4, 4],
        [5, 7],
        [6, 6],
    ]
    targets = [0, 1, 0, 1, 1]

    # Build the decision tree
    decision_tree = build_tree(data, targets)

    # Make predictions
    test_point = [4.5, 5]
    prediction = predict(decision_tree, test_point)
    print("Predicted class:", prediction)
```
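The weighted Gini score that `find_best_split` minimizes can be checked by hand. The snippet below is a minimal sketch that reuses `calculate_gini` and `split_data` from the block above (assumed to be in the same module) on one candidate split of the sample data; the threshold 3.5 is only an illustrative choice, not one the tree necessarily picks.

```python
# Minimal sketch: score one candidate split by hand.
# Assumes calculate_gini and split_data from the decision tree code are in scope.
data = [[2, 3], [3, 6], [4, 4], [5, 7], [6, 6]]
targets = [0, 1, 0, 1, 1]

# Split on feature 0 at an illustrative threshold of 3.5
left_data, left_targets, right_data, right_targets = split_data(data, targets, 0, 3.5)

weighted_gini = (len(left_data) / len(data)) * calculate_gini(left_targets) \
              + (len(right_data) / len(data)) * calculate_gini(right_targets)

print(left_targets, right_targets)   # [0, 1] [0, 1, 1]
print(round(weighted_gini, 3))       # 0.467 = 0.4 * 0.5 + 0.6 * 0.444...
```

On this dataset the search actually prefers feature 1 with a threshold of 5.0, which separates the classes perfectly (weighted Gini of 0), so that is the split the root node ends up using.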
## Random Forest
- Random forest in pure Python, no external libraries, reusing the decision tree code above
```python
import random

# Assumes TreeNode, build_tree, and predict from the decision tree section
# are defined in the same module.


class RandomForest:
    def __init__(self, num_trees, max_features=None):
        self.num_trees = num_trees
        # Note: max_features is stored for API familiarity, but build_tree above
        # always considers every feature at each split, so it is not used here.
        self.max_features = max_features
        self.trees = []

    def fit(self, data, targets):
        # Train each tree on its own bootstrap sample of the data
        for _ in range(self.num_trees):
            tree_data, tree_targets = self.bootstrap_sample(data, targets)
            tree = build_tree(tree_data, tree_targets)
            self.trees.append(tree)

    def bootstrap_sample(self, data, targets):
        # Sample with replacement until the sample is as large as the original set
        sample_data, sample_targets = [], []
        num_samples = len(data)
        for _ in range(num_samples):
            index = random.randint(0, num_samples - 1)
            sample_data.append(data[index])
            sample_targets.append(targets[index])
        return sample_data, sample_targets

    def predict(self, point):
        # Majority vote over the per-tree predictions
        predictions = [predict(tree, point) for tree in self.trees]
        majority_vote = max(set(predictions), key=predictions.count)
        return majority_vote


# Example usage
if __name__ == "__main__":
    # Sample data and targets
    data = [
        [2, 3],
        [3, 6],
        [4, 4],
        [5, 7],
        [6, 6],
    ]
    targets = [0, 1, 0, 1, 1]

    # Build the random forest
    num_trees = 3
    max_features = 1  # Not used by this simplified implementation (see note above)
    random_forest = RandomForest(num_trees, max_features)
    random_forest.fit(data, targets)

    # Make predictions
    test_point = [4.5, 5]
    prediction = random_forest.predict(test_point)
    print("Random Forest Predicted class:", prediction)
```
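In a standard random forest, each split only searches a random subset of features, which is what `max_features` would control. The helper below is a minimal sketch of how that could be added on top of the tree code; `find_best_split_subset` is a hypothetical name not present in the code above, and it assumes `split_data` and `calculate_gini` are in scope.

```python
import random

def find_best_split_subset(data, targets, max_features=None):
    # Hypothetical helper (not part of the code above): restrict the split
    # search to a random subset of feature indices, as in a standard random forest.
    num_features = len(data[0])
    k = max_features or num_features
    candidate_features = random.sample(range(num_features), min(k, num_features))

    best_gini, best_feature_index, best_threshold = 1.0, None, None
    for feature_index in candidate_features:
        unique_values = sorted(set(point[feature_index] for point in data))
        for i in range(len(unique_values) - 1):
            threshold = (unique_values[i] + unique_values[i + 1]) / 2
            left_data, left_targets, right_data, right_targets = split_data(
                data, targets, feature_index, threshold)
            gini = (len(left_data) / len(data)) * calculate_gini(left_targets) \
                 + (len(right_data) / len(data)) * calculate_gini(right_targets)
            if gini < best_gini:
                best_gini = gini
                best_feature_index = feature_index
                best_threshold = threshold
    return best_feature_index, best_threshold
```

To wire this in, `build_tree` would take a `max_features` argument and call this helper instead of `find_best_split`, and `RandomForest.fit` would pass `self.max_features` through when building each tree.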