# Chat GPT gg
## Decision tree
- Decision tree classifier in pure Python, no external libraries
```python
class TreeNode:
    def __init__(self, data, target):
        self.data = data            # Data points at this node
        self.target = target        # Target values (or the predicted class at a leaf)
        self.feature_index = None   # Index of feature to split on
        self.threshold = None       # Threshold value for splitting
        self.left = None            # Left subtree
        self.right = None           # Right subtree


def calculate_gini(targets):
    # Calculate Gini impurity for a set of target values
    total_samples = len(targets)
    if total_samples == 0:
        return 0.0
    class_counts = {}
    for target in targets:
        class_counts[target] = class_counts.get(target, 0) + 1
    gini = 1.0
    for class_count in class_counts.values():
        class_probability = class_count / total_samples
        gini -= class_probability ** 2
    return gini


def split_data(data, targets, feature_index, threshold):
    # Partition the samples by comparing one feature against the threshold
    left_data, left_targets, right_data, right_targets = [], [], [], []
    for i in range(len(data)):
        if data[i][feature_index] <= threshold:
            left_data.append(data[i])
            left_targets.append(targets[i])
        else:
            right_data.append(data[i])
            right_targets.append(targets[i])
    return left_data, left_targets, right_data, right_targets


def find_best_split(data, targets):
    # Search every feature and candidate threshold for the lowest weighted Gini impurity
    best_gini = 1.0
    best_feature_index = None
    best_threshold = None
    for feature_index in range(len(data[0])):
        unique_values = sorted(set(point[feature_index] for point in data))
        for i in range(len(unique_values) - 1):
            threshold = (unique_values[i] + unique_values[i + 1]) / 2
            left_data, left_targets, right_data, right_targets = split_data(
                data, targets, feature_index, threshold)
            gini = (len(left_data) / len(data)) * calculate_gini(left_targets) + \
                   (len(right_data) / len(data)) * calculate_gini(right_targets)
            if gini < best_gini:
                best_gini = gini
                best_feature_index = feature_index
                best_threshold = threshold
    return best_feature_index, best_threshold


def build_tree(data, targets):
    # Pure node: stop and store the single class as the leaf prediction
    if len(set(targets)) == 1:
        return TreeNode(data, targets[0])
    best_feature_index, best_threshold = find_best_split(data, targets)
    # No usable split (e.g. all feature values identical): fall back to a majority-class leaf
    if best_feature_index is None:
        majority_class = max(set(targets), key=targets.count)
        return TreeNode(data, majority_class)
    left_data, left_targets, right_data, right_targets = split_data(
        data, targets, best_feature_index, best_threshold)
    left_subtree = build_tree(left_data, left_targets)
    right_subtree = build_tree(right_data, right_targets)
    root = TreeNode(data, targets)
    root.feature_index = best_feature_index
    root.threshold = best_threshold
    root.left = left_subtree
    root.right = right_subtree
    return root


def predict(tree, point):
    # Leaf node: return its stored class
    if tree.left is None and tree.right is None:
        return tree.target
    if point[tree.feature_index] <= tree.threshold:
        return predict(tree.left, point)
    else:
        return predict(tree.right, point)


# Example usage
if __name__ == "__main__":
    # Sample data and targets
    data = [
        [2, 3],
        [3, 6],
        [4, 4],
        [5, 7],
        [6, 6],
    ]
    targets = [0, 1, 0, 1, 1]

    # Build the decision tree
    decision_tree = build_tree(data, targets)

    # Make predictions
    test_point = [4.5, 5]
    prediction = predict(decision_tree, test_point)
    print("Predicted class:", prediction)
```
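The weighted Gini score that `find_best_split` minimizes can be checked by hand. The snippet below is a minimal sketch that reuses `calculate_gini` and `split_data` from the block above (assumed to be in the same module) on one candidate split of the sample data; the threshold 3.5 is only an illustrative choice, not one the tree necessarily picks.

```python
# Minimal sketch: score one candidate split by hand.
# Assumes calculate_gini and split_data from the decision tree code are in scope.
data = [[2, 3], [3, 6], [4, 4], [5, 7], [6, 6]]
targets = [0, 1, 0, 1, 1]

# Split on feature 0 at an illustrative threshold of 3.5
left_data, left_targets, right_data, right_targets = split_data(data, targets, 0, 3.5)

weighted_gini = (len(left_data) / len(data)) * calculate_gini(left_targets) \
              + (len(right_data) / len(data)) * calculate_gini(right_targets)

print(left_targets, right_targets)   # [0, 1] [0, 1, 1]
print(round(weighted_gini, 3))       # 0.467 = 0.4 * 0.5 + 0.6 * 0.444...
```

On this dataset the search actually prefers feature 1 with a threshold of 5.0, which separates the classes perfectly (weighted Gini of 0), so that is the split the root node ends up using.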
## Random Forest
- Random forest in pure Python, no external libraries, reusing the decision tree code above
```python
import random

# Assumes TreeNode, build_tree, and predict from the decision tree section
# are defined in the same module.


class RandomForest:
    def __init__(self, num_trees, max_features=None):
        self.num_trees = num_trees
        # Note: max_features is stored for API familiarity, but build_tree above
        # always considers every feature at each split, so it is not used here.
        self.max_features = max_features
        self.trees = []

    def fit(self, data, targets):
        # Train each tree on its own bootstrap sample of the data
        for _ in range(self.num_trees):
            tree_data, tree_targets = self.bootstrap_sample(data, targets)
            tree = build_tree(tree_data, tree_targets)
            self.trees.append(tree)

    def bootstrap_sample(self, data, targets):
        # Sample with replacement until the sample is as large as the original set
        sample_data, sample_targets = [], []
        num_samples = len(data)
        for _ in range(num_samples):
            index = random.randint(0, num_samples - 1)
            sample_data.append(data[index])
            sample_targets.append(targets[index])
        return sample_data, sample_targets

    def predict(self, point):
        # Majority vote over the per-tree predictions
        predictions = [predict(tree, point) for tree in self.trees]
        majority_vote = max(set(predictions), key=predictions.count)
        return majority_vote


# Example usage
if __name__ == "__main__":
    # Sample data and targets
    data = [
        [2, 3],
        [3, 6],
        [4, 4],
        [5, 7],
        [6, 6],
    ]
    targets = [0, 1, 0, 1, 1]

    # Build the random forest
    num_trees = 3
    max_features = 1  # Not used by this simplified implementation (see note above)
    random_forest = RandomForest(num_trees, max_features)
    random_forest.fit(data, targets)

    # Make predictions
    test_point = [4.5, 5]
    prediction = random_forest.predict(test_point)
    print("Random Forest Predicted class:", prediction)
```
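In a standard random forest, each split only searches a random subset of features, which is what `max_features` would control. The helper below is a minimal sketch of how that could be added on top of the tree code; `find_best_split_subset` is a hypothetical name not present in the code above, and it assumes `split_data` and `calculate_gini` are in scope.

```python
import random

def find_best_split_subset(data, targets, max_features=None):
    # Hypothetical helper (not part of the code above): restrict the split
    # search to a random subset of feature indices, as in a standard random forest.
    num_features = len(data[0])
    k = max_features or num_features
    candidate_features = random.sample(range(num_features), min(k, num_features))

    best_gini, best_feature_index, best_threshold = 1.0, None, None
    for feature_index in candidate_features:
        unique_values = sorted(set(point[feature_index] for point in data))
        for i in range(len(unique_values) - 1):
            threshold = (unique_values[i] + unique_values[i + 1]) / 2
            left_data, left_targets, right_data, right_targets = split_data(
                data, targets, feature_index, threshold)
            gini = (len(left_data) / len(data)) * calculate_gini(left_targets) \
                 + (len(right_data) / len(data)) * calculate_gini(right_targets)
            if gini < best_gini:
                best_gini = gini
                best_feature_index = feature_index
                best_threshold = threshold
    return best_feature_index, best_threshold
```

To wire this in, `build_tree` would take a `max_features` argument and call this helper instead of `find_best_split`, and `RandomForest.fit` would pass `self.max_features` through when building each tree.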