"""Titanic survival analysis: compare three classifiers and plot a decision tree.

Created in May 2024
PDF: paulgalea.com/Projects/Titanic_Survival/Visualisation.pdf

/* ..................... */
/* ........TOOLS........ */
/* ..................... */

Python
Adobe Illustrator

/* ..................... */
/* ....PYTHON SCRIPT.... */
/* ..................... */

The script reads a passenger spreadsheet, encodes survival and sex as 0/1,
bins age into three bands, one-hot encodes position and age band,
cross-validates three classifiers and finally draws the fitted decision tree.
"""

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.tree import DecisionTreeClassifier, plot_tree

# Data
d = pd.read_excel(r'DATA LOCATION')

# Drop rows with no age, then encode the two binary columns as 0/1.
# `d` on the right-hand side is still the unfiltered frame; `assign` aligns
# the mapped columns on the index of the rows that survived `dropna`.
d = d.dropna(subset=['Age']).assign(
    Survived=d['Survived'].map({'N': 0, 'Y': 1}),
    Sex=d['Sex'].map({'M': 1, 'F': 0}),
)

# Left-closed bins: [0, 10), [10, 40), [40, inf).
d['Age'] = pd.cut(
    d['Age'],
    bins=[0, 10, 40, float('inf')],
    labels=['Age 0-9', 'Age 10-39', 'Age 40+'],
    right=False,
)

# Survival counts per sex / position / age band (empty bands included).
print(d.groupby(['Sex', 'Position', 'Age'], observed=False)['Survived'].value_counts().unstack())

# Append one 0/1 indicator column per position and per age band; the empty
# prefix makes each new column carry the bare category label as its name.
d = pd.concat(
    [d, pd.get_dummies(d[['Position', 'Age']], prefix='', prefix_sep='').astype(int)],
    axis=1,
)

# Model
features = d.drop(columns=['Name', 'Age', 'Position', 'Survived'])
target = d['Survived']
results, cv, random_state, class_weight = {}, 5, 0, 'balanced'
scoring = ['accuracy', 'precision', 'recall', 'f1']

models = {
    'Decision Tree': DecisionTreeClassifier(
        class_weight=class_weight, max_depth=3, random_state=random_state),
    'Random Forest': RandomForestClassifier(
        class_weight=class_weight, random_state=random_state),
    'Logistic Regression': LogisticRegression(
        class_weight=class_weight, random_state=random_state),
}

for name, model in models.items():
    # One cross-validation pass scores all four metrics on the same folds,
    # instead of refitting the model once per metric.
    scores = cross_validate(model, features, target, cv=cv, scoring=scoring)
    results[name] = {
        'Name': name,
        'Accuracy': scores['test_accuracy'].mean(),
        'Precision': scores['test_precision'].mean(),
        'Recall': scores['test_recall'].mean(),
        'F1': scores['test_f1'].mean(),
    }

print('\nFeatures:', ', '.join(features.columns),
      '\n\n', round(pd.DataFrame(results.values()), 2))

# Visual
# The tree is refitted on the full data set for plotting. feature_names is
# passed as a plain list rather than a pandas Index.
# NOTE(review): some scikit-learn releases appear to validate this argument
# as a list -- confirm against the installed version.
plot_tree(
    models['Decision Tree'].fit(features, target),
    feature_names=list(features.columns),
    class_names=['Lost', 'Saved'],
    filled=True,
    fontsize=7,
)
plt.show()

# /* ..................... */
# /* .......SOURCES....... */
# /* ..................... */
#
# Viewed online May 2024:
# 1. historyonthenet.com/the-titanic-why-did-people-believe-titanic-was-unsinkable
# 2. britannica.com/topic/titanic
# 3. deseret.com/23602652/titanic-lifeboats
# 4. encyclopedia-titanica.org/explorer (used for passenger count data)
# 5. titanic.fandom.com/wiki/women_and_children_first
# 6. espace.library.uq.edu.au/view/uq:152940
# 7. encyclopedia-titanica.org/titanic-deckplans
# 8. encyclopedia-titanica.org/titanics-aft-lifeboats.html
# 9. historyonthenet.com/last-night-titanic-cooks