Salifort Motors - Team Summary ¶
Document Information¶
Document Title | Salifort Motors - Team Summary |
Author | Rod Slater |
Version | 1.0 |
Created | 01-11-2023 |
Modified | 16-11-2023 |
Client Details¶
Client Name | Salifort Motors |
Client Contact | Mr HR Team |
Client Email | hr@salifortmotors.it |
Client Project | HR Team Data Driven Solutions from Machine Learning Models |
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide pandas rendering options:
#   - floats are rendered with two decimal places
#   - no limit on the number of columns shown, in a 120-character-wide display
pd.set_option('display.float_format', '{:.2f}'.format)
pd.options.display.precision = 2
pd.options.display.max_columns = None
pd.options.display.width = 120
# Add a little colour
class color:
    """ANSI escape sequences for styling printed text.

    Prefix the text with one or more attributes and finish with ``color.END``
    to reset the styling, e.g. ``color.BOLD + "Title" + color.END``.
    """

    # Text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Reset all styling
    END = '\033[0m'

    # Foreground colours
    DARKCYAN = '\033[36m'
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
# Folder holding the cleaned CSV exports; reads and writes currently point at
# the same location.
load_path = "/home/hass/Documents/Learning/Salifort-Motors-Capstone-Project/00-data_cleaned/"
save_path = "/home/hass/Documents/Learning/Salifort-Motors-Capstone-Project/00-data_cleaned/"

# Dataset behind the team plots (author's note: 11167 rows, no outliers,
# mostly removed on tenure).
# Alternative file: "data_cleaned_NoOl_FE_AllFeat.csv"
# (author's note: 11991 rows, all data including outliers).
df = pd.read_csv(f"{load_path}data_cleaned_Ol_NoFE_AllFeat.csv")
#team = 'management'
#var = 'last_eval'
def _draw_violin_panel(ax, data, x, var, title, xlabel, company_mean, order=None):
    """Draw one violin panel of ``var`` grouped by ``x`` and mark the company average."""
    sns.violinplot(x=x, y=var, data=data, hue=x, order=order, ax=ax, palette='colorblind')
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(var)
    # Horizontal reference line at the company-wide mean of `var`.
    ax.axhline(y=company_mean, color='red', linestyle='--', linewidth=2.5, label='Company Average')
    ax.text(0.5, company_mean + .05, 'Company Average', color='red')


def plot_compare(team, var):
    """Show a 2x3 grid of violin plots comparing one team with the whole company.

    Columns: all employees, employees of ``team`` who left, employees of
    ``team`` who stayed. Top row groups ``var`` by salary band, bottom row
    groups it by tenure. Every panel carries a dashed red line at the
    company-wide mean of ``var``; all panels share the y axis.

    Reads the notebook-level DataFrame ``df`` (columns used: 'dept', 'left',
    'salary', 'tenure' and ``var``) and displays the figure with ``plt.show()``.

    Args:
        team: value of the 'dept' column to analyse, e.g. ``'it'``.
        var: name of the column to plot on the y axis. Its mean is computed,
            so it must be a numeric column.
    """
    team_name = team.upper() + " Dept"
    cols = ['salary', 'dept', 'last_eval', 'tenure', 'satisfaction', 'left', 'number_project', 'avg_mnth_hrs']

    in_team = df['dept'] == team
    all_df = df[cols]
    left_df = df.loc[in_team & (df['left'] == 1), cols]
    stayed_df = df.loc[in_team & (df['left'] == 0), cols]

    labels = df['salary'].unique()
    x_order = ['low', 'medium', 'high']
    # Company-wide mean, drawn on every panel as the comparison baseline.
    hsplit = all_df[var].mean()

    # Set the style of the visualization
    sns.set(style="darkgrid")
    _fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(16, 8), sharey=True)

    panels = [
        (all_df, f'{var} - All Employees'),
        (left_df, f'{var} - {team_name} - Employees who Left'),
        (stayed_df, f'{var} - {team_name} - Employees who Stayed'),
    ]
    for column, (data, title) in enumerate(panels):
        # Top row: variable by salary band, in low -> medium -> high order.
        by_salary = axes[0, column]
        _draw_violin_panel(by_salary, data, 'salary', var, title, 'Salary', hsplit, order=x_order)
        by_salary.set_xticks(labels)
        by_salary.set_xticklabels(labels, rotation=0)

        # Bottom row: variable by tenure; the per-tenure hue legend is suppressed.
        by_tenure = axes[1, column]
        _draw_violin_panel(by_tenure, data, 'tenure', var, title, 'Tenure', hsplit)
        by_tenure.legend([], [], frameon=False)

    plt.tight_layout()
    # Show the plot
    plt.show()
def _draw_count_panel(ax, data, title, order, tick_labels):
    """Draw one head-count-per-salary-band panel on ``ax`` with vertical tick labels."""
    sns.countplot(x='salary', data=data, order=order, ax=ax, hue='salary', palette='colorblind')
    ax.set_title(title)
    ax.set_xlabel('Salary')
    ax.set_ylabel('Count')
    ax.set_xticks(tick_labels)
    ax.set_xticklabels(tick_labels, rotation=90)


def plot_count_compare(team):
    """Show three count plots of employees per salary band.

    Panels, left to right: all employees, employees of ``team`` who left,
    employees of ``team`` who stayed. Each panel has its own y axis because
    the group sizes differ.

    Reads the notebook-level DataFrame ``df`` (columns used: 'dept', 'left',
    'salary') and displays the figure with ``plt.show()``.

    Args:
        team: value of the 'dept' column to analyse, e.g. ``'it'``.
    """
    team_name = team.upper() + " Dept"

    in_team = df['dept'] == team
    left_df = df[in_team & (df['left'] == 1)]
    stayed_df = df[in_team & (df['left'] == 0)]

    labels = df['salary'].unique()
    x_order = ['low', 'medium', 'high']

    # Set the style of the visualization
    sns.set(style="darkgrid")
    _fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(16, 6), sharey=False)

    # team_name already ends in "Dept", so the titles do not repeat the word.
    panels = [
        (df, 'All Employees'),
        (left_df, f'{team_name} - Employees who Left'),
        (stayed_df, f'{team_name} - Employees who Stayed'),
    ]
    for ax, (data, title) in zip(axes, panels):
        _draw_count_panel(ax, data, title, x_order, labels)

    plt.tight_layout()
    # Show the plot
    plt.show()
# PLOT - Edit the variable below to choose the team (department) to analyse.
# Department values listed by the author:
# 'sales', 'accounting', 'hr', 'technical', 'support', 'management', 'it', 'product_mng', 'marketing', 'randd'
team = 'it'
Prepared for the IT Team¶
Introduction¶
I trust this summary will provide valuable insights into the dynamics of your team.
By analysing the available data on team members together with the HR team, you can begin to identify areas for improvement and develop strategies to enhance overall team performance and satisfaction.
Employee Tenure¶
- Reviewing the distribution of employee tenure within the team can offer insights into your team's stability.
- Identify trends in tenure and address any patterns that may indicate potential turnover risks or opportunities for mentorship and knowledge transfer.
# Violin plots of tenure for the selected team against the whole company.
# plot_compare() groups the variable by salary band and by tenure, so the
# lower row plots tenure against itself here.
var = 'tenure'
plot_compare(team, var)
Self-Reported Satisfaction¶
- Consider employees' self-reported satisfaction levels.
- A deep dive into the factors influencing satisfaction can guide efforts to boost morale and engagement.
- Address specific concerns raised by team members to improve overall job satisfaction.
# Violin plots of self-reported satisfaction for the selected team against the whole company.
var = 'satisfaction'
plot_compare(team, var)
Management Evaluation Scores¶
- Examine the most recent evaluation scores you awarded to gauge the perceived effectiveness of each team member.
- Identify patterns in feedback and recognize areas for improvement.
- Consider conducting feedback sessions to better understand team members' expectations and concerns.
# Violin plots of the last evaluation score for the selected team against the whole company.
var = 'last_eval'
plot_compare(team, var)
Work Hours¶
- Analyze the distribution of work hours among team members.
- Identify any instances of excessive overtime or uneven workloads.
- Balancing work hours is crucial for maintaining a healthy work-life balance and preventing burnout.
# Violin plots of average monthly hours for the selected team against the whole company.
var = 'avg_mnth_hrs'
plot_compare(team, var)
Project Involvement¶
- Examine the number of projects each team member has been involved in.
- Assess whether workload distribution is equitable and aligned with individual capacities.
- Consider redistributing projects to ensure a fair and manageable workload for each team member.
# Violin plots of the number of projects for the selected team against the whole company.
var = 'number_project'
plot_compare(team, var)
Salary Levels¶
- Review the salary levels within the team.
- Address any disparities or concerns related to compensation that may affect employee satisfaction.
- Ensure that salary levels are competitive and reflective of employees' contributions.
# Head count per salary band for the selected team against the whole company.
# NOTE(review): `var` is assigned here, but plot_count_compare() takes only
# `team` and does not read it.
var = 'salary'
plot_count_compare(team)
Employee Retention Prediction¶
Utilizing a machine learning model, the HR team have developed predictions for employee attrition within your team. The model takes into account various factors to identify potential risks. Please consider the provided predictions as a proactive tool for retention strategies.
Key Recommendations:
- Implement regular check-ins to discuss job satisfaction, career aspirations, and any challenges team members may be facing.
- Consider opportunities for professional development and career growth to enhance employee engagement.
- Ensure workload distribution is fair and aligned with individual skills and capacities.
- Address any concerns raised during management evaluations to strengthen leadership effectiveness.
Remember, this summary serves as a starting point for discussions and actions to improve team dynamics. I am available to discuss these findings further and collaborate on tailored strategies for your team.
Employees at risk of leaving¶
The following summarises the results of asking the ML model to assess the risk of leaving for each current team member.
# Dataset used to train the model (author's note: 11991 rows, all data
# including outliers).
# NOTE: this rebinds the notebook-level `df` that plot_compare() and
# plot_count_compare() read.
# Columns are sorted by label; the author's reminder says this step must not
# be skipped. Presumably the model expects its features in this order -
# TODO confirm against the training notebook.
df = pd.read_csv(f"{load_path}data_cleaned_NoOl_FE_AllFeat.csv").sort_index(axis=1)

# Leave-probability cut-offs used for the risk calculations
high_risk_threshold, medium_risk_threshold, predict_true_threshold = 0.9, 0.7, 0.5
# path to load/save pickled models
model_path = "/home/hass/Documents/Learning/Salifort-Motors-Capstone-Project/04-pickle-ML-models/"

# Load the trained XGBoost model.
# SECURITY: pickle.load can execute arbitrary code stored in the file. Only
# unpickle model files produced by this project, never files from an
# untrusted source.
# The separator is normalised so the path has exactly one "/" before the file
# name whether or not model_path ends with a slash.
with open(model_path.rstrip('/') + '/hr_xg1-AllFeat.pickle', 'rb') as pickle_fh:
    # The file handle has its own name so that `model_file_xg` only ever
    # refers to the unpickled model object.
    model_file_xg = pickle.load(pickle_fh)
# Current employees are the rows with left == 0. The 'left' column is removed
# so that it is not passed to the model as a feature.
temp_df = df.loc[df['left'] == 0]
features_current_empl = temp_df.drop(columns=['left'])
i = 0  # NOTE(review): legacy case flag (0 = current employees); kept so existing references keep working

# Predictions for CURRENT EMPLOYEES using model_file_xg for XGBoost.
# Column 0 of the result is used as the stay probability, column 1 as the
# leave probability.
probabilities_cur = model_file_xg.predict_proba(features_current_empl)
stay_probabilities_cur, leave_probabilities_cur = probabilities_cur[:, 0], probabilities_cur[:, 1]

# Attach both probabilities to the feature frame
features_current_empl = features_current_empl.assign(
    leave_probability=leave_probabilities_cur,
    stay_probability=stay_probabilities_cur,
)

# Highest leave probability first
predictions_cur = features_current_empl.sort_values('leave_probability', ascending=False)
def _risk_count_and_share(predictions, threshold):
    """Return ``(count, percentage)`` of rows whose leave probability exceeds ``threshold``.

    The percentage is 0.0 for an empty frame, so a department with no current
    employees does not raise ZeroDivisionError.
    """
    total = len(predictions)
    count = int((predictions['leave_probability'] > threshold).sum())
    share = (count / total) * 100 if total else 0.0
    return count, share


# Summarise predictions
selected_department = team  # Set in earlier cell
dept_flag = 'dept_' + selected_department

# Select the specific department column based on the variable
selected_dept_column = predictions_cur[dept_flag]
# Select all other columns excluding every department indicator column
other_columns = predictions_cur.drop(columns=[col for col in predictions_cur.columns if col.startswith('dept')])
# Concatenate the selected department column and other columns
new_df = pd.concat([selected_dept_column, other_columns], axis=1)
# Keep only members of the selected department. astype(bool) makes the mask
# work whether the indicator is stored as True/False or as 1/0.
predictions_cur = new_df[new_df[dept_flag].astype(bool)]

# Counts are cumulative: each threshold also counts employees above the
# higher thresholds.
high_risk_count_cur, high_risk_perc_cur = _risk_count_and_share(predictions_cur, high_risk_threshold)
medium_risk_count_cur, medium_risk_perc_cur = _risk_count_and_share(predictions_cur, medium_risk_threshold)
predict_risk_count_cur, predict_risk_perc_cur = _risk_count_and_share(predictions_cur, predict_true_threshold)
# Header of the risk summary for the selected team's current employees.
# Only current employees are scored here, so this is the only header produced.
print("\n" + color.BOLD + f"XGBoost Summary for CURRENT {team.upper()} Employees\n" + color.END)
print('\u2500' * 55)

# Print the result: one stanza per threshold, as
# (label as printed, threshold, count above it, percentage above it).
for _label, _threshold, _count, _perc in (
    ('high_risk_threshold', high_risk_threshold, high_risk_count_cur, high_risk_perc_cur),
    ('medium_risk_threshold', medium_risk_threshold, medium_risk_count_cur, medium_risk_perc_cur),
    ('predict_risk_threshold', predict_true_threshold, predict_risk_count_cur, predict_risk_perc_cur),
):
    print("\n" + color.BOLD + f"probabilities > {_label} {_threshold:.0%}" + color.END)
    print(f'Count of employees with leave probability above {_threshold:.0%} : {_count:.0f}')
    print(f'Percentage of employees with leave probability above {_threshold:.0%} : {_perc:.2f}%')
XGBoost Summary for CURRENT IT Employees ─────────────────────────────────────────────────────── probabilities > high_risk_threshold 90% Count of employees with leave probability above 90% : 1 Percentage of employees with leave probability above 90% : 0.13% probabilities > medium_risk_threshold 70% Count of employees with leave probability above 70% : 3 Percentage of employees with leave probability above 70% : 0.39% probabilities > predict_risk_threshold 50% Count of employees with leave probability above 50% : 7 Percentage of employees with leave probability above 50% : 0.92%