chatbot-ratings-ab-experiments
In [ ]:
Goal :¶
- We check whether there is a difference in performance between two versions of a chatbot.
- If there is a difference between the two chatbots, we determine which one performs better.
In [1]:
# chatbotA - gives responses in fewer words
# chatbotB - gives longer responses and includes links
In [2]:
# Data manipulation: inspect the raw data first so that any problems can be fixed before testing.
# data_manipulation is provided by the AB_experiment module.
from AB_experiment import data_manipulation

# Instance through which the data-manipulation helpers are called.
dm = data_manipulation()

# Arguments for data_info; they are passed positionally in the order used in the call below.
data = 'chatbot_ratings.csv'
column1, column2 = "group", ["issue_solve", "num_steps"]
quartile1, quartile3 = 0.25, 0.75
info, download_df, filename = True, False, 'new'

dm.data_info(data, column1, column2, quartile1, quartile3, info, download_df, filename)
Out[2]:
{'1': ['dataframe_shape', {'Observations': 30000, 'Column': 5}], '2': ['missing_data_info', {'No missing values'}], '3': ['outliers_info', [{'variable_name num_steps': 'No outliers present'}]], '4': ['data_types', [{'object_values': "['name', 'group']"}, {'float_values': '[]'}, {'int_values': ['user_id', 'num_steps']}, {'bool_val': ['issue_solve']}]], '5': ['numerical_Variables', ['user_id', 'num_steps']], '6': ['Categorical_variables', ['name', 'group', 'issue_solve']], '7': [{'Unique values count for variable': group chatbotA 15000 chatbotB 15000}, {'Unique values count for variable': issue_solve False 15129 True 14871}, {'Unique values count for variable': num_steps 6 5014 3 4918 4 4828 5 4804 7 4767 8 1898 10 1892 9 1879}], '8': [['Descriptive statistics-numerical_Variables', user_id num_steps count 30000.000000 30000.000000 mean 501525.376600 5.751800 std 288593.552977 2.050009 min 30.000000 3.000000 25% 250807.500000 4.000000 50% 503787.000000 6.000000 75% 752260.250000 7.000000 max 999983.000000 10.000000, '********************'], ['Descriptive statistics-Categorical_variables', name group issue_solve count 30000 30000 30000 unique 25718 2 2 top Joseph Smith chatbotA False freq 14 15000 15129, '********************']], '9': ['category_stats', [ num_steps count median mean std min max group chatbotA 15000 6.0 6.4984 2.301728 3 10 chatbotB 15000 5.0 5.0052 1.411561 3 7]], '10': ['Dataframe', user_id name group issue_solve num_steps 0 92586 Christopher Smith chatbotA True 7 1 725394 Jason Vaughn chatbotA True 5 2 393251 Mark Price chatbotA True 9 3 473455 Dylan Russo chatbotA True 4 4 488141 Douglas Brown chatbotA True 3]}
In [ ]:
In [3]:
# Turn num_steps into a binary variable, because a lower number of steps is the preferred outcome.
# data_manipulation is provided by the AB_experiment module.
from AB_experiment import data_manipulation

dm = data_manipulation()

data, column_name = 'chatbot_ratings.csv', "num_steps"
threshold, condition = 5, 'less'

# download_df=True with filename='new': presumably saves the result as 'new.csv',
# which is the file the following cells load - confirm in AB_experiment.
dm.convert_to_binary(
    data,
    column_name,
    threshold,
    condition,
    download_df=True,
    filename='new',
)
Out[3]:
{'Successfully convert varaible num_steps into binary variable'}
In [ ]:
In [4]:
# The previous step created binary_num_steps; cast it to bool so it is analysed as a boolean target.
# NOTE: this cell reuses the `dm` instance created in an earlier cell.
data, filename = 'new.csv', 'new'
change_variables, dtype = ['binary_num_steps'], ['bool']
drop_variables = []  # nothing is dropped
download_df = True

dm.change_variables(data, change_variables, dtype, drop_variables, download_df, filename)
Out[4]:
{'Variable1': ['binary_num_steps', dtype('bool')]}
In [ ]:
In [5]:
# Re-run data_info on the converted data to verify the new binary_num_steps variable.
# data_manipulation is provided by the AB_experiment module.
from AB_experiment import data_manipulation

dm = data_manipulation()

# Same call as for the raw data, but on 'new.csv' and with binary_num_steps as a target column.
data = 'new.csv'
column1, column2 = "group", ["issue_solve", "binary_num_steps"]
quartile1, quartile3 = 0.25, 0.75
info, download_df, filename = True, False, 'new'

dm.data_info(data, column1, column2, quartile1, quartile3, info, download_df, filename)
Out[5]:
{'1': ['dataframe_shape', {'Observations': 30000, 'Column': 6}], '2': ['missing_data_info', {'No missing values'}], '3': ['outliers_info', []], '4': ['data_types', [{'object_values': "['name', 'group']"}, {'float_values': '[]'}, {'int_values': ['user_id', 'num_steps']}, {'bool_val': ['issue_solve', 'binary_num_steps']}]], '5': ['numerical_Variables', ['user_id', 'num_steps']], '6': ['Categorical_variables', ['name', 'group', 'issue_solve', 'binary_num_steps']], '7': [{'Unique values count for variable': group chatbotA 15000 chatbotB 15000}, {'Unique values count for variable': issue_solve False 15129 True 14871}, {'Unique values count for variable': num_steps 6 5014 3 4918 4 4828 5 4804 7 4767 8 1898 10 1892 9 1879}, {'Unique values count for variable': binary_num_steps False 15450 True 14550}], '8': [['Descriptive statistics-numerical_Variables', user_id num_steps count 30000.000000 30000.000000 mean 501525.376600 5.751800 std 288593.552977 2.050009 min 30.000000 3.000000 25% 250807.500000 4.000000 50% 503787.000000 6.000000 75% 752260.250000 7.000000 max 999983.000000 10.000000, '********************'], ['Descriptive statistics-Categorical_variables', name group issue_solve binary_num_steps count 30000 30000 30000 30000 unique 25718 2 2 2 top Joseph Smith chatbotA False False freq 14 15000 15129 15450, '********************']], '9': ['category_stats', []], '10': ['Dataframe', user_id name group issue_solve num_steps / 0 92586 Christopher Smith chatbotA True 7 1 725394 Jason Vaughn chatbotA True 5 2 393251 Mark Price chatbotA True 9 3 473455 Dylan Russo chatbotA True 4 4 488141 Douglas Brown chatbotA True 3 binary_num_steps 0 False 1 True 2 False 3 True 4 True ]}
In [ ]:
In [6]:
# From the data_info output above, the data contains no outliers and no missing values,
# and the data types of all variables are correct.
# Next we determine the required sample size.
In [10]:
# First find the baseline conversion rate of group chatbotA for each target variable.
# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

st = stats_test()

data = 'new.csv'
column1, column1_value = "group", 'chatbotA'

# Target variable 'issue_solve'.
a = st.baseline_conversion_rate(data, column1, column1_value, column2='issue_solve', bool_var=True)

# Target variable 'num_steps', measured through its binary form 'binary_num_steps'.
b = st.baseline_conversion_rate(data, column1, column1_value, column2='binary_num_steps', bool_var=True)

# Display both results as the cell output.
a, b
Out[10]:
({'Baseline conversion rate(p1) of group chatbotA': 0.4923}, {'Baseline conversion rate(p1) of group chatbotA': 0.3771})
In [ ]:
In [11]:
# Required sample size for target variable 'issue_solve'.
# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

st = stats_test()

p1 = 0.4923  # baseline conversion rate of group chatbotA for issue_solve (from the baseline cell output)
mde, alpha, power = 0.02, 0.05, 0.8
n_side = 2  # presumably a two-sided test - confirm in AB_experiment

st.sample_size(p1, mde, alpha, power, n_side)
Out[11]:
{'Sample size': 9809}
In [12]:
# Required sample size for target variable 'num_steps' (binary form).
# p1 is the baseline conversion rate of group chatbotA for num_steps less than or equal to
# the threshold value 5: 0.3771.
# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

st = stats_test()

p1 = 0.3771
mde, alpha, power = 0.02, 0.05, 0.8
n_side = 2  # presumably a two-sided test - confirm in AB_experiment

st.sample_size(p1, mde, alpha, power, n_side)
Out[12]:
{'Sample size': 9245}
In [ ]:
In [13]:
# Check the assumptions that determine which statistical test to use for the AB test.
# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

st = stats_test()

# Target variable 'issue_solve'.
data, column2 = 'new.csv', "issue_solve"
sample_size = 9809  # sample size obtained above for issue_solve
column1, column1_value1, column1_value2 = "group", 'chatbotA', 'chatbotB'
alpha, paired_data = 0.05, False

st.AB_Test_assumption(data, sample_size, column1, column1_value1, column1_value2, column2, alpha, paired_data)
Out[13]:
({'Target variable is boolean data type': 'Use Chi-Squared Test'}, {'Note': 'If our data involve time-to-event or survival analysis (e.g., time until a user completes a task), we can use methods such as the log-rank test'})
In [14]:
# Assumption check for target variable 'num_steps' (binary form 'binary_num_steps').
# NOTE: this cell reuses the `st` instance created in an earlier cell.
data, column2 = 'new.csv', "binary_num_steps"
sample_size = 9245  # sample size obtained above for binary_num_steps
column1, column1_value1, column1_value2 = "group", 'chatbotA', 'chatbotB'
alpha, paired_data = 0.05, False

st.AB_Test_assumption(data, sample_size, column1, column1_value1, column1_value2, column2, alpha, paired_data)
Out[14]:
({'Target variable is boolean data type': 'Use Chi-Squared Test'}, {'Note': 'If our data involve time-to-event or survival analysis (e.g., time until a user completes a task), we can use methods such as the log-rank test'})
In [ ]:
Based on the assumption checks, we use the Chi-Squared Test for AB testing¶
Define the null and alternative hypotheses:
- Null hypothesis (H0): There is no significant difference in the proportion of the target variable between the two groups.
- Alternative hypothesis (Ha): There is a significant difference in the proportion of the target variable between the two groups.
In [15]:
# Chi-square test for target variable 'issue_solve'.
# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

st = stats_test()

data, column2 = 'new.csv', 'issue_solve'
sample_size = 9809  # sample size obtained above for issue_solve
column1, column1_value1, column1_value2 = 'group', 'chatbotA', 'chatbotB'
alpha, reverse_experiment = 0.05, False

# NOTE(review): in the recorded output the interval labelled for group chatbotB straddles 0,
# while the one labelled "difference in groups" brackets chatbotB's proportion - the two
# labels look swapped; confirm in AB_experiment.
st.chi_squared_test(data, sample_size, column1, column1_value1, column1_value2, column2, alpha, reverse_experiment)
Out[15]:
{'Test name': 'Chi-square test', 'Control group': 'chatbotA', 'Treatment group': 'chatbotB', 'Timestamp': '2023-08-29 19:26:51', 'Sample size': 9809, 'Status ': 'We do not reject null hypothesis => No significant difference between groups chatbotA and chatbotB', 'P-value': 0.26535, 'alpha ': 0.05, 'Test Statistic': 1.2406275749088074, 'Proportion of group chatbotA': 0.4908, 'Proportion of group chatbotB': 0.4988, 'Confidence interval of group chatbotA': (0.48088, 0.50067), 'Confidence interval of group chatbotB': (-0.02205, 0.00594), 'Confidence interval of difference in groups': (0.48893, 0.50872)}
In [ ]:
In [16]:
# Chi-square test for target variable 'num_steps' (binary form 'binary_num_steps').
# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

st = stats_test()

data, column2 = 'new.csv', 'binary_num_steps'
sample_size = 9245  # sample size obtained above for binary_num_steps
column1, column1_value1, column1_value2 = 'group', 'chatbotA', 'chatbotB'
alpha, reverse_experiment = 0.05, False

# NOTE(review): in the recorded output the interval labelled for group chatbotB is entirely
# negative, while the one labelled "difference in groups" brackets chatbotB's proportion -
# the two labels look swapped; confirm in AB_experiment.
st.chi_squared_test(data, sample_size, column1, column1_value1, column1_value2, column2, alpha, reverse_experiment)
Out[16]:
{'Test name': 'Chi-square test', 'Control group': 'chatbotA', 'Treatment group': 'chatbotB', 'Timestamp': '2023-08-29 19:26:56', 'Sample size': 9245, 'Status': 'We can reject H0 => group chatbotB is more successful', 'P-value': 0.0, 'alpha': 0.05, 'Test Statistic': 897.5533557611235, 'Proportion of group chatbotA': 0.375, 'Proportion of group chatbotB': 0.5953, 'Confidence interval of group chatbotA': (0.36514, 0.38488), 'Confidence interval of group chatbotB': (-0.23439, -0.20628), 'Confidence interval of difference in groups': (0.58534, 0.60535)}
Conclusion :¶
- From the first test we conclude that there is no significant difference between the two groups (chatbotA and chatbotB) in the proportion of target variable issue_solve (p-value 0.26535 > alpha 0.05).
- From the second test we conclude that there is a significant difference between the two groups in the proportion of target variable binary_num_steps, and group chatbotB performs better with a proportion of 0.5953 versus 0.375 for chatbotA.
In [ ]: