# chatbotA - gives shorter responses (fewer words)
# chatbotB - gives longer responses (more words) and includes links


# Data manipulation, step 1: inspect the raw ratings data first, so that any
# problems can be fixed before the analysis.

# data_manipulation is provided by the AB_experiment module.
from AB_experiment import data_manipulation

# Instance used for the data-preparation calls in this notebook.
dm = data_manipulation()

# Input file and report settings.
data = "chatbot_ratings.csv"
filename = "new"
info = True
download_df = False

# presumably the grouping column and the variables to summarise per group
# -- confirm against AB_experiment.data_info
column1 = "group"
column2 = ["issue_solve", "num_steps"]

# presumably the lower/upper quantiles used for the outlier check -- confirm
quartile1 = 0.25
quartile3 = 0.75

dm.data_info(
    data, column1, column2, quartile1, quartile3, info, download_df, filename
)

{'1': ['dataframe_shape', {'Observations': 30000, 'Column': 5}],
 '2': ['missing_data_info', {'No missing values'}],
 '3': ['outliers_info', [{'variable_name num_steps': 'No outliers present'}]],
 '4': ['data_types',
  [{'object_values': "['name', 'group']"},
   {'float_values': '[]'},
   {'int_values': ['user_id', 'num_steps']},
   {'bool_val': ['issue_solve']}]],
 '5': ['numerical_Variables', ['user_id', 'num_steps']],
 '6': ['Categorical_variables', ['name', 'group', 'issue_solve']],
 '7': [{'Unique values count for variable':           group
   chatbotA  15000
   chatbotB  15000},
  {'Unique values count for variable':        issue_solve
   False        15129
   True         14871},
  {'Unique values count for variable':     num_steps
   6        5014
   3        4918
   4        4828
   5        4804
   7        4767
   8        1898
   10       1892
   9        1879}],
 '8': [['Descriptive statistics-numerical_Variables',
                user_id     num_steps
   count   30000.000000  30000.000000
   mean   501525.376600      5.751800
   std    288593.552977      2.050009
   min        30.000000      3.000000
   25%    250807.500000      4.000000
   50%    503787.000000      6.000000
   75%    752260.250000      7.000000
   max    999983.000000     10.000000,
   '********************'],
  ['Descriptive statistics-Categorical_variables',
                   name     group issue_solve
   count          30000     30000       30000
   unique         25718         2           2
   top     Joseph Smith  chatbotA       False
   freq              14     15000       15129,
   '********************']],
 '9': ['category_stats',
  [         num_steps                                 
                count median    mean       std min max
   group                                              
   chatbotA     15000    6.0  6.4984  2.301728   3  10
   chatbotB     15000    5.0  5.0052  1.411561   3   7]],
 '10': ['Dataframe',
     user_id               name     group  issue_solve  num_steps
  0    92586  Christopher Smith  chatbotA         True          7
  1   725394       Jason Vaughn  chatbotA         True          5
  2   393251         Mark Price  chatbotA         True          9
  3   473455        Dylan Russo  chatbotA         True          4
  4   488141      Douglas Brown  chatbotA         True          3]}


# Convert num_steps into a binary variable, because fewer steps is the
# preferred outcome.

# data_manipulation is provided by the AB_experiment module.
from AB_experiment import data_manipulation

# Instance used for the data-preparation calls.
dm = data_manipulation()

data = "chatbot_ratings.csv"
column_name = "num_steps"

# presumably flags rows whose num_steps is at or below the threshold: the
# True count recorded later (14550) matches num_steps <= 5 -- confirm
threshold = 5
condition = "less"

# download_df=True with filename='new': the next cell reads 'new.csv'.
dm.convert_to_binary(
    data,
    column_name,
    threshold,
    condition,
    download_df=True,
    filename="new",
)

{'Successfully convert varaible num_steps into binary variable'}


# The previous step created the binary variable; cast it to bool as well so
# that it is handled as a boolean in the rest of the analysis.

# Reuses the `dm` instance created in an earlier cell.
data = "new.csv"
filename = "new"
download_df = True

# Variables to cast and their target dtypes; nothing is dropped.
change_variables = ["binary_num_steps"]
dtype = ["bool"]
drop_variables = []

dm.change_variables(
    data, change_variables, dtype, drop_variables, download_df, filename
)

{'Variable1': ['binary_num_steps', dtype('bool')]}


# After converting num_steps into a binary variable, run data_info again on
# the updated file.

# data_manipulation is provided by the AB_experiment module.
from AB_experiment import data_manipulation

# Instance used for the data-preparation calls.
dm = data_manipulation()

# Input file and report settings.
data = "new.csv"
filename = "new"
info = True
download_df = False

# Same grouping column as before; the binary column replaces num_steps.
column1 = "group"
column2 = ["issue_solve", "binary_num_steps"]

quartile1 = 0.25
quartile3 = 0.75

dm.data_info(
    data, column1, column2, quartile1, quartile3, info, download_df, filename
)

{'1': ['dataframe_shape', {'Observations': 30000, 'Column': 6}],
 '2': ['missing_data_info', {'No missing values'}],
 '3': ['outliers_info', []],
 '4': ['data_types',
  [{'object_values': "['name', 'group']"},
   {'float_values': '[]'},
   {'int_values': ['user_id', 'num_steps']},
   {'bool_val': ['issue_solve', 'binary_num_steps']}]],
 '5': ['numerical_Variables', ['user_id', 'num_steps']],
 '6': ['Categorical_variables',
  ['name', 'group', 'issue_solve', 'binary_num_steps']],
 '7': [{'Unique values count for variable':           group
   chatbotA  15000
   chatbotB  15000},
  {'Unique values count for variable':        issue_solve
   False        15129
   True         14871},
  {'Unique values count for variable':     num_steps
   6        5014
   3        4918
   4        4828
   5        4804
   7        4767
   8        1898
   10       1892
   9        1879},
  {'Unique values count for variable':        binary_num_steps
   False             15450
   True              14550}],
 '8': [['Descriptive statistics-numerical_Variables',
                user_id     num_steps
   count   30000.000000  30000.000000
   mean   501525.376600      5.751800
   std    288593.552977      2.050009
   min        30.000000      3.000000
   25%    250807.500000      4.000000
   50%    503787.000000      6.000000
   75%    752260.250000      7.000000
   max    999983.000000     10.000000,
   '********************'],
  ['Descriptive statistics-Categorical_variables',
                   name     group issue_solve binary_num_steps
   count          30000     30000       30000            30000
   unique         25718         2           2                2
   top     Joseph Smith  chatbotA       False            False
   freq              14     15000       15129            15450,
   '********************']],
 '9': ['category_stats', []],
 '10': ['Dataframe',
     user_id               name     group  issue_solve  num_steps  /
  0    92586  Christopher Smith  chatbotA         True          7   
  1   725394       Jason Vaughn  chatbotA         True          5   
  2   393251         Mark Price  chatbotA         True          9   
  3   473455        Dylan Russo  chatbotA         True          4   
  4   488141      Douglas Brown  chatbotA         True          3   
  
     binary_num_steps  
  0             False  
  1              True  
  2             False  
  3              True  
  4              True  ]}


# The output above shows that the data has no outliers and no missing values,
# and that the data types of all variables are correct.
# Next, we find the required sample size.


# First, find the baseline conversion rate of group chatbotA for each
# target variable.

# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

# Instance used for the statistical calls.
st = stats_test()

data = "new.csv"
column1 = "group"
column1_value = "chatbotA"

# Baseline for target variable 'issue_solve'.
a = st.baseline_conversion_rate(
    data, column1, column1_value, column2="issue_solve", bool_var=True
)

# Baseline for target variable 'num_steps', using its binary form.
b = st.baseline_conversion_rate(
    data, column1, column1_value, column2="binary_num_steps", bool_var=True
)

# Display both results.
a, b

({'Baseline conversion rate(p1) of group chatbotA': 0.4923},
 {'Baseline conversion rate(p1) of group chatbotA': 0.3771})


# Sample size for target variable 'issue_solve'.

# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

# Instance used for the statistical calls.
st = stats_test()

# Baseline conversion rate of chatbotA for issue_solve (from the previous step).
p1 = 0.4923

# presumably the minimum detectable effect and the number of test tails
# -- confirm against AB_experiment.sample_size
mde = 0.02
n_side = 2

alpha = 0.05
power = 0.8

st.sample_size(p1, mde, alpha, power, n_side)

{'Sample size': 9809}


# Sample size for target variable 'num_steps'.
# p1 is the baseline conversion rate of chatbotA for the binary variable
# (num_steps less than or equal to the threshold value 5): 0.3771.

# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

# Instance used for the statistical calls.
st = stats_test()

p1 = 0.3771

# presumably the minimum detectable effect and the number of test tails
# -- confirm against AB_experiment.sample_size
mde = 0.02
n_side = 2

alpha = 0.05
power = 0.8

st.sample_size(p1, mde, alpha, power, n_side)

{'Sample size': 9245}


# Check the assumptions for each target variable to decide which statistical
# test to use for the A/B test.

# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

# Instance used for the statistical calls.
st = stats_test()

# Target variable 'issue_solve', with the sample size computed for it above.
data = "new.csv"
column2 = "issue_solve"
sample_size = 9809

# The two groups being compared.
column1 = "group"
column1_value1 = "chatbotA"
column1_value2 = "chatbotB"

alpha = 0.05
paired_data = False

st.AB_Test_assumption(
    data,
    sample_size,
    column1,
    column1_value1,
    column1_value2,
    column2,
    alpha,
    paired_data,
)

({'Target variable is boolean data type': 'Use Chi-Squared Test'},
 {'Note': 'If our data involve time-to-event or survival analysis (e.g., time until a user completes a task), we can use methods such as the log-rank test'})


# Target variable 'num_steps' (binary form), with the sample size computed
# for it above. Reuses the `st` instance created in the previous cell.
data = "new.csv"
column2 = "binary_num_steps"
sample_size = 9245

# The two groups being compared.
column1 = "group"
column1_value1 = "chatbotA"
column1_value2 = "chatbotB"

alpha = 0.05
paired_data = False

st.AB_Test_assumption(
    data,
    sample_size,
    column1,
    column1_value1,
    column1_value2,
    column2,
    alpha,
    paired_data,
)

({'Target variable is boolean data type': 'Use Chi-Squared Test'},
 {'Note': 'If our data involve time-to-event or survival analysis (e.g., time until a user completes a task), we can use methods such as the log-rank test'})


# Chi-squared test for target variable 'issue_solve'.

# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

# Instance used for the statistical calls.
st = stats_test()

# Target variable and the sample size computed for it above.
data = "new.csv"
column2 = "issue_solve"
sample_size = 9809

# The two groups being compared.
column1 = "group"
column1_value1 = "chatbotA"
column1_value2 = "chatbotB"

alpha = 0.05
reverse_experiment = False

# NOTE(review): in the output recorded below this cell, the interval labelled
# 'group chatbotB' spans negative values while the one labelled 'difference
# in groups' brackets chatbotB's proportion -- the two labels look swapped in
# AB_experiment; confirm before quoting them.
st.chi_squared_test(
    data,
    sample_size,
    column1,
    column1_value1,
    column1_value2,
    column2,
    alpha,
    reverse_experiment,
)

{'Test name': 'Chi-square test',
 'Control group': 'chatbotA',
 'Treatment group': 'chatbotB',
 'Timestamp': '2023-08-29 19:26:51',
 'Sample size': 9809,
 'Status ': 'We do not reject null hypothesis => No significant difference between groups chatbotA and chatbotB',
 'P-value': 0.26535,
 'alpha ': 0.05,
 'Test Statistic': 1.2406275749088074,
 'Proportion of group chatbotA': 0.4908,
 'Proportion of group chatbotB': 0.4988,
 'Confidence interval of group chatbotA': (0.48088, 0.50067),
 'Confidence interval of group chatbotB': (-0.02205, 0.00594),
 'Confidence interval of difference in groups': (0.48893, 0.50872)}


# Chi-squared test for target variable 'num_steps' (binary form).

# stats_test is provided by the AB_experiment module.
from AB_experiment import stats_test

# Instance used for the statistical calls.
st = stats_test()

# Target variable and the sample size computed for it above.
data = "new.csv"
column2 = "binary_num_steps"
sample_size = 9245

# The two groups being compared.
column1 = "group"
column1_value1 = "chatbotA"
column1_value2 = "chatbotB"

alpha = 0.05
reverse_experiment = False

# NOTE(review): in the output recorded below this cell, the interval labelled
# 'group chatbotB' is entirely negative while the one labelled 'difference
# in groups' brackets chatbotB's proportion -- the two labels look swapped in
# AB_experiment; confirm before quoting them.
st.chi_squared_test(
    data,
    sample_size,
    column1,
    column1_value1,
    column1_value2,
    column2,
    alpha,
    reverse_experiment,
)

{'Test name': 'Chi-square test',
 'Control group': 'chatbotA',
 'Treatment group': 'chatbotB',
 'Timestamp': '2023-08-29 19:26:56',
 'Sample size': 9245,
 'Status': 'We can reject H0 => group chatbotB is more successful',
 'P-value': 0.0,
 'alpha': 0.05,
 'Test Statistic': 897.5533557611235,
 'Proportion of group chatbotA': 0.375,
 'Proportion of group chatbotB': 0.5953,
 'Confidence interval of group chatbotA': (0.36514, 0.38488),
 'Confidence interval of group chatbotB': (-0.23439, -0.20628),
 'Confidence interval of difference in groups': (0.58534, 0.60535)}

chatbot-ratings-ab-experiments

Goal:

After checking the assumptions, we use the chi-squared test for the A/B test.

Conclusion:

Ads

Related Articles