# Data manipulation: first we inspect the data; if there are any problems, we fix them.

# Bring in the data_manipulation helper class from the AB_experiment module.
from AB_experiment import data_manipulation

# One instance, reused by the data-inspection calls that follow.
dm = data_manipulation()

# Raw input file and the columns to profile.
data = 'AdSmartABdata.csv'
column1 = "experiment"       # grouping column (control / exposed in the recorded output)
column2 = ["yes", "hour"]    # columns that appear in the outlier and per-group sections of the output

# Quantiles handed to data_info; presumably they define the outlier fences -- confirm in AB_experiment.
quartile1, quartile3 = 0.25, 0.75

info = True
download_df = False
filename = 'new'

dm.data_info(data, column1, column2, quartile1, quartile3, info, download_df, filename)

{'1': ['dataframe_shape', {'Observations': 8077, 'Column': 9}],
 '2': ['missing_data_info', {'No missing values'}],
 '3': ['outliers_info',
  [{'variable_name': 'yes',
    'lower_fence': 0.0,
    'upper_fence': 0.0,
    'Number_of_obs_less_than_lower_fence': 0,
    'Number_of_obs_greater_than_upper_fence': 572,
    'lower_array': array([], dtype=int64),
    'upper_array': array([ 16,  23,  45,  65,  89, 100, 118, 128, 144, 157], dtype=int64)},
   {'variable_name hour': 'No outliers present'}]],
 '4': ['data_types',
  [{'object_values': "['auction_id', 'experiment', 'date', 'device_make', 'browser']"},
   {'float_values': '[]'},
   {'int_values': ['hour', 'platform_os', 'yes', 'no']},
   {'bool_val': []}]],
 '5': ['numerical_Variables', ['hour', 'platform_os', 'yes', 'no']],
 '6': ['Categorical_variables',
  ['auction_id', 'experiment', 'date', 'device_make', 'browser']],
 '7': [{'Unique values count for variable':          experiment
   control        4071
   exposed        4006},
  {'Unique values count for variable':             date
   2020-07-03  2015
   2020-07-09  1208
   2020-07-08  1198
   2020-07-04   903
   2020-07-10   893
   2020-07-05   890
   2020-07-06   490
   2020-07-07   480},
  {'Unique values count for variable':    platform_os
   6         7648
   5          428
   7            1},
  {'Unique values count for variable':                             browser
   Chrome Mobile                  4554
   Chrome Mobile WebView          1489
   Samsung Internet                824
   Facebook                        764
   Mobile Safari                   337
   Chrome Mobile iOS                51
   Mobile Safari UI/WKWebView       44
   Chrome                            3
   Pinterest                         3
   Opera Mobile                      3
   Opera Mini                        1
   Edge Mobile                       1
   Android                           1
   Firefox Mobile                    1
   Puffin                            1},
  {'Unique values count for variable':     yes
   0  7505
   1   572},
  {'Unique values count for variable':      no
   0  7406
   1   671}],
 '8': [['Descriptive statistics-numerical_Variables',
                 hour  platform_os          yes           no
   count  8077.000000  8077.000000  8077.000000  8077.000000
   mean     11.615080     5.947134     0.070818     0.083075
   std       5.734879     0.224333     0.256537     0.276013
   min       0.000000     5.000000     0.000000     0.000000
   25%       7.000000     6.000000     0.000000     0.000000
   50%      13.000000     6.000000     0.000000     0.000000
   75%      15.000000     6.000000     0.000000     0.000000
   max      23.000000     7.000000     1.000000     1.000000,
   '********************'],
  ['Descriptive statistics-Categorical_variables',
                                     auction_id experiment        date  /
   count                                   8077       8077        8077   
   unique                                  8077          2           8   
   top     0008ef63-77a7-448b-bd1e-075f42c55e39    control  2020-07-03   
   freq                                       1       4071        2015   
   
                  device_make        browser  
   count                 8077           8077  
   unique                 269             15  
   top     Generic Smartphone  Chrome Mobile  
   freq                  4743           4554  ,
   '********************']],
 '9': ['category_stats',
  [             yes                                   
              count median      mean       std min max
   experiment                                         
   control     4071    0.0  0.064849  0.246289   0   1
   exposed     4006    0.0  0.076885  0.266442   0   1,
               hour                                    
              count median       mean       std min max
   experiment                                          
   control     4071   15.0  12.499140  5.331095   0  23
   exposed     4006   10.0  10.716675  5.986323   0  23]],
 '10': ['Dataframe',
                               auction_id experiment        date  hour  /
  0  0008ef63-77a7-448b-bd1e-075f42c55e39    exposed  2020-07-10     8   
  1  000eabc5-17ce-4137-8efe-44734d914446    exposed  2020-07-07    10   
  2  0016d14a-ae18-4a02-a204-6ba53b52f2ed    exposed  2020-07-05     2   
  3  00187412-2932-4542-a8ef-3633901c98d9    control  2020-07-03    15   
  4  001a7785-d3fe-4e11-a344-c8735acacc2c    control  2020-07-03    15   
  
            device_make  platform_os                browser  yes  no  
  0  Generic Smartphone            6          Chrome Mobile    0   0  
  1  Generic Smartphone            6          Chrome Mobile    0   0  
  2               E5823            6  Chrome Mobile WebView    0   1  
  3   Samsung SM-A705FN            6               Facebook    0   0  
  4  Generic Smartphone            6          Chrome Mobile    0   0  ]}


# In the report above 'yes' only takes the values 0 and 1, so recast it as a boolean column.

data = 'AdSmartABdata.csv'
change_variables, dtype = ['yes'], ['bool']   # column to recast and its new dtype
drop_variables = []                            # no columns are dropped
# NOTE(review): presumably download_df=True saves the result as 'new.csv', which later cells read -- confirm.
download_df = True
filename = 'new'

dm.change_variables(data, change_variables, dtype, drop_variables, download_df, filename)

{'Variable1': ['yes', dtype('bool')]}


# With 'yes' converted to a boolean column, run data_info again on the converted file.

# Import data_manipulation from the AB_experiment module.
from AB_experiment import data_manipulation

# Fresh instance for this check.
dm = data_manipulation()

data = 'new.csv'             # converted file instead of the raw CSV
column1 = "experiment"
column2 = ["yes", "hour"]

# Same quantiles as in the first data_info run.
quartile1, quartile3 = 0.25, 0.75

info = True
download_df = False
filename = 'new'

dm.data_info(data, column1, column2, quartile1, quartile3, info, download_df, filename)

{'1': ['dataframe_shape', {'Observations': 8077, 'Column': 9}],
 '2': ['missing_data_info', {'No missing values'}],
 '3': ['outliers_info', [{'variable_name hour': 'No outliers present'}]],
 '4': ['data_types',
  [{'object_values': "['auction_id', 'experiment', 'date', 'device_make', 'browser']"},
   {'float_values': '[]'},
   {'int_values': ['hour', 'platform_os', 'no']},
   {'bool_val': ['yes']}]],
 '5': ['numerical_Variables', ['hour', 'platform_os', 'no']],
 '6': ['Categorical_variables',
  ['auction_id', 'experiment', 'date', 'device_make', 'browser', 'yes']],
 '7': [{'Unique values count for variable':          experiment
   control        4071
   exposed        4006},
  {'Unique values count for variable':             date
   2020-07-03  2015
   2020-07-09  1208
   2020-07-08  1198
   2020-07-04   903
   2020-07-10   893
   2020-07-05   890
   2020-07-06   490
   2020-07-07   480},
  {'Unique values count for variable':    platform_os
   6         7648
   5          428
   7            1},
  {'Unique values count for variable':                             browser
   Chrome Mobile                  4554
   Chrome Mobile WebView          1489
   Samsung Internet                824
   Facebook                        764
   Mobile Safari                   337
   Chrome Mobile iOS                51
   Mobile Safari UI/WKWebView       44
   Chrome                            3
   Pinterest                         3
   Opera Mobile                      3
   Opera Mini                        1
   Edge Mobile                       1
   Android                           1
   Firefox Mobile                    1
   Puffin                            1},
  {'Unique values count for variable':         yes
   False  7505
   True    572},
  {'Unique values count for variable':      no
   0  7406
   1   671}],
 '8': [['Descriptive statistics-numerical_Variables',
                 hour  platform_os           no
   count  8077.000000  8077.000000  8077.000000
   mean     11.615080     5.947134     0.083075
   std       5.734879     0.224333     0.276013
   min       0.000000     5.000000     0.000000
   25%       7.000000     6.000000     0.000000
   50%      13.000000     6.000000     0.000000
   75%      15.000000     6.000000     0.000000
   max      23.000000     7.000000     1.000000,
   '********************'],
  ['Descriptive statistics-Categorical_variables',
                                     auction_id experiment        date  /
   count                                   8077       8077        8077   
   unique                                  8077          2           8   
   top     0008ef63-77a7-448b-bd1e-075f42c55e39    control  2020-07-03   
   freq                                       1       4071        2015   
   
                  device_make        browser    yes  
   count                 8077           8077   8077  
   unique                 269             15      2  
   top     Generic Smartphone  Chrome Mobile  False  
   freq                  4743           4554   7505  ,
   '********************']],
 '9': ['category_stats',
  [            hour                                    
              count median       mean       std min max
   experiment                                          
   control     4071   15.0  12.499140  5.331095   0  23
   exposed     4006   10.0  10.716675  5.986323   0  23]],
 '10': ['Dataframe',
                               auction_id experiment        date  hour  /
  0  0008ef63-77a7-448b-bd1e-075f42c55e39    exposed  2020-07-10     8   
  1  000eabc5-17ce-4137-8efe-44734d914446    exposed  2020-07-07    10   
  2  0016d14a-ae18-4a02-a204-6ba53b52f2ed    exposed  2020-07-05     2   
  3  00187412-2932-4542-a8ef-3633901c98d9    control  2020-07-03    15   
  4  001a7785-d3fe-4e11-a344-c8735acacc2c    control  2020-07-03    15   
  
            device_make  platform_os                browser    yes  no  
  0  Generic Smartphone            6          Chrome Mobile  False   0  
  1  Generic Smartphone            6          Chrome Mobile  False   0  
  2               E5823            6  Chrome Mobile WebView  False   1  
  3   Samsung SM-A705FN            6               Facebook  False   0  
  4  Generic Smartphone            6          Chrome Mobile  False   0  ]}


# The output above shows that the data has no outliers and no missing values,
# and that the data types of all variables are correct.
# Next, we find the required sample size.


# First, find the baseline conversion rate of the target variable 'yes' in the control group.
# Import stats_test from the AB_experiment module.
from AB_experiment import stats_test

# Create an instance to call the stats_test methods.
st = stats_test()

data = 'new.csv'
column1 = "experiment"
column1_value = 'control'   # group whose conversion rate serves as the baseline
column2 = 'yes'             # target column

st.baseline_conversion_rate(data, column1, column1_value, column2)

{'Baseline conversion rate(p1) of experiment control': 0.0648}


# Required sample size for the target variable 'yes'.

# Import stats_test from the AB_experiment module.
from AB_experiment import stats_test

# Fresh stats_test instance for this calculation.
st = stats_test()

p1 = 0.0648    # baseline conversion rate of the control group, taken from the output above
mde = 0.02     # presumably the minimum detectable effect -- confirm against sample_size
alpha, power = 0.05, 0.8
n_side = 2     # presumably selects a two-sided test -- confirm

st.sample_size(p1, mde, alpha, power, n_side)

{'Sample size': 2477}


# Check the assumptions to decide which statistical test to use for the A/B test.

# Import stats_test from the AB_experiment module.
from AB_experiment import stats_test

# Fresh stats_test instance for the assumption check.
st = stats_test()

# Target variable: 'yes'.
data = 'new.csv'
sample_size = 2477           # value reported by the sample_size call above
column1 = "experiment"
column1_value1, column1_value2 = 'control', 'exposed'   # the two groups being compared
column2 = "yes"
alpha = 0.05
paired_data = False

st.AB_Test_assumption(data, sample_size, column1, column1_value1, column1_value2, column2, alpha, paired_data)

({'Target variable is boolean data type': 'Use Chi-Squared Test'},
 {'Note': 'If our data involve time-to-event or survival analysis (e.g., time until a user completes a task), we can use methods such as the log-rank test'})


# Chi-squared test for the target variable 'yes', as recommended by the assumption check above.

# Import stats_test from the AB_experiment module and create an instance.
from AB_experiment import stats_test
st = stats_test()

# Inputs for the chi-squared test.
data = 'new.csv'
sample_size = 2477
column1 = 'experiment'
column1_value1, column1_value2 = 'control', 'exposed'   # control group vs. treatment group
column2 = "yes"
alpha = 0.05
reverse_experiment = False

# NOTE(review): in the recorded output below, the intervals labelled 'group exposed' and
# 'difference in groups' look swapped relative to the reported proportions -- confirm in chi_squared_test.
st.chi_squared_test(data, sample_size, column1, column1_value1, column1_value2, column2, alpha, reverse_experiment)

{'Test name': 'Chi-square test',
 'Control group': 'control',
 'Treatment group': 'exposed',
 'Timestamp': '2023-08-29 19:13:32',
 'Sample size': 2477,
 'Status': 'We can reject H0 => experiment exposed is more successful',
 'P-value': 0.00087,
 'alpha': 0.05,
 'Test Statistic': 11.089327824513447,
 'Proportion of group control': 0.0606,
 'Proportion of group exposed': 0.0856,
 'Confidence interval of group control': (0.05116, 0.06995),
 'Confidence interval of group exposed': (-0.03951, -0.01055),
 'Confidence interval of difference in groups': (0.07457, 0.0966)}

ad-abtesting

After checking the assumptions, we use the chi-squared test for the A/B test.

Conclusion:

Ads

Related Articles