# Data manipulation: inspect the dataset first; any problems found get fixed
# before running the A/B test.

# Bring in the data_manipulation helper from the AB_experiment module.
from AB_experiment import data_manipulation

# Instance through which the data_manipulation methods are called.
dm = data_manipulation()

# Inputs for the data overview.
data = "facebook_ad.csv"
column1 = "Campaign Name"
column2 = ["Purchase", "Impressions", "Website_Clicks"]
# NOTE(review): presumably the quantile bounds for the outlier check -- confirm
# against data_info's implementation.
quartile1 = 0.25
quartile3 = 0.75
info = True
download_df = False
filename = "new"

# Arguments are passed positionally; keep this order in sync with data_info.
dm.data_info(
    data,
    column1,
    column2,
    quartile1,
    quartile3,
    info,
    download_df,
    filename,
)

{'1': ['dataframe_shape', {'Observations': 5000, 'Column': 10}],
 '2': ['missing_data_info', {'No missing values'}],
 '3': ['outliers_info',
  [{'variable_name Purchase': 'No outliers present'},
   {'variable_name Impressions': 'No outliers present'},
   {'variable_name Website_Clicks': 'No outliers present'}]],
 '4': ['data_types',
  [{'object_values': "['Campaign Name', 'Date']"},
   {'float_values': "['Spend [USD]']"},
   {'int_values': ['Impressions',
     'Reach',
     'Website_Clicks',
     'Searches',
     'View_Content',
     'Add_to_Cart',
     'Purchase']},
   {'bool_val': []}]],
 '5': ['numerical_Variables',
  ['Spend [USD]',
   'Impressions',
   'Reach',
   'Website_Clicks',
   'Searches',
   'View_Content',
   'Add_to_Cart',
   'Purchase']],
 '6': ['Categorical_variables', ['Campaign Name', 'Date']],
 '7': [{'Unique values count for variable':                   Campaign Name
   Control Campaign           2947
   Test Campaign              2053},
  {'Unique values count for variable':             Date
   2017.01.01  1038
   2017.01.02  1019
   2017.01.05  1019
   2017.01.03  1008
   2017.01.04   916}],
 '8': ['Descriptive statistics-numerical_Variables',
         Spend [USD]    Impressions          Reach  Website_Clicks     Searches  /
  count  5000.000000    5000.000000    5000.000000     5000.000000  5000.000000   
  mean   2642.167148  119870.327600   96570.507800     4649.999800  2366.600400   
  std     224.748472   18966.846045   16698.476871     1140.803534   634.505047   
  min    2201.845423   89525.000000   68241.000000     3514.000000  1365.000000   
  25%    2470.922040  102401.250000   83692.000000     3514.000000  1715.000000   
  50%    2594.203964  122257.000000   91222.500000     4219.000000  2561.500000   
  75%    2830.837193  134046.250000  116921.250000     5609.000000  3034.000000   
  max    3031.740149  160244.000000  121762.000000     8223.000000  3100.000000   
  
         View_Content  Add_to_Cart     Purchase  
  count   5000.000000  5000.000000  5000.000000  
  mean    1722.516800  1113.342600   581.958600  
  std      477.412555   323.373728   125.261832  
  min     1007.000000   566.000000   383.000000  
  25%     1396.000000   855.000000   483.750000  
  50%     1554.000000  1017.000000   540.000000  
  75%     2097.000000  1421.000000   677.000000  
  max     2949.000000  1650.000000   913.000000  ,
  '********************',
  'Descriptive statistics-Categorical_variables',
             Campaign Name        Date
  count               5000        5000
  unique                 2           5
  top     Control Campaign  2017.01.01
  freq                2947        1038,
  '********************'],
 '9': {'category_stats': [                 Purchase                                         
                       count median        mean         std  min  max
   Campaign Name                                                     
   Control Campaign     2947  653.0  610.615881  127.157296  383  913
   Test Campaign        2053  513.0  540.822211  110.176290  383  913,
                    Impressions                                                /
                          count    median           mean           std    min   
   Campaign Name                                                                
   Control Campaign        2947  128987.0  124557.532067  18347.545985  89525   
   Test Campaign           2053  106139.0  113142.031661  17783.389515  89525   
   
                             
                        max  
   Campaign Name             
   Control Campaign  160244  
   Test Campaign     160244  ,
                    Website_Clicks                                              
                             count  median         mean          std   min   max
   Campaign Name                                                                
   Control Campaign           2947  5102.0  4957.672548  1108.893747  3514  8223
   Test Campaign              2053  3711.0  4208.347784  1036.353223  3514  8223]},
 '10': ['Dataframe',
     Campaign Name        Date  Spend [USD]  Impressions   Reach  /
  0  Test Campaign  2017.01.02  3031.740149       111777   91262   
  1  Test Campaign  2017.01.01  2406.166749       127007  121762   
  2  Test Campaign  2017.01.05  2923.237073       101521   76298   
  3  Test Campaign  2017.01.02  2590.625744       160244   95165   
  4  Test Campaign  2017.01.05  2980.187470        89525   83524   
  
     Website_Clicks  Searches  View_Content  Add_to_Cart  Purchase  
  0            3514      1507          1474          878       496  
  1            4149      2863          2269         1436       913  
  2            3514      1365          1014          867       454  
  3            3738      2580          1391         1438       513  
  4            3742      1702          1546          655       461  ]}


# The output above shows that the data contains no outliers and no missing values,
# and that the data types of all variables are correct.
# Next, we find the required sample size.

# Baseline conversion rate for the control campaign.
from AB_experiment import stats_test

st = stats_test()

# Inputs: the group column and its value select the rows, and `Purchase` is
# compared against `threshold`.
# NOTE(review): exact comparison semantics live in baseline_conversion_rate.
data = "facebook_ad.csv"
column1 = "Campaign Name"
column1_value = "Control Campaign"
column2 = "Purchase"
bool_var = False
threshold = 581

# Arguments are passed positionally; keep this order in sync with the method.
st.baseline_conversion_rate(
    data,
    column1,
    column1_value,
    column2,
    bool_var,
    threshold,
)

{'Baseline conversion rate(p1) of Campaign Name Control Campaign for greater than or equal to threshold value 581': 0.604}


# Compute the required sample size from the baseline conversion rate.
from AB_experiment import stats_test

st = stats_test()

# Assignments follow the positional order expected by the call below.
p1 = 0.604  # baseline conversion rate reported by the previous step
mde = 0.045
alpha = 0.05
power = 0.8
n_side = 2
data_type = "Absolute"

st.sample_size(p1, mde, alpha, power, n_side, data_type)

{'Sample size': 1841}


# Check the statistical-test assumptions for the two campaign groups before
# choosing which A/B test to run.

# Bring in stats_test from the AB_experiment module.
from AB_experiment import stats_test

# Instance through which the stats_test methods are called.
st = stats_test()

data = "facebook_ad.csv"
sample_size = 1841  # value reported by the sample-size step
group = "Campaign Name"
group1_val = "Control Campaign"
group2_val = "Test Campaign"
target = "Purchase"
alpha = 0.05
paired_data = False

# Arguments are passed positionally; keep this order in sync with the method.
st.AB_Test_assumption(
    data,
    sample_size,
    group,
    group1_val,
    group2_val,
    target,
    alpha,
    paired_data,
)

({'Assumption of Normality is not satisfied': 'Non-parametric test => Use Mann-Whitney U test.'},
 {'Note': 'If we are comparing more than two groups, such as in an A/B/C testing scenario, we can use Kruskal-Wallis test.'})


# Run the Mann-Whitney U test on Purchase for the control vs. test campaign.
from AB_experiment import stats_test

st = stats_test()

# Assignments follow the positional order expected by the call below.
data = "facebook_ad.csv"
sample_size = 1841
column1 = "Campaign Name"
column1_value1 = "Control Campaign"
column1_value2 = "Test Campaign"
column2 = "Purchase"
alpha = 0.05
reverse_experiment = False
alternative = "two-sided"

st.mann_whitney_U_test(
    data,
    sample_size,
    column1,
    column1_value1,
    column1_value2,
    column2,
    alpha,
    reverse_experiment,
    alternative,
)

{'Test name': 'Mann whitney U test',
 'Timestamp': '2023-08-04 17:33:40',
 'Sample size': 1841,
 'Status': 'We can reject H0 => Campaign Name Control Campaign performs better',
 'P-value': 7.673962700904577e-55,
 'alpha': 0.05,
 'Test Statistic': 2197645.0,
 'Confidence Interval': (135.0, 144.0)}

facebook-ad-ab-xperiment

Based on the assumption checks, we perform the non-parametric Mann-Whitney U test for the A/B test.

Conclusion:

Ads

Related Articles