
Deploying Machine Learning Web App From Scratch


Step 1: Retrieve the data:

import pandas as pd
df = (pd.read_csv('../data/Customer-Value-Analysis.csv')
     .set_index('Customer')
     )
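
Before any processing, it is worth a quick sanity check that the file loaded as expected (the exact shape and dtypes depend on the CSV version you downloaded):

# quick look at what was loaded
print(df.shape)
print(df.dtypes.value_counts())
print(df.head())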

Step 2: Data pre-processing:

X = df.drop(['Response'], axis=1)
y = df.Response.apply(lambda x: 0 if x == 'No' else 1)
# categorical features
cats = [var for var, var_type in X.dtypes.items() if var_type=='object']
# numerical features
nums = [var for var in X.columns if var not in cats]
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import KNNImputer
from sklearn.pipeline import FeatureUnion, Pipeline 
from sklearn.compose import ColumnTransformer

#source code from : https://www.kaggle.com/schopenhacker75/complete-beginner-guide

# Custom transformer that extracts the columns passed to its constructor
class FeatureSelector(BaseEstimator, TransformerMixin):
    # Class constructor
    def __init__(self, feature_names):
        self._feature_names = feature_names

    # Nothing to fit: return self
    def fit(self, X, y=None):
        return self

    # Select the configured columns and return them as a numpy array
    def transform(self, X, y=None):
        return X[self._feature_names].values


# Defining the steps in the categorical pipeline
cat_pipeline = Pipeline([
    ('cat_selector', FeatureSelector(cats)),
    # note: on scikit-learn >= 1.2 this argument is named sparse_output
    ('one_hot_encoder', OneHotEncoder(sparse=False)),
])
    
#Defining the steps in the numerical pipeline     
num_pipeline = Pipeline([
        ( 'num_selector', FeatureSelector(nums) ),
        ('std_scaler', StandardScaler()),
    ])


# Combining the numerical and categorical pipelines side by side
# into one full pipeline using FeatureUnion
full_pipeline = FeatureUnion(transformer_list=[
    ('num_pipeline', num_pipeline),
    ('cat_pipeline', cat_pipeline),
])
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# fit the pipeline on the training set and transform it
X_train_processed = full_pipeline.fit_transform(X_train)
# transform the test set with the already-fitted transformer
X_test_processed = full_pipeline.transform(X_test)
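
The app section later references a persisted transformer (transformer.joblib); a minimal sketch of saving the fitted pipeline, assuming the same ../model directory used below:

from joblib import dump
# persist the fitted preprocessing pipeline for reuse at inference time;
# the filename mirrors the commented-out load in the app section (an assumption)
dump(full_pipeline, '../model/transformer.joblib')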

Step 3: Training phase:

from sklearn.metrics import log_loss
from joblib import dump, load
from sklearn.model_selection import cross_val_predict
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
#from sklearn.linear_model import SGDClassifier

names = ["Nearest Neighbors", "Gradient Boosting", "Random Forest"]

classifiers = [
    KNeighborsClassifier(3),
    GradientBoostingClassifier(),
    RandomForestClassifier()]

scores = {}

# iterate over classifiers
for name, clf in zip(names, classifiers):
    # Cross val prediction    
    cv_preds = cross_val_predict(clf, X_train_processed, y_train, method='predict_proba')
    cv_score = log_loss(y_train, cv_preds)
    
    # holdout data 
    clf.fit(X_train_processed, y_train)
    hd_preds = clf.predict_proba(X_test_processed)
    hd_score = log_loss(y_test, hd_preds)
    
    # append the scores
    scores[name] = [cv_score, hd_score]
    #store the model
    dump(clf, f'../model/{name}.joblib')
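
With all three models scored, the dictionary can be flattened into a small table for comparison; a minimal sketch (lower log-loss is better):

import pandas as pd
# one row per model: cross-validation and holdout log-loss
scores_df = pd.DataFrame(scores, index=['cv_log_loss', 'holdout_log_loss']).T
print(scores_df.sort_values('holdout_log_loss'))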

Next, we fine-tune the random forest with a grid search:

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer

param_grid = [
    {'n_estimators': [100, 200]},
    {'n_estimators': [50, 100, 200], 'max_features': ['log2']},
    {'bootstrap': [False], 'n_estimators': [150, 300], 'max_features': [2, 4]},
]
# about how to use the scorer strategy for the grid search:
# https://scikit-learn.org/stable/modules/model_evaluation.html#scoring
# log_loss needs predicted probabilities and is a loss (lower is better),
# so both flags below matter; on scikit-learn >= 1.4 use
# response_method='predict_proba' instead of needs_proba
scorer = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

RF = RandomForestClassifier()
grid_search = GridSearchCV(RF, param_grid, cv=5,
                           scoring=scorer,
                           return_train_score=True)

grid_search.fit(X_train_processed, y_train)
sk_best = grid_search.best_estimator_
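
The app below loads best.joblib, ohe_categories.pkl, and sk_best_performances.pkl, which the snippets above never write. A sketch of how they could be produced (the attribute access assumes the FeatureUnion layout from Step 2, and the contents of the performances file assume the scores dictionary from Step 3):

from joblib import dump
import pickle

# persist the tuned model under the name the app expects
dump(sk_best, '../model/best.joblib')

# recover the fitted OneHotEncoder from the FeatureUnion built in Step 2
ohe = full_pipeline.transformer_list[1][1].named_steps['one_hot_encoder']
ohe_categories = dict(zip(cats, ohe.categories_))
with open('../model/ohe_categories.pkl', 'wb') as f:
    pickle.dump(ohe_categories, f)

# store one holdout score per model so the app can plot them
perfs = {name: s[1] for name, s in scores.items()}
with open('../model/sk_best_performances.pkl', 'wb') as f:
    pickle.dump(perfs, f)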

Step 4: Build the Dash app:

import os
# We start with the imports of standard ML libraries
import pandas as pd
import math
from joblib import load
from src.data_processing import make_full_pipeline
import pickle
# We add all necessary Plotly and Dash libraries
# (these module names are from Dash 1.x; on Dash >= 2.0 use
#  `from dash import dcc, html` instead)
import dash
import plotly.graph_objects as go
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

HOME_PATH = os.getcwd()
DATA_PATH = os.path.join(HOME_PATH, 'data')
MODELS_PATH = os.path.join(HOME_PATH, 'model')

df = pd.read_csv(os.path.join(DATA_PATH, "Customer-Value-Analysis.csv")).set_index('Customer')
sk_best = load(os.path.join(MODELS_PATH, 'best.joblib'))

# full_pipeline = load(os.path.join(MODELS_PATH, 'transformer.joblib'))
full_pipeline = make_full_pipeline(df)
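
make_full_pipeline is imported from src.data_processing but never shown; presumably it rebuilds and fits the Step 2 FeatureUnion on the provided frame. A hypothetical reconstruction (the function body is an assumption based on Step 2; FeatureSelector is the custom transformer defined there):

# src/data_processing.py (hypothetical sketch)
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

def make_full_pipeline(df):
    X = df.drop(['Response'], axis=1)
    cats = [var for var, var_type in X.dtypes.items() if var_type == 'object']
    nums = [var for var in X.columns if var not in cats]
    pipeline = FeatureUnion(transformer_list=[
        ('num_pipeline', Pipeline([('num_selector', FeatureSelector(nums)),
                                   ('std_scaler', StandardScaler())])),
        ('cat_pipeline', Pipeline([('cat_selector', FeatureSelector(cats)),
                                   ('one_hot_encoder', OneHotEncoder(sparse=False))])),
    ])
    pipeline.fit(X)
    return pipeline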

ohe_path = os.path.join(MODELS_PATH, 'ohe_categories.pkl')
perfs_path = os.path.join(MODELS_PATH, 'sk_best_performances.pkl')

with open(ohe_path, 'rb') as f:
    ohe_categories = pickle.load(f)

categories = []
for k, l in ohe_categories.items():
    categories.append([f'{k}_{cat}' for cat in list(l)])
flatten = lambda l: [item for sublist in l for item in sublist]
categories = flatten(categories)

with open(perfs_path, 'rb') as f:
    perfs = pickle.load(f)

# rebuild the feature lists used by the pipeline;
# the target column is dropped from the categorical list
cats = [var for var, var_type in df.dtypes.items() if var_type == 'object']
nums = [var for var in df.columns if var not in cats]
cats.remove('Response')

TOP = 10
# We create a DataFrame to store the features' importance and their corresponding label
df_feature_importances = pd.DataFrame(sk_best.feature_importances_ * 100, columns=["Importance"],
                                      index=nums + categories)
df_feature_importances = df_feature_importances.sort_values("Importance", ascending=False)
df_feature_importances = df_feature_importances.loc[df_feature_importances.index[:TOP]]

# We create a Features Importance Bar Chart
fig_features_importance = go.Figure()
fig_features_importance.add_trace(go.Bar(x=df_feature_importances.index,
                                         y=df_feature_importances["Importance"],
                                         marker_color='rgb(171, 226, 251)')
                                  )
fig_features_importance.update_layout(title_text='<b>Feature Importances of the Model</b>', title_x=0.5)

# We create a model performances bar chart
fig_perfs = go.Figure()
fig_perfs.add_trace(go.Bar(y=list(perfs.keys()),
                           x=list(perfs.values()),
                           marker_color='rgb(171, 226, 251)',
                           orientation='h')
                    )
fig_perfs.update_layout(title_text='<b>Best Model Performances</b>', title_x=0.5)

cat_children = []
for var in cats:
    # Categorical children
    sorted_modalities = list(df[var].value_counts().index)
    cat_children.append(html.H4(children=var))
    cat_children.append(dcc.Dropdown(
        id='{}-dropdown'.format(var),
        options=[{'label': value, 'value': value} for value in sorted_modalities],
        value=sorted_modalities[0]
    ))

linear_children = []
for var in nums:
    # linear children
    linear_children.append(html.H4(children=var))
    desc = df[var].describe()
    linear_children.append(dcc.Slider(
        id='{}-dropdown'.format(var),
        min=math.floor(desc['min']),
        max=round(desc['max']),
        step=None,
        value=round(desc['mean']),
        # tick marks spread roughly one standard deviation apart
        marks={i: str(i) for i in
               range(int(desc['min']), int(desc['max']) + 1, max(int(desc['std'] / 1.5), 1))}
    ))
app = dash.Dash(__name__,
                # external CSS stylesheets
                external_stylesheets=[
                    # "https://raw.githack.com/Athena75/IBM-Customer-Value-Dashboarding/main/assets/style.css",
                    "https://rawcdn.githack.com/Athena75/IBM-Customer-Value-Dashboarding/df971ae38117d85c8512a72643ce6158cde7a4eb/assets/style.css"
                ])

# We apply basic HTML formatting to the layout
app.layout = html.Div(children=[
    # first row : Title
    html.Div(children=[
        html.Div(children=[html.H1(children="Simulation Tool : IBM Customer Churn")],
                 className='title'),

    ],
        style={"display": "block"}),
    # second row :
    html.Div(children=[
        # first column : fig feature importance + linear + prediction
        html.Div(children=[
            html.Div(children=[dcc.Graph(figure=fig_features_importance, className='graph')] + linear_children),
            # prediction result
            html.Div(children=[html.H2(children="Prediction:"),
                               html.H2(id="prediction_result")],
                     className='prediction')],
                 className='column'),
        # second column : fig performances categorical
        html.Div(children=[dcc.Graph(figure=fig_perfs, className='graph')] + cat_children,
                 className='column')
    ],
        className='row')
]
)

Finally, a callback maps the widget values to a model prediction:

# The callback function will provide one "Output" in the form of a string (=children)
@app.callback(Output(component_id="prediction_result", component_property="children"),
              # The values corresponding to sliders and dropdowns of respectively numerical and categorical features
              [Input('{}-dropdown'.format(var), 'value') for var in nums + cats])
# The input variables are set in the same order as the callback Inputs
def update_prediction(*X):
    # map each widget value to its corresponding feature name
    payload = dict(zip(nums + cats, X))
    # create one line dataframe
    frame_X = pd.DataFrame(payload, index=[0])
    # pass it through the pre-fitted transformer
    X_processed = full_pipeline.transform(frame_X)

    prediction = sk_best.predict_proba(X_processed)[0]

    # ... and returned to the Output of the callback function
    return " {}% No , {}% Yes".format("%.2f" % (prediction[0] * 100),
                                      "%.2f" % (prediction[1] * 100))
A minimal run.py then simply re-exports that server object:

# run.py
from src.app_dash import server

and gunicorn can serve the app:

$ gunicorn run:server

Deploy on Heroku:

Declare the web process in a Procfile at the repository root:

web: gunicorn run:server

Then pin the dependencies and create the Heroku app:

$ pip freeze > requirements.txt
$ heroku create ibm-customer-churn-simulator
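
Pushing the repository to the Heroku remote triggers the build (standard Heroku CLI workflow; the branch may be main or master depending on your setup):

$ git add .
$ git commit -m "deploy the churn simulator"
$ git push heroku main
$ heroku open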


Now comes the stressful moment: waiting for the deployment to finish. If everything goes well, Heroku prints a green build-succeeded message.
