from sklearn.preprocessing import OrdinalEncoder
import pandas as pd
from pureml.decorators import dataset, model, transformer, load_data
@load_data()
def load_churn_data():
    '''
    Read the raw churn CSV into a DataFrame.

    The file is looked up at a fixed path relative to the current working
    directory ('../bigml_59c28831336c6604c800002a.csv').

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV file, exactly as pd.read_csv
        produces them with default options.
    '''
    return pd.read_csv('../bigml_59c28831336c6604c800002a.csv')
@transformer()
def encode_ordinal(df):
    '''
    Ordinal-encode every column of a DataFrame.

    A fresh OrdinalEncoder is fitted on all columns of df and each column is
    replaced by the encoder's output for it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns should all be encoded.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and column labels as df, holding
        the encoded values. The input frame is left unmodified.
    '''
    # Work on a copy: callers usually pass a column subset of a larger frame,
    # and assigning into that subset in place mutates the caller's object and
    # can raise pandas' SettingWithCopyWarning.
    encoded = df.copy()
    encoded[encoded.columns] = OrdinalEncoder().fit_transform(encoded)
    return encoded
@transformer()
def encode_binary(df):
    '''
    Encode the binary columns of the churn data as 0/1.

    'voice mail plan' and 'international plan' are mapped from
    'yes'/'no' to 1/0, and 'churn' is mapped from True/False to 1/0.
    Values that are not keys of the corresponding mapping become NaN
    (standard Series.map behaviour).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns 'voice mail plan',
        'international plan' and 'churn'.

    Returns
    -------
    pandas.DataFrame
        A new frame with the three columns encoded; any other columns are
        carried over unchanged. The input frame is left unmodified.

    Raises
    ------
    KeyError
        If any of the three expected columns is missing.
    '''
    yes_no = {'yes': 1, 'no': 0}

    # Work on a copy so the caller's frame (often a column subset of a larger
    # frame) is not mutated and no SettingWithCopyWarning is raised.
    encoded = df.copy()
    encoded['voice mail plan'] = encoded['voice mail plan'].map(yes_no)
    encoded['international plan'] = encoded['international plan'].map(yes_no)
    encoded['churn'] = encoded['churn'].map({True: 1, False: 0})
    return encoded
@dataset('telecom churn', parent=['encode ordinal', 'encode binary'])
def build_dataset():
    '''
    Build the encoded telecom churn dataset.

    Loads the raw data with load_churn_data, passes the 'state' and
    'phone number' columns through encode_ordinal, and passes the
    'voice mail plan', 'international plan' and 'churn' columns through
    encode_binary. The results are written back into the loaded frame.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the five columns above replaced by the values
        returned from the encoders; all other columns are left as loaded.
    '''
    df = load_churn_data()

    ordinal_columns = ['state', 'phone number']
    binary_columns = ['voice mail plan', 'international plan', 'churn']

    # Each encoder receives only the columns it handles, and its result is
    # assigned back onto those same columns.
    df[ordinal_columns] = encode_ordinal(df[ordinal_columns])
    df[binary_columns] = encode_binary(df[binary_columns])
    return df
# Run the pipeline as soon as this module is executed or imported and keep
# the result in the module-level name `df`.
df = build_dataset()