Im trying to solve kaggle titanic challenge, but I get an error:
tuple object is not callable.
First i had an error with .loc then a fixed that with X_df=X_df.copy()
but the tuple error had been there for a long time here is my code for feature_extractor
and for the Classifier
:
import pandas as pd
import random
class FeatureExtractor():
def __init__(self):
pass
def fit(self, X_df, y):
pass
def transform(self, X_df):
X_df=X_df.copy()
#Adding family size
family_size=X_df['SibSp']+X_df['Parch']
X_df['family_size']=family_size
#Cleaning Age
mean=int(X_df['Age'].mean())
std=int(X_df['Age'].std())
X_df['Age']=X_df['Age'].fillna(random.randint(mean-std,mean+std))
#age category
mask=X_df.Age <= 16
X_df.loc[mask,'Age']=0
mask1=(X_df.Age <=32) & (X_df.Age >16)
X_df.loc[mask1,'Age']=1
mask2=(X_df.Age <=48) & (X_df.Age >32)
X_df.loc[mask2,'Age']=2
mask3=(X_df.Age <=64) & (X_df.Age >48)
X_df.loc[mask3,'Age']=3
mask4=X_df.Age>64
X_df.loc[mask4,'Age']=4
X_df_new = pd.concat(
[X_df.get(['Fare', 'Age', 'family_size']),
pd.get_dummies(X_df.Sex, prefix='Sex', drop_first=True),
pd.get_dummies(X_df.Pclass, prefix='Pclass', drop_first=True),
pd.get_dummies(X_df.Embarked, prefix='Embarked', drop_first=True)],axis=1)
XX = X_df_new.values
return XX
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors.nearest_centroid import NearestCentroid
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import Imputer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator
class Classifier(BaseEstimator):
def __init__(self):
self.clf = Pipeline([
('imputer', SimpleImputer(strategy='median'))
('classifier', GradientBoostingClassifier(n_estimators=100,learning_rate=0.05,max_depth=2,subsample=0.6))
])
def fit(self, X, y):
self.clf.fit(X, y)
def predict_proba(self, X):
return self.clf.predict_proba(X)
Can you find any solutions because I tried a lot to solve but in vain?
Comments
Post a Comment