Common Steps to Use a Machine Learning Model
1) Load the data & split it into features (X) and target (y)
import pandas
df = pandas.read_csv("./data/1.csv")
X = df.drop("target", axis=1)  # use all columns besides the target
y = df["target"]               # the label we want to predict using X
2) Split the data into training and test sets (using sklearn.model_selection)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
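If reproducibility matters, you can fix the random seed, and for a classification target a stratified split keeps the class balance the same in both sets (a sketch, assuming y holds class labels):
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)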
3) Instantiate the Model
from sklearn.ensemble import RandomForestClassifier
m = RandomForestClassifier(n_estimators=50)
4) Fit the model to the training data using the fit() function
m.fit(X_train, y_train)
5) Make predictions on the test data
y_preds = m.predict(X_test)
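The predictions can be compared directly against the true test labels; a minimal sketch using sklearn's accuracy_score (for a classifier this matches the score() call in the next step):
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_preds)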
6) Evaluate the model using the score() function on the test and training data
m.score(X_test, y_test)
m.score(X_train, y_train)
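A single train/test split can be misleadingly optimistic or pessimistic; cross-validation gives a more robust estimate (a minimal sketch using 5-fold cross-validation on the full X and y):
from sklearn.model_selection import cross_val_score
import numpy as np
np.mean(cross_val_score(m, X, y, cv=5))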
7) Improve the model by changing its hyperparameters
import numpy as np
# Try different values of the n_estimators hyperparameter
np.random.seed(40)
for i in range(10, 100, 5):
    print(f"Trying model with {i} estimators...")
    m = RandomForestClassifier(n_estimators=i).fit(X_train, y_train)
    print(f"Model accuracy on test data set: {m.score(X_test, y_test)}")
8) Save the trained model to a file using pickle
import pickle
pickle.dump(m, open("My_Random_forest_model.pkl", "wb"))
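For scikit-learn models, joblib is a common alternative to pickle and handles large numpy arrays more efficiently (a sketch; the filename is just an example):
import joblib
joblib.dump(m, "My_Random_forest_model.joblib")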
9) Load a saved model and make predictions with it
saved_model = pickle.load(open("My_Random_forest_model.pkl", "rb"))
saved_model.score(X_train, y_train)
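To predict on a single example, pass it as a one-row DataFrame so the feature columns match what the model was trained on (a minimal sketch using the first test row):
single_example = X_test.iloc[:1]
saved_model.predict(single_example)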
####################### Example of a Machine Learning Model ###########
import itertools
import numpy as np
import pandas
from prophet import Prophet  # on older installs: from fbprophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

def get_test():
    # Prophet expects the data to have 'ds' (date) and 'y' (value) columns
    data = final_df  # .loc[final_df['ITEMS']==x] to fit one item at a time
    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }
    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    rmses = []  # Store the RMSE for each set of params here
    for params in all_params:
        m = Prophet(**params).fit(data)  # Fit model with the given params
        df_cv = cross_validation(m, initial='180 days', period='90 days', horizon='365 days')
        print(df_cv)
        df_p = performance_metrics(df_cv, rolling_window=1)
        print(df_p)
        rmses.append(df_p['rmse'].values[0])
    # Find the best parameters
    tuning_results = pandas.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    best_params = all_params[np.argmin(rmses)]
    # Refit the model with the best parameters
    m = Prophet(changepoint_prior_scale=best_params['changepoint_prior_scale'],
                seasonality_prior_scale=best_params['seasonality_prior_scale']).fit(data)
    # Make the prediction
    future = m.make_future_dataframe(periods=1460, freq='D')
    forecast = m.predict(future)
    return forecast

Items_List = final_df.ITEMS.unique()  # ITEMS is the column in the dataframe holding the item names
for x in range(0, 1):  # replace with: for x in Items_List
    y = 'Item_names'
    fc = get_test()
    print(fc)
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
    fc['Region_NAME'] = y
    fc.to_csv('./Future_ITEMS/' + y + '.csv')  # Future_ITEMS is a folder