Computer&Data Science Made EZ : 2024

Sunday, 11 August 2024

Common Steps for using Machine Learning Model

Common Steps to Use a Machine Learning Model

1) Load the data & Split data into X & y

import pandas
# Load the CSV into a DataFrame; the result must be bound to `df` because
# the feature/target split below reads from it.
df = pandas.read_csv("./data/1.csv")
X = df.drop("target", axis=1)  # features: every column except "target"
y = df["target"]  # label column predicted from X

2) Model selection & Split the data into training and test sets

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2)

3) Instantiate the Model

from sklearn.ensemble import RandomForestClassifier
m= RandomForestClassifier(n_estimators=50)

4) Fit the model using function

m.fit(X_train,y_train);

5) Make prediction

ypreds=m.predict(X_test)

7) To Evaluate Model use score() function on test and train data

m.score(X_test, y_test)
m.score(X_train,y_train)

8) To improve the model by changing its hyperParameters

from sklearn.model_selection import cross_val_score

import numpy as np

# Use different numbers of  n_estimators  as hyperparameter

# Seed NumPy's global RNG before the sweep.
np.random.seed(40)
# Refit the forest for n_estimators = 10, 15, ..., 95 and report the test score
# of each fit. `m` is rebound on every pass, so after the loop it holds the
# last model fitted (95 estimators), not necessarily the best-scoring one.
for i in range(10, 100, 5):
    print(f"Trying model with {i} estimators...")
    m = RandomForestClassifier(n_estimators=i).fit(X_train, y_train)
    print(f"Model accruacy on test data set: {m.score(X_test, y_test)}")

9) Save trained model to file using pickle

import pickle
# Serialise the fitted model; the context manager guarantees the file is
# flushed and closed even if pickling raises.
with open("My_Random_forest_model.pkl", "wb") as model_file:
    pickle.dump(m, model_file)

10) Load a saved model and make a prediction on a single example
# NOTE: pickle.load executes arbitrary code embedded in the file -- only load
# model files that you created yourself or otherwise trust.
with open("My_Random_forest_model.pkl", "rb") as model_file:
    saved_model = pickle.load(model_file)
# Score the restored model on the training split.
saved_model.score(X_train,y_train)
#######################Example of  a Machine Model###########
import itertools

from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics


def get_test():
    """Grid-search two Prophet priors on ``final_df`` and forecast with the best pair.

    Every combination of ``changepoint_prior_scale`` and
    ``seasonality_prior_scale`` in the grid is fitted and cross-validated; the
    combination with the lowest RMSE is refitted on the full data.

    Returns:
        The frame returned by ``predict`` for the history plus 1460 further
        daily rows.
    """
    data = final_df  # .loc[final_df['ITEMS'] == x] would restrict the fit to one item
    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }
    # Generate all combinations of parameters
    all_params = [
        dict(zip(param_grid.keys(), v))
        for v in itertools.product(*param_grid.values())
    ]
    rmses = []  # RMSE of each parameter combination, in the order of all_params
    for params in all_params:
        m = Prophet(**params).fit(data)  # Fit model with given params
        df_cv = cross_validation(m, initial='180 days', period='90 days', horizon='365 days')
        print(df_cv)
        # rolling_window=1 aggregates over the whole horizon, giving one row
        df_p = performance_metrics(df_cv, rolling_window=1)
        print(df_p)
        rmses.append(df_p['rmse'].values[0])

    # Find the best parameters
    tuning_results = pandas.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    print(tuning_results)
    best_params = all_params[np.argmin(rmses)]

    # Initiate model with best parameters
    m = Prophet(
        changepoint_prior_scale=best_params['changepoint_prior_scale'],
        seasonality_prior_scale=best_params['seasonality_prior_scale'],
    ).fit(data)

    # make prediction
    future = m.make_future_dataframe(periods=1460, freq='D')
    forecast = m.predict(future)
    return forecast


# The driver runs after the definition above so that get_test exists when called.
Items_List = final_df.ITEMS.unique()  # ITEMS is the column in dataframe having the Item_names
for x in range(0, 1):  # x in Items_List
    y = 'Item_names'
    fc = get_test()
    print(fc)
    # .copy() so the new column is added to an independent frame, not to a
    # slice of the forecast.
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    fc['Region_NAME'] = y
    fc.to_csv('./Future_ITEMS/' + y + '.csv')  # Future_ITEMS is a folder

Saturday, 10 August 2024

Forecasting by Using Model in Python

Time Series Forecasting with ML Model

Follow the steps below to apply the model to the data. A small example is given afterwards.

Step 1

Create the instance of the Prophet class

Step 2

Call the Methods

A) fit method

B) predict methods

Note:-The input to Prophet is always a dataframe with two columns: ds and y

a) The ds (datestamp) column must have a format like YYYY-MM-DD or YYYY-MM-DD HH:MM:SS

b) The y column must be numeric; it is the value on which the prediction is made

c) settings for the forecasting procedure are passed into the constructor

Sample Example Code

import pandas as pd

from prophet import Prophet

df =pd.read_csv('1.csv')

print(df)

m = Prophet()

m.fit(df)

cast_future = m.make_future_dataframe(periods=365)

print(cast_future)

fcast = m.predict(cast_future)

print(fcast)

fcast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

fig1 = m.plot(fcast)

fig2 = m.plot_components(fcast)

Practical Example for the Model

import pandas as pd

import os,sys

source_folder = r".\\data"

data2=pd.DataFrame()

# Read every file in source_folder as all-string columns and stack them.
# DataFrame.append was removed in pandas 2.0, so the frames are collected in a
# list and concatenated once (this also avoids re-copying data2 on every file).
frames = []
for file in os.listdir(source_folder):
    # Build the path from the bare file name; no trailing characters are added
    # to it, so the name on disk is matched exactly.
    data = pd.read_csv(os.path.join(source_folder, file), dtype=str)
    frames.append(data)
    print(file, "has been appended")

# An empty folder leaves data2 as an empty frame, as before.
data2 = pd.concat(frames) if frames else pd.DataFrame()

data2.head()

data2=data2.drop('Unnamed: 0',axis=1)

data2.head()

data2['test']=data2['HR'] .apply(lambda x: '{0:0>4}'.format(x))

data2.head()

data2['ds']=data2['DT'].str.cat(data2['test'],sep=" ")

data2.head()

data2['ds']=pd.to_datetime(data2['ds'])

data2['EXTRA'] = data2['EXTRA'].astype('float')

data2.info()

data3 = data2.rename(columns={'EXTRA': 'y', 'PLT': 'plts'})

data3.head()

data3=data3[['ds','y','plts']]

data3=data3.dropna()

data3

data3=data3.loc[data3['y']>=0]

data3

from prophet import Prophet

def get_test(x):
    """Fit a default Prophet model on the rows of ``data3`` whose ``plts`` equals *x*.

    Returns:
        The frame returned by ``predict`` for the history plus 24000 further
        rows at a 60-minute frequency.
    """
    data4 = data3.loc[data3['plts'] == x]

    m = Prophet()
    m.fit(data4)

    cast_future = m.make_future_dataframe(periods=24000, freq='60min')
    fcast = m.predict(cast_future)
    return fcast

# data3 only has the columns ds, y and plts, so the distinct series names
# come from `plts`.
list_olts = data3.plts.unique()

# Forecast the first three series; slicing (rather than indexing 0..2) also
# works when fewer than three distinct values exist.
for y in list_olts[:3]:
    fc = get_test(y)
    # .copy() so the label column is added to an independent frame.
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    fc['PLT_NAME'] = y
    fc.to_csv('./Predicted_DATA/' + y + '.csv')

Tuesday, 2 July 2024

Reading Files In Python

#importing the required libraries

import pandas as pd

import shutil

from shutil import copyfile

from datetime import date,datetime, timedelta

import os

source_folder = r"D:\\SourceFiles\\"

dest_folder=r"D:\\DestinationFiles\\"

###Loop through the source_folder, extract the date part from each filename and subtract one day from it,

###then recombine the parts of the filename using the previous day's date.

# Move every file from source_folder to dest_folder, replacing the YYYYMMDD
# stamp that follows the last underscore in its name with the previous day.
for file in os.listdir(source_folder):
    r = file.rfind("_")
    date_string = file[r:][1:9]    # the 8 characters right after the last "_"
    fname = file[:r]               # everything before the last "_"
    remainingpart = file[r:][9:]   # whatever follows the date (e.g. extension)

    try:
        date_object = datetime.strptime(date_string, "%Y%m%d")
    except ValueError:
        # No parsable date after the last underscore: leave the file where it
        # is instead of aborting the whole run.
        print(file, "skipped: no _YYYYMMDD part in the file name")
        continue

    date_object = date_object - timedelta(days=1)
    date_object = date_object.strftime("%Y%m%d")
    #print(fname+"_"+date_object+remainingpart)

    old_file = source_folder + file
    new_file = dest_folder + fname + "_" + date_object + remainingpart
    #print("oldfile_name",old_file, "Newfile name", new_file)

    os.rename(old_file, new_file)

#############Reading the files from the dest_folder one by one and rewrite to Final folder

# Read each file in dest_folder as all-string columns and write it unchanged
# (without the index column) to D:\Final under the same name.
for file in os.listdir(dest_folder):
    print(file)
    # Read from the same folder that is being listed.
    df = pd.read_csv(dest_folder + file, dtype=str)
    print(df)
    df.to_csv("D:\\Final\\" + file, index=False)

#####Adding Columns of SysDATE and File_Name in existing DataFrame Using Lists

####Inserting BlankColumn in Beginning of DataFrame

import pandas

from datetime import date,datetime, timedelta

df = pandas.read_csv("1.csv")

# The two lists must exist before the loop appends to them.
datelist = []
filelist = []

# Convert every 'Date' value (e.g. 05-Jan-24) to YYYYMMDD and build the
# matching file name.
for date_string in df['Date']:
    do = datetime.strptime(date_string, '%d-%b-%y')
    SysDATE = do.strftime('%Y%m%d')
    datelist.append(SysDATE)
    filelist.append("filename_" + SysDATE)

df['SDATE'] = datelist
df['File'] = filelist

# Insert a column holding a single space as the first column of the frame.
df.insert(0, 'Unnamed 0', ' ')

######Batch Programming Example### DATE Handling#########

@echo ON

rem Compute the previous day's date as YYYYMMDD in year / month / lday.
rem The input values are hard-coded for the demo. The commented lines below
rem derive them from the system DATE variable instead; the substring offsets
rem depend on the regional date format.
rem Inputs must be plain integers without leading zeros (SET /A would read
rem 08 and 09 as invalid octal numbers).

rem set year=%date:~-4,4%

rem set month=%date:~-7,2%

rem set day=1%date:~-10,2%-100

rem set /A lday=%day%-1

set year=2024

set month=11

set day=1

set /A lday=day-1

rem Any day after the 1st stays in the same month: only padding is needed.
if %lday% GEQ 1 goto pad

rem Day 1: step back into the previous month, and from January into
rem December of the previous year. The year is adjusted first because the
rem second test still needs to see month 0.
set /A month=month-1
if %month% LSS 1 set /A year=year-1
if %month% LSS 1 set month=12

rem Last day of the month we stepped back into.
set lday=31
if %month% EQU 4 set lday=30
if %month% EQU 6 set lday=30
if %month% EQU 9 set lday=30
if %month% EQU 11 set lday=30

rem February has 28 days, or 29 in a leap year (divisible by 4, except
rem century years that are not divisible by 400).
set /A r4=year %% 4
set /A r100=year %% 100
set /A r400=year %% 400
set feb=28
if %r4% EQU 0 set feb=29
if %r100% EQU 0 set feb=28
if %r400% EQU 0 set feb=29
if %month% EQU 2 set lday=%feb%

:pad
rem Zero-pad day and month to two digits so the result is always 8 characters.
if %lday% LSS 10 set lday=0%lday%
if %month% LSS 10 set month=0%month%

echo %year%%month%%lday%

Wednesday, 8 May 2024

Comparing Two files and their Headers using Pandas and Lists

import pandas

import os

import re

old_col_list=[]

Source_Folder_OldFiles = "./data//Old_Columns_Files"

new_col_list=[]

Source_Folder_NewFiles = "./data//New_Columns_Files"

############################################### Loop through old files###############

def _collect_headers(folder):
    """Return the column Index of every matching CSV file in *folder*.

    A file matches when its name contains "000000" and starts with one of the
    known report prefixes. Names are processed in sorted order so that the old
    and new folders produce their headers in the same sequence, which the
    position-by-position comparison further down relies on.
    """
    prefixes = (
        "abc_Re_", "def_Re_", "ghi_Re_", "jkl_Re_", "mno_Re_",
        "pqr_Re_", "stu_Re_", "vwx_Re_", "yz_Re_",
    )
    headers = []
    for file in sorted(os.listdir(folder)):
        # str.startswith accepts a tuple, replacing the chain of `or` tests.
        if "000000" in file and file.startswith(prefixes):
            file_df = pandas.read_csv(os.path.join(folder, file))
            print(file)
            headers.append(file_df.columns)
    return headers


old_col_list.extend(_collect_headers(Source_Folder_OldFiles))

# Guard the preview: the list is empty when no old file matched.
if old_col_list:
    print(old_col_list[0])

print(len(old_col_list))

############################################### Loop through New or current day files###############

new_col_list.extend(_collect_headers(Source_Folder_NewFiles))

########################## Loop the logic for all files###########################

# Compare the headers pairwise: the n-th old file against the n-th new file.
# zip stops at the shorter list, so having fewer files than expected no
# longer raises IndexError.
for old_cols, new_cols in zip(old_col_list, new_col_list):
    print(new_cols)
    print(len(new_col_list))

    ##comparing the elements of lists, that the cols of old files with the cols of new current file
    # Position-by-position equality; the inner zip truncates to the shorter header.
    result = [a == b for a, b in zip(old_cols, new_cols)]
    #print(all(result),result[0],result[1:])

    #find the matching elements
    matches = [i for i in old_cols if i in new_cols]
    print(matches)

    #find the non matching elements and measure the len of list contain non match element
    no_matches = [j for j in new_cols if j not in old_cols]
    print(no_matches, len(no_matches))

Computer&Data Science Made EZ

Sunday, 11 August 2024

Common Steps for using Machine Learning Model

Saturday, 10 August 2024

Forecasting by Using Model in Python

Time Series Forecasting with ML Model

Tuesday, 2 July 2024

Reading Files In Python

Wednesday, 8 May 2024

Comparing Two files and their Headers using Pandas and Lists

About Me

Blog Archive