Sunday, 11 August 2024

Common Steps for using Machine Learning Model

Common Steps to Use a Machine Learning Model

1) Load the data & Split data into X & y 

import pandas

# Keep the loaded frame in `df`; the feature/label split below reads from it.
df = pandas.read_csv("./data/1.csv")
X = df.drop("target", axis=1)  # features: every column except "target"
y = df["target"]  # labels: y is predicted from X

2) Model selection  & Split the data into training and test sets

from sklearn.model_selection import train_test_split
# Split features and labels into training and test parts.
# test_size=0.2 requests 20% of the samples for the test part.
# No random_state is passed, so the split is not pinned between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2)

3) Instantiate the Model  

from sklearn.ensemble import RandomForestClassifier
# Create an unfitted random-forest classifier with n_estimators=50 (50 trees).
m= RandomForestClassifier(n_estimators=50)

4) Fit the model using the fit() function

# Train the classifier on the training split.
# NOTE(review): the trailing semicolon presumably suppresses the notebook echo of the return value - confirm.
m.fit(X_train,y_train); 

5) Make prediction

# Predict with the fitted model for every row of the test features and keep the result in ypreds.
ypreds=m.predict(X_test)

7) Evaluate the model using the score() function on the test and training data

# Score the fitted model on the test split, then on the training split.
# NOTE: these are bare expressions, so the values are only displayed in an
# interactive session; wrap them in print() when running as a script.
m.score(X_test, y_test)
m.score(X_train,y_train)

8) Improve the model by tuning its hyperparameters

from sklearn.model_selection import cross_val_score
import numpy as np

# Try several values of the n_estimators hyperparameter (10, 15, ..., 95)
# and report the test-set score of each fitted model.
# Seeding NumPy's global RNG makes the forests reproducible between runs,
# because no random_state is passed to the classifier.
np.random.seed(40)
for i in range(10, 100, 5):
    print(f"Trying model with {i} estimators...")
    # After the loop, `m` holds the model fitted with the last value tried (95).
    m = RandomForestClassifier(n_estimators=i).fit(X_train, y_train)
    print(f"Model accruacy on test data set: {m.score(X_test, y_test)}")


9) Save trained model to file using pickle

import pickle

# Serialise the fitted model `m` to disk. The context manager closes the file
# even if pickling fails, so the handle is never leaked.
with open("My_Random_forest_model.pkl", "wb") as model_file:
    pickle.dump(m, model_file)
10) Load a saved model and make a prediction on a single example

# Restore the model written earlier and score it.
# NOTE: only unpickle files you created yourself - pickle.load can execute
# arbitrary code embedded in the file.
with open("My_Random_forest_model.pkl", "rb") as model_file:
    saved_model = pickle.load(model_file)
saved_model.score(X_train,y_train)

#######################Example of  a Machine Model###########

import itertools

import numpy as np
import pandas
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics


def get_test():
    """Tune Prophet on ``final_df`` by grid search and return a 4-year forecast.

    Every combination of ``changepoint_prior_scale`` and
    ``seasonality_prior_scale`` in the grid is fitted and cross-validated;
    the combination with the lowest RMSE is refitted on the full data and
    used to predict 1460 daily periods past the end of the history.

    Returns:
        The frame returned by ``Prophet.predict`` for history plus future.

    NOTE(review): assumes ``final_df`` already has the ``ds`` and ``y``
    columns Prophet requires - confirm against the code that builds it.
    """
    data = final_df  # .loc[final_df['ITEMS'] == x]
    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }
    # Generate all combinations of parameters
    all_params = [
        dict(zip(param_grid.keys(), v))
        for v in itertools.product(*param_grid.values())
    ]
    rmses = []  # Store the RMSE for each parameter combination here
    for params in all_params:
        m = Prophet(**params).fit(data)  # Fit model with given params
        df_cv = cross_validation(m, initial='180 days', period='90 days', horizon='365 days')
        print(df_cv)
        # rolling_window=1 aggregates over the whole horizon, so there is a
        # single row and values[0] is the overall RMSE.
        df_p = performance_metrics(df_cv, rolling_window=1)
        print(df_p)
        rmses.append(df_p['rmse'].values[0])

    # Find the best parameters
    tuning_results = pandas.DataFrame(all_params)
    tuning_results['rmse'] = rmses  # kept so the full grid can be inspected
    best_params = all_params[np.argmin(rmses)]

    # Initiate model with best parameters
    m = Prophet(
        changepoint_prior_scale=best_params['changepoint_prior_scale'],
        seasonality_prior_scale=best_params['seasonality_prior_scale'],
    ).fit(data)

    # Make prediction
    future = m.make_future_dataframe(periods=1460, freq='D')
    forecast = m.predict(future)
    return forecast


# The driver runs after get_test is defined; calling it earlier raises NameError.
Items_List = final_df.ITEMS.unique()  # ITEMS is the dataframe column holding the item names
for x in range(0, 1):  # x in Items_List
    y = 'Item_names'
    fc = get_test()
    print(fc)
    # .copy() gives an independent frame, so adding a column below does not
    # trigger pandas' SettingWithCopyWarning.
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    fc['Region_NAME'] = y
    fc.to_csv('./Future_ITEMS/' + y + '.csv')  # Future_ITEMS is a folder

Saturday, 10 August 2024

Forecasting Using a Model in Python

 Time Series Forecasting with ML Model

The following are the steps needed to apply the model to the data. A small example is given below.

Step 1

Create the instance of the Prophet class 

Step 2 

Call  the Methods 

A) fit  method 

B) predict method

Note: The input to Prophet is always a dataframe with two columns: ds and y.

a) The ds (datestamp) column must be in a format such as YYYY-MM-DD or YYYY-MM-DD HH:MM:SS.

        b) The y column must be numeric; it is the value on which the prediction is made.

        c) Settings for the forecasting procedure are passed into the constructor.


Sample Example Code

import pandas as pd

from prophet import Prophet

# Load the history to forecast from 1.csv.
df =pd.read_csv('1.csv')

print(df)

# Create a Prophet model with default settings.
m = Prophet()

# Fit the model on the loaded history.
m.fit(df)

# Build the frame of dates to predict for, asking for 365 periods.
# NOTE(review): presumably daily periods appended after the history - confirm against the Prophet docs.
cast_future = m.make_future_dataframe(periods=365)

print(cast_future)

# Run the prediction over the frame built above.
fcast = m.predict(cast_future)

print(fcast)

# Select the date, the forecast and its lower/upper bounds.
# This is a bare expression, so it is only displayed in an interactive session.
fcast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# Plot the forecast.
fig1 = m.plot(fcast)

# Plot the forecast components.
fig2 = m.plot_components(fcast)

Practical Example for the  Model
import os
import sys

import pandas as pd


source_folder = r".\\data"

# Read every file in the source folder (all columns as strings) and stack
# them into one frame. The path is built with os.path.join so that no stray
# characters end up in the file name.
frames = []
for file in os.listdir(source_folder):
    data = pd.read_csv(os.path.join(source_folder, file), dtype=str)
    frames.append(data)
    print(file, "has been appended")

# DataFrame.append was removed in pandas 2.0. A single concat is the supported
# replacement and avoids re-copying the accumulated frame on every iteration.
data2 = pd.concat(frames) if frames else pd.DataFrame()

data2.head()
# Drop the 'Unnamed: 0' column.
data2 = data2.drop('Unnamed: 0', axis=1)
data2.head()
# Left-pad the HR value with zeros to 4 characters (e.g. "930" -> "0930").
data2['test'] = data2['HR'].apply(lambda x: '{0:0>4}'.format(x))
data2.head()
# Combine the DT value and the padded HR value into one string, then parse it.
data2['ds'] = data2['DT'].str.cat(data2['test'], sep=" ")
data2.head()
data2['ds'] = pd.to_datetime(data2['ds'])
data2['EXTRA'] = data2['EXTRA'].astype('float')
data2.info()
# Rename EXTRA to 'y' (used as the forecast target) and PLT to 'plts'.
data3 = data2.rename(columns={'EXTRA': 'y', 'PLT': 'plts'})
data3.head()
data3 = data3[['ds', 'y', 'plts']]
data3 = data3.dropna()
data3
# Keep only non-negative target values.
data3 = data3.loc[data3['y'] >= 0]
data3
from prophet import Prophet
def get_test(x):
    """Forecast the series in ``data3`` whose ``plts`` value equals *x*.

    A Prophet model with default settings is fitted on the matching rows and
    asked to predict over the history plus 24000 further periods at
    60-minute frequency.

    Args:
        x: value compared against the ``plts`` column of ``data3``.

    Returns:
        The frame returned by ``Prophet.predict``.
    """
    is_selected = data3['plts'] == x
    model = Prophet()
    model.fit(data3.loc[is_selected])
    horizon = model.make_future_dataframe(periods=24000, freq='60min')
    return model.predict(horizon)

# The PLT column is renamed to 'plts' when data3 is built, so that is the
# column to read; 'olts' does not exist on the frame.
list_olts = data3.plts.unique()

# Forecast the first three distinct values. Slicing (rather than indexing
# 0..2) keeps this working when fewer than three values are present.
for y in list_olts[:3]:
    fc = get_test(y)
    # .copy() gives an independent frame, so adding a column below does not
    # trigger pandas' SettingWithCopyWarning.
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    fc['PLT_NAME'] = y
    fc.to_csv('./Predicted_DATA/' + y + '.csv')

Tuesday, 2 July 2024

Reading Files In Python

#importing the required libraries

 import pandas as pd

import shutil

from shutil import copyfile 

from datetime import date,datetime, timedelta 

import os

source_folder = r"D:\\SourceFiles\\"

dest_folder=r"D:\\DestinationFiles\\"

###Loop throught the source_folder getting the datepart from it filename and subtract 1 from it

###and then recombine the again with yesterday.

for file in os.listdir(source_folder):

    r=file.rfind("_")

    date_string=file[r:][1:9]

    fname=file[:r][:]

    remainingpart=file[r:][9:]

    date_object = datetime.strptime(date_string, "%Y%m%d")

    date_object=date_object-timedelta(days=1)

    date_object=date_object.strftime("%Y%m%d")

    #print(fname+"_"+date_object+remainingpart)

    old_file=source_folder+file

    new_file=dest_folder+fname+"_"+date_object+remainingpart

    #print("oldfile_name",old_file, "Newfile name", new_file)

    os.rename(old_file, new_file)

#############Reading the files from the dest_folder one by one and rewrite to Final folder

# Re-save every file found in dest_folder into the Final folder, reading all
# columns as strings and writing the CSV without the index column.
for csv_name in os.listdir(dest_folder):
    print(csv_name)
    in_path = "D:\\DestinationFiles\\" + csv_name
    out_path = "D:\\Final\\" + csv_name
    df = pd.read_csv(in_path, dtype=str)
    print(df)
    df.to_csv(out_path, index=False)

#####Adding Columns of SysDATE and File_Name in existing DataFrame Using Lists

####Inserting BlankColumn in Beginning of DataFrame 

import pandas

from datetime import date,datetime, timedelta


def add_sysdate_columns(df):
    """Add SDATE and File columns built from ``df['Date']`` and a blank first column.

    Each ``Date`` value is parsed as ``%d-%b-%y`` (e.g. ``02-Jul-24``) and
    reformatted as ``YYYYMMDD``. The frame is modified in place.

    Args:
        df: frame with a ``Date`` column of strings in ``%d-%b-%y`` format.

    Returns:
        The same frame, with columns ``SDATE`` (``YYYYMMDD``) and ``File``
        (``"filename_" + SDATE``) appended and a column ``'Unnamed 0'``
        holding a single space inserted at position 0.

    Raises:
        ValueError: if a ``Date`` value does not match ``%d-%b-%y``.
    """
    # Build both lists up front; they must exist before being assigned as columns.
    datelist = [
        datetime.strptime(date_string, '%d-%b-%y').strftime('%Y%m%d')
        for date_string in df['Date']
    ]
    filelist = ["filename_" + sys_date for sys_date in datelist]

    df['SDATE'] = datelist
    df['File'] = filelist
    df.insert(0, 'Unnamed 0', ' ')
    return df


if __name__ == "__main__":
    df = pandas.read_csv("1.csv")
    add_sysdate_columns(df)

######Batch Programming Example### DATE Handling#########

@echo ON

rem ------------------------------------------------------------------
rem Print the day before year/month/day as YYYYMMDD.
rem Handles month and year roll-over and leap-year February.
rem ------------------------------------------------------------------

rem To take the values from the system date instead of the fixed values
rem below (the offsets depend on the regional date format - verify first):
rem set year=%date:~-4,4%

rem set month=%date:~-7,2%

rem set day=1%date:~-10,2%-100

rem set /A lday=%day%-1


set year=2024

set month=11

set day=1


rem Prefixing "100" and taking the remainder strips a leading zero, so
rem values such as 08 or 09 are not rejected by SET /A as invalid octal.
set /A month=100%month% %% 100
set /A day=100%day% %% 100

set /A lday=day-1

rem leap=1 when year is a leap year (divisible by 4, and not by 100 unless
rem also by 400). Only February needs it, and the year never changes in that
rem case. "!" is logical NOT here; delayed expansion must stay disabled.
set /A "leap=(!(year %% 4)) & ((!!(year %% 100)) | (!(year %% 400)))"

if %lday% GEQ 1 goto format_output

rem lday is 0: yesterday is the last day of the previous month.
set /A month=month-1

rem Stepping back from January lands in December of the previous year.
if %month% LSS 1 (
    set month=12
    set /A year=year-1
)

rem Number of days in the month we stepped back into.
set lday=31
if %month% EQU 4 set lday=30
if %month% EQU 6 set lday=30
if %month% EQU 9 set lday=30
if %month% EQU 11 set lday=30
if %month% EQU 2 set /A lday=28+leap

:format_output

rem Zero-pad day and month so the result is always 8 digits.
if %lday% LSS 10 set lday=0%lday%
if %month% LSS 10 set month=0%month%

echo %year%%month%%lday%



Wednesday, 8 May 2024

Comparing Two files and their Headers using Pandas and Lists

import pandas

import os

import re

Source_Folder_OldFiles = "./data//Old_Columns_Files"

Source_Folder_NewFiles = "./data//New_Columns_Files"

# File-name prefixes of the files whose headers are compared.
REPORT_PREFIXES = (
    "abc_Re_",
    "def_Re_",
    "ghi_Re_",
    "jkl_Re_",
    "mno_Re_",
    "pqr_Re_",
    "stu_Re_",
    "vwx_Re_",
    "yz_Re_",
)


def collect_headers(folder, prefixes=REPORT_PREFIXES, marker="000000"):
    """Return the column headers of every matching CSV file in *folder*.

    A file matches when its name contains *marker* and starts with one of
    *prefixes*. Files are visited in sorted name order so that the n-th
    entry for the old folder lines up with the n-th entry for the new
    folder (``os.listdir`` alone returns names in arbitrary order).

    Args:
        folder: directory to scan.
        prefixes: tuple of accepted file-name prefixes.
        marker: substring the file name must contain.

    Returns:
        A list with one pandas ``Index`` of column names per matching file.
    """
    headers = []
    for file in sorted(os.listdir(folder)):
        if marker in file and file.startswith(prefixes):
            print(file)
            # nrows=0 parses only the header line; the data rows are not needed.
            header = pandas.read_csv(os.path.join(folder, file), nrows=0)
            headers.append(header.columns)
    return headers


def compare_headers(old_cols, new_cols):
    """Compare two header lists.

    Args:
        old_cols: column names of the old file.
        new_cols: column names of the new (current-day) file.

    Returns:
        ``(matches, no_matches)`` where ``matches`` are the old columns also
        present in the new file (in old-file order) and ``no_matches`` are
        the new columns absent from the old file (in new-file order).
    """
    old_names = set(old_cols)
    new_names = set(new_cols)
    matches = [col for col in old_cols if col in new_names]
    no_matches = [col for col in new_cols if col not in old_names]
    return matches, no_matches


if __name__ == "__main__":
    ############################################### Loop through old files###############
    old_col_list = collect_headers(Source_Folder_OldFiles)

    if old_col_list:
        print(old_col_list[0])
    print(len(old_col_list))

    ############################################### Loop through New or current day files###############
    new_col_list = collect_headers(Source_Folder_NewFiles)

    ##########################  Loop the logic for all files###########################
    # zip stops at the shorter list, so a missing file cannot raise IndexError.
    for old_cols, new_cols in zip(old_col_list, new_col_list):
        print(new_cols)
        print(len(new_col_list))

        matches, no_matches = compare_headers(old_cols, new_cols)

        # columns present in both files
        print(matches)

        # columns only present in the new file, and how many there are
        print(no_matches, len(no_matches))