Sunday, 11 August 2024

Common Steps for using Machine Learning Model

Common Steps to Use a Machine Learning Model

1) Load the data & Split data into X & y 

import pandas

# Load the dataset. The result has to be bound to `df`, because the lines
# below build X and y from it.
df = pandas.read_csv("./data/1.csv")
X = df.drop("target", axis=1)  # features: every column except the target
y = df["target"]  # label column that is predicted from X

2) Model selection  & Split the data into training and test sets

# Split X and y into training and test parts; test_size=0.2 reserves 20% of
# the rows for testing.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2)

3) Instantiate the Model  

# Create a random-forest classifier built from 50 trees (n_estimators=50).
from sklearn.ensemble import RandomForestClassifier
m= RandomForestClassifier(n_estimators=50)

4) Fit the model using function 

# Train the model on the training split. The trailing ';' is an empty
# statement; in a notebook it keeps the cell from echoing the fitted model.
m.fit(X_train,y_train); 

5) Make prediction

# Predict the labels for the held-out test features.
ypreds=m.predict(X_test)

7) To Evaluate Model use  score() function on test and train data

# Score the model on the test split first, then on the training split, so
# the two values can be compared.
m.score(X_test, y_test)
m.score(X_train,y_train)

8) To improve the model by changing its hyperParameters

from sklearn.model_selection import cross_val_score
import numpy as np

# Try different values of the n_estimators hyperparameter.
np.random.seed(40)  # fixed seed so repeated runs give the same models
for i in range(10, 100, 5):
    # The loop body must be indented; without it Python raises IndentationError.
    print(f"Trying model with {i} estimators...")
    m = RandomForestClassifier(n_estimators=i).fit(X_train, y_train)
    print(f"Model accruacy on test data set: {m.score(X_test, y_test)}")


9) Save trained model to file using pickle

import pickle

# Write the fitted model to disk. The file name is a single-line literal (it
# was split across two lines before) and the context manager closes the file.
with open("My_Random_forest_model.pkl", "wb") as model_file:
    pickle.dump(m, model_file)
10) Load a saved model and make a prediction on a single example

# Only unpickle files you created yourself: pickle can execute arbitrary code
# while loading. The context manager closes the file once the model is read.
with open("My_Random_forest_model.pkl", "rb") as model_file:
    saved_model = pickle.load(model_file)
saved_model.score(X_train,y_train)

#######################Example of  a Machine Model###########

def get_test():
    """Tune a Prophet model on ``final_df`` by grid search and return a forecast.

    Every combination of ``changepoint_prior_scale`` and
    ``seasonality_prior_scale`` in the grid is fitted and cross-validated.
    The combination with the lowest RMSE is then refitted on the same data
    and used to predict 1460 daily periods past the end of the history.

    Returns:
        The dataframe returned by ``Prophet.predict`` for the future frame.
    """
    # Imported here so the function works even when the surrounding cell has
    # not imported these names.
    import itertools

    import numpy as np
    from prophet import Prophet
    from prophet.diagnostics import cross_validation, performance_metrics

    data = final_df  # .loc[final_df['ITEMS'] == x] would restrict it to one item
    param_grid = {
        'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
        'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    }
    # Generate all combinations of parameters
    all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
    rmses = []  # one RMSE per parameter combination, in the same order as all_params
    for params in all_params:
        m = Prophet(**params).fit(data)  # Fit model with given params
        df_cv = cross_validation(m, initial='180 days', period='90 days', horizon='365 days')
        print(df_cv)
        df_p = performance_metrics(df_cv, rolling_window=1)
        print(df_p)
        rmses.append(df_p['rmse'].values[0])

    # Find the best parameters
    best_params = all_params[np.argmin(rmses)]

    # Initiate model with best parameters
    m = Prophet(
        changepoint_prior_scale=best_params['changepoint_prior_scale'],
        seasonality_prior_scale=best_params['seasonality_prior_scale'],
    ).fit(data)

    # make prediction
    future = m.make_future_dataframe(periods=1460, freq='D')
    forecast = m.predict(future)
    return forecast


# get_test() has to be defined before this loop calls it.
Items_List = final_df.ITEMS.unique()  # ITEMS is the dataframe column holding the item names
for x in range(0, 1):  # x in Items_List
    y = 'Item_names'
    fc = get_test()
    print(fc)
    # .copy() so the new column is added to an independent frame, not a slice
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    fc['Region_NAME'] = y
    fc.to_csv('./Future_ITEMS/' + y + '.csv')  # Future_ITEMS is a folder

Saturday, 10 August 2024

Forecasting by Using a Model in Python

 Time Series Forecasting with ML Model

The following steps apply the model to the data. A small example is given below.

Step 1

Create the instance of the Prophet class 

Step 2 

Call  the Methods 

A) fit  method 

B) predict methods

Note:-The input to Prophet is always a dataframe with two columns: ds and y

a) The ds (datestamp) column having format like YYYY-MM-DD  or YYYY-MM-DD HH:MM:SS 

        b) y column should be numeric on which prediction is made

        c) settings for the forecasting procedure are passed into the constructor


Sample Example Code

import pandas as pd

from prophet import Prophet

# Load the input data from 1.csv.
df =pd.read_csv('1.csv')

print(df)

# Step 1: create an instance of the Prophet class with default settings.
m = Prophet()

# Step 2A: fit the model on the loaded dataframe.
m.fit(df)

# Build the frame of dates to predict for, extended by 365 periods.
cast_future = m.make_future_dataframe(periods=365)

print(cast_future)

# Step 2B: predict for every date in the extended frame.
fcast = m.predict(cast_future)

print(fcast)

# Show only the date, the prediction and its lower/upper bounds.
fcast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

# Plot the forecast, then its components.
fig1 = m.plot(fcast)

fig2 = m.plot_components(fcast)

Practical Example for the  Model
import pandas as pd
import os
import sys


source_folder = r".\\data"

# Read every file in the folder as strings and stack them into one frame.
# DataFrame.append was removed in pandas 2.0, so the pieces are collected in
# a list and combined once with pd.concat.
frames = []
for file in os.listdir(source_folder):
    # The path is built from the folder and the bare file name; the previous
    # version appended a stray trailing space to it.
    data = pd.read_csv(os.path.join(source_folder, file), dtype=str)
    frames.append(data)
    print(file, "has been appended")
data2 = pd.concat(frames) if frames else pd.DataFrame()


data2.head()
data2 = data2.drop('Unnamed: 0', axis=1)
data2.head()
# Left-pad the HR value with zeros to a width of four characters.
data2['test'] = data2['HR'].apply(lambda x: '{0:0>4}'.format(x))
data2.head()
# Join the DT text and the padded HR text with a space, then parse it.
data2['ds'] = data2['DT'].str.cat(data2['test'], sep=" ")
data2.head()
data2['ds'] = pd.to_datetime(data2['ds'])
data2['EXTRA'] = data2['EXTRA'].astype('float')
data2.info()
# Rename to the column names used below: 'y' is the value, 'plts' the group key.
data3 = data2.rename(columns={'EXTRA': 'y', 'PLT': 'plts'})
data3.head()
data3 = data3[['ds', 'y', 'plts']]
data3 = data3.dropna()
data3
# Keep only rows whose value is not negative.
data3 = data3.loc[data3['y'] >= 0]
data3
from prophet import Prophet
def get_test(x):
    """Fit a default Prophet model on the ``data3`` rows whose ``plts`` equals *x*.

    Returns the prediction for the fitted history extended by 24000 periods
    at a 60-minute frequency.
    """
    subset = data3.loc[data3['plts'] == x]
    model = Prophet()
    model.fit(subset)
    horizon = model.make_future_dataframe(periods=24000, freq='60min')
    return model.predict(horizon)

# data3 has a 'plts' column (renamed from 'PLT'); `data3.olts` does not exist
# and raised AttributeError.
list_olts = data3['plts'].unique()
# Forecast the first three values, or fewer when the data holds fewer.
for y in list_olts[:3]:
    fc = get_test(y)
    # .copy() so the new column is added to an independent frame, not a slice
    fc = fc[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    fc['PLT_NAME'] = y
    fc.to_csv('./Predicted_DATA/' + y + '.csv')

Tuesday, 2 July 2024

Reading Files In Python

# Import the required libraries
import pandas as pd
import shutil
from shutil import copyfile
from datetime import date, datetime, timedelta
import os

source_folder = r"D:\\SourceFiles\\"
dest_folder = r"D:\\DestinationFiles\\"

# Loop through source_folder. For each file, take the YYYYMMDD date part that
# follows the last "_" in the name, subtract one day, rebuild the name with
# that earlier date and move the file into dest_folder.
for file in os.listdir(source_folder):
    r = file.rfind("_")
    if r == -1:
        # No "_" means there is no date part to shift; leave the file alone.
        print("skipping", file, "- no date part in the name")
        continue

    fname = file[:r]                 # everything before the last "_"
    date_string = file[r + 1:r + 9]  # the eight characters after the "_"
    remainingpart = file[r + 9:]     # whatever follows the date (extension etc.)

    try:
        date_object = datetime.strptime(date_string, "%Y%m%d")
    except ValueError:
        # The characters after "_" are not a valid date; do not stop the run.
        print("skipping", file, "- invalid date part", date_string)
        continue

    date_object = (date_object - timedelta(days=1)).strftime("%Y%m%d")

    old_file = source_folder + file
    new_file = dest_folder + fname + "_" + date_object + remainingpart

    os.rename(old_file, new_file)

############# Read the files in dest_folder one by one and rewrite each to the Final folder

for file in os.listdir(dest_folder):

    print(file)

    # dtype=str keeps every column as text
    df = pd.read_csv("D:\\DestinationFiles\\"+file, dtype=str)

    print(df)

    # index=False: do not write the row index as an extra column
    df.to_csv("D:\\Final\\"+file, index=False)

#####Adding Columns of SysDATE and File_Name in existing DataFrame Using Lists

####Inserting BlankColumn in Beginning of DataFrame 

import pandas
from datetime import date, datetime, timedelta

df = pandas.read_csv("1.csv")

# Both lists must exist before the loop appends to them; they were never
# created before, which raised NameError.
datelist = []
filelist = []

for date_string in df['Date']:
    # Parse dates written as day-abbreviated month-two digit year and
    # reformat them as YYYYMMDD.
    do = datetime.strptime(date_string, '%d-%b-%y')
    SysDATE = do.strftime('%Y%m%d')
    datelist.append(SysDATE)
    filelist.append("filename_" + SysDATE)

df['SDATE'] = datelist
df['File'] = filelist

# Insert a blank column at the beginning of the DataFrame.
df.insert(0, 'Unnamed 0', ' ')


Wednesday, 8 May 2024

Comparing Two files and their Headers using Pandas and Lists

import pandas

import os

import re

Source_Folder_OldFiles = "./data//Old_Columns_Files"
Source_Folder_NewFiles = "./data//New_Columns_Files"

# File-name prefixes of the files whose headers are compared.
_REPORT_PREFIXES = (
    "abc_Re_",
    "def_Re_",
    "ghi_Re_",
    "jkl_Re_",
    "mno_Re_",
    "pqr_Re_",
    "stu_Re_",
    "vwx_Re_",
    "yz_Re_",
)


def _collect_headers(folder):
    """Return the column index of every matching CSV file in *folder*.

    A file matches when its name contains "000000" and starts with one of the
    prefixes in ``_REPORT_PREFIXES``. The name of each file is printed after
    it has been read.
    """
    headers = []
    for file in os.listdir(folder):
        # str.startswith accepts a tuple, replacing the nine-way `or` chain.
        if "000000" in file and file.startswith(_REPORT_PREFIXES):
            file_df = pandas.read_csv(os.path.join(folder, file))
            print(file)
            headers.append(file_df.columns)
    return headers


############################################### Loop through old files###############
old_col_list = _collect_headers(Source_Folder_OldFiles)

print(old_col_list[0])

print(len(old_col_list))


############################################### Loop through New or current day files###############
new_col_list = _collect_headers(Source_Folder_NewFiles)


        

##########################  Loop the logic for all files###########################


# Compare each old header with the new header at the same position. zip()
# stops at the shorter list, so every available pair is compared and a
# missing file on either side no longer raises IndexError (the loop was
# fixed to exactly two pairs before).
for old_cols, new_cols in zip(old_col_list, new_col_list):

    print(new_cols)

    print(len(new_col_list))

    # Position-by-position comparison of the old columns with the new columns
    result = [a == b for a, b in zip(old_cols, new_cols)]

    # Columns of the old file that also exist in the new file
    matches = [i for i in old_cols if i in new_cols]

    print(matches)

    # Columns of the new file that are missing from the old file, and how many
    no_matches = [j for j in new_cols if j not in old_cols]

    print(no_matches, len(no_matches))

Wednesday, 20 December 2023

DataFrame Summary with Functions

 DataFrame Summary with Functions

import pandas as pd

# Show every column and every row when a frame is displayed.
pd.set_option('display.max_columns', None)

pd.set_option('display.max_rows', None)

# Read the sheet: skip the first two rows, take columns B:AG and use the
# 'Index_Column_rowlabel' column as the row index.
df = pd.read_excel("D:\\1.xlsx", "SheetName", index_col='Index_Column_rowlabel', usecols='B:AG',skiprows=2)  

# Display the rows from label 'No_of_Cats' through 'No_of_Dogs' (label slice on the index).
df.loc['No_of_Cats':'No_of_Dogs',:]

df = df.drop('Animals', axis=1)

# Turn the index back into a regular column so it can be filtered by name.
df=df.reset_index()

# Display only the rows labelled 'No_of_Cats' or 'No_of_Dogs'.
df.loc[(df['Index_Column_rowlabel']=='No_of_Cats') | (df['Index_Column_rowlabel']=='No_of_Dogs')]

df=df.rename(columns={"Mobile Voice":"Date_T"})

# Swap rows and columns.
df=df.transpose() 

# Use the values of the first row as the column names.
df.columns = df.iloc[0]

df

####################

import pandas as pd
from datetime import date, datetime

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# A comma was missing between index_col and usecols, which is a SyntaxError.
df = pd.read_excel("D:\\1.xlsx", "sheetname", index_col='Movekp', usecols='B:AGG', skiprows=2)

# index_col moves 'Movekp' into the index. Bring it back as a regular column
# so the filter below can address it as df['Movekp'] without a KeyError.
df = df.reset_index()

# Query the specific rows from the sheet using the loc accessor of pandas
newdf = df.loc[(df['Movekp'] == 'Sub in Ind') | (df['Movekp'] == 'Sub in Pk')]
newdf

# Drop the column
newdf = newdf.drop('Venture', axis=1)

# Rename the columns
# NOTE(review): 'Move`' looks like a typo for 'Movekp'. If no column has this
# exact name the rename does nothing and newdf["Date"] below fails. Confirm
# against the sheet.
newdf = newdf.rename(columns={'Move`': 'Date'})

# Take the transpose of the dataframe
newdf = newdf.transpose()
newdf = newdf.reset_index()

# Assign the values of the first row to the column names
newdf.columns = newdf.iloc[0]

# Remove the first row (it now duplicates the column names)
newdf = newdf.tail(-1)

# Save to disk
newdf.to_csv("D:\\1_data.csv")

# Information about the dataframe
newdf.info()

# Convert the Date column to datetime type
newdf["Date"] = pd.to_datetime(newdf["Date"])

# Set the Date column as the index of the dataframe
newdf = newdf.set_index('Date')

# Plot the dataframe as a line graph
newdf.plot()

Selection under condition in data frame ,along with Group by clause in Pandas dataframe

import pandas

df=pandas.read_csv(".//abc.csv")

# Convert the ds column to datetime and derive month and year from it.
df['date_t']=df.ds.astype(dtype='datetime64[ms]')

df['month'] = df['date_t'].dt.month

df['year'] = df['date_t'].dt.year

df.info()

# Sum yhat per (date_t, month, year) and sort the sums from largest to smallest.
df_tmp = df.groupby(['date_t','month','year'])['yhat'].sum().reset_index().sort_values( 'yhat',ascending = False)

# Keep months 1 and 2 of year 2002, then take, per (month, year), the mean of
# the three largest yhat sums.
r=df_tmp[((df_tmp['month'] == 1)|(df_tmp['month'] == 2)) & (df_tmp['year'] == 2002)].groupby(['month','year'])['yhat'].apply(lambda grp: grp.nlargest(3).mean())

r.to_csv('./Average_of_t3.csv')


Sunday, 10 December 2023

Using tkinter Class in python To plot a graph.

from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

from matplotlib.figure import Figure 

import tkinter

# Create the default window 

# Create the default window
win = tkinter.Tk()
win.title("Welcome")
win.geometry('800x600')

# Create the list of options
options_list = Flist.tolist()

# Variable that keeps track of the option selected in the OptionMenu
value_inside = tkinter.StringVar(win)

# Set the default value of the variable
value_inside.set("Select an Option")

my_menu = tkinter.OptionMenu(win, value_inside, *options_list)
my_menu.pack()

# Canvas of the most recently drawn graph; None until Submit has drawn one.
plot1 = None


def print_answers():
    """Draw a bar chart for the selected option inside the window.

    Filters ``data3`` to the rows whose ``ABC name`` equals the selected
    option, sums ``Tra`` per (``ABC name``, ``Hour``), plots the result as a
    bar chart and packs the canvas into the window.
    """
    global plot1

    print("Selected Option: '{}' ".format(value_inside.get()))

    data_view = data3.query("`ABC name`=='{}' ".format(value_inside.get())).groupby(['ABC name','Hour']).agg({'Tra': 'sum'})
    print(data3)

    fig1 = data_view.plot(kind="bar", title='Graph').get_figure()

    # The window object is `win`; the name `root` is not defined anywhere here.
    plot1 = FigureCanvasTkAgg(fig1, win)
    plot1.draw()

    # Place the canvas on the Tkinter window
    plot1.get_tk_widget().pack()
    return None


def print_clear():
    """Remove the most recently drawn graph from the window, if there is one."""
    print("clear the figure")
    # Clear may be pressed before any graph was drawn.
    if plot1 is not None:
        plot1.get_tk_widget().forget()
    return None


# Submit button
submit_button = tkinter.Button(win, text='Submit', command=print_answers)
submit_button.pack()

clear_button = tkinter.Button(win, text='Clear', command=print_clear)
clear_button.pack()

# NOTE(review): no win.mainloop() call is visible in this snippet; a plain
# script needs one after this point for the window to stay open.


Saturday, 9 December 2023

Adding Columns to CSV Files, Writing Them to Disk, Combining Them, Then Querying and Plotting the Result in Python.

import os
import pandas as pd

source_folder = r"D:\\original data"
dest_folder = r"D:\\Data_With_Time\\"

# DataFrame.append was removed in pandas 2.0, so the per-file frames are
# collected in a list and combined once with pd.concat.
frames = []

for file in os.listdir(source_folder):
    if file.startswith("abc_file") and file.endswith(".csv"):
        r = file.rfind("_")
        h = file[r:][9:13]   # four characters that follow the date part
        dt = file[r:][1:9]   # eight characters right after the last "_"
        fn = file

        # Path built from the folder and the bare file name; the previous
        # version appended a stray trailing space to it.
        data = pd.read_csv(os.path.join(source_folder, file))

        data['SHour'] = h
        data['SDate'] = dt
        data['filename'] = fn
        data['DATA_TRA'] = data['DATA_TRA'] / 3000

        frames.append(data)
        data.to_csv("D:\\updated_" + file)

data2 = pd.concat(frames) if frames else pd.DataFrame()
data2.to_csv("D:\\combine.csv")

data3 = pd.read_csv("D:\\combine.csv")
data3 = data3[['Short name','Date','Hour','Tra']]

# NOTE(review): the query and groupby below use the columns
# 'name of Equipment' and 'name', which are not among the four columns kept
# on the line above. Confirm the real column names.
data3 = data3.query("`name of Equipment`=='PIPE'").groupby(['name','Hour']).agg({'Tra': 'sum'})

print(data3)

data3.plot(kind="bar", title='Graph')

 #Comparing the columns of two CSVs files

import os
import pandas

mylist = []
source_folder = "./data"
for file in os.listdir(source_folder):
    if file.startswith("abc_") and file.endswith(".csv"):
        df = pandas.read_csv(os.path.join(source_folder, file))
        mylist.append(df.columns)

if len(mylist) < 2:
    # Indexing mylist[0] / mylist[1] would raise IndexError here.
    print("need at least two abc_*.csv files to compare, found", len(mylist))
else:
    # zip() stops at the shorter header, so a header that is only a prefix of
    # the other one would compare as equal; the lengths are checked as well.
    res = [c == d for c, d in zip(mylist[0], mylist[1])]
    print(len(mylist[0]) == len(mylist[1]) and all(res))