Wednesday, 20 December 2023

DataFrame Summary with Functions

 DataFrame Summary with Functions

import pandas as pd

pd.set_option('display.max_columns', None)

pd.set_option('display.max_rows', None)

df = pd.read_excel("D:\\1.xlsx", "SheetName", index_col='Index_Column_rowlabel', usecols='B:AG',skiprows=2)  

df.loc['No_of_Cats':'No_of_Dogs',:]

df = df.drop('Animals', axis=1)

df=df.reset_index()

df.loc[(df['Index_Column_rowlabel']=='No_of_Cats') | (df['Index_Column_rowlabel']=='No_of_Dogs')]

df=df.rename(columns={"Mobile Voice":"Date_T"})

df=df.transpose() 

df.columns = df.iloc[0]

df

####################

import pandas as pd

pd.set_option('display.max_columns', None)

pd.set_option('display.max_rows', None)

df = pd.read_excel("D:\\1.xlsx", "sheetname",index_col='Movekp' usecols='B:AGG',skiprows=2)  

#get the/quering the specific information from the sheet using loc function of the pandas

newdf=df.loc[(df['Movekp']=='Sub in Ind') | (df['Movekp']=='Sub in Pk')]

newdf

#droping the column

newdf=newdf.drop('Venture', axis=1)

#rename the columns

newdf=newdf.rename(columns={'Move`':'Date'})

#take transpose of the dataframe

newdf=newdf.transpose()

newdf=newdf.reset_index()

#assigning the value of first row to columns

newdf.columns = newdf.iloc[0]

# remove first row

newdf=newdf.tail(-1)

#save to disk

newdf.to_csv("D:\\1_data.csv")

# Information about df

newdf.info()

#importing datetime library.

from datetime import date,datetime

# converting Date column to datetime type

newdf["Date"]=pd.to_datetime(newdf["Date"])

# set the date column as index of dataframe df

newdf=newdf.set_index('Date')

# plot the graph of the dataframe df that is line

newdf.plot()

Selection under condition in data frame ,along with Group by clause in Pandas dataframe

import pandas

df=pandas.read_csv(".//abc.csv")

df['date_t']=df.ds.astype(dtype='datetime64[ms]')

df['month'] = df['date_t'].dt.month

df['year'] = df['date_t'].dt.year

df.info()

df_tmp = df.groupby(['date_t','month','year'])['yhat'].sum().reset_index().sort_values( 'yhat',ascending = False)

r=df_tmp[((df_tmp['month'] == 1)|(df_tmp['month'] == 2)) & (df_tmp['year'] == 2002)].groupby(['month','year'])['yhat'].apply(lambda grp: grp.nlargest(3).mean())

r.to_csv('./Average_of_t3.csv')


Sunday, 10 December 2023

Using tkinter Class in python To plot a graph.

from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

from matplotlib.figure import Figure 

import tkinter

# Create the default window 

win = tkinter.Tk() 

win.title("Welcome") 

win.geometry('800x600') 

  

# Create the list of options 

options_list=Flist.tolist()



# Variable to keep track of the option 

# selected in OptionMenu 

value_inside = tkinter.StringVar(win) 

  

# Set the default value of the variable 

value_inside.set("Select an Option") 

  

my_menu = tkinter.OptionMenu(win, value_inside, *options_list) 

my_menu.pack() 

  

 

def print_answers(): 

    print("Selected Option: '{}' ".format(value_inside.get())) 

    

    data_view = data3.query("`ABC name`=='{}' ".format(value_inside.get())).groupby(['ABC name','Hour']).agg({'Tra': 'sum'})

    print(data3)

    fig1=data_view.plot(kind="bar",title='Graph').get_figure();

    global plot1 

    plot1=FigureCanvasTkAgg(fig1,root)

    

    plot1.draw() 

  

    # placing the canvas on the Tkinter window 

    plot1.get_tk_widget().pack() 

    return None

  

def print_clear(): 

    print("clear the figure")

    plot1.get_tk_widget().forget()

    return None

    


# Submit button 

submit_button = tkinter.Button(root, text='Submit', command=print_answers) 

submit_button.pack() 


clear_button = tkinter.Button(root, text='Clear', command=print_clear) 

clear_button.pack() 


Saturday, 9 December 2023

Adding Colums to CSV and Write to Disk , Combine it, Then Query and Plot the Result in Python.

 import os

import pandas as pd

source_folder = r"D:\\original data"

dest_folder = r"D:\\Data_With_Time\\"

data2=pd.DataFrame()

for file in os.listdir(source_folder):

    if file.startswith("abc_file")  and file.endswith(".csv"):

        r=file.rfind("_")

        h=file[r:][9:13]

        dt=file[r:][1:9]

        fn=file

        data= pd.read_csv("D:\\original data\\"+file+" ")

        data['SHour']= h

        data['SDate']= dt

        data['filename']=fn

        data['DATA_TRA']=data['DATA_TRA']/3000

        data2=data2.append(data)

        data.to_csv("D:\\updated_"+file)

data2.to_csv("D:\\combine.csv")

data3=pd.read_csv("D:\\combine.csv")

data3=data3[['Short name','Date','Hour','Tra']]

data3 = data3.query("`name of Equipment`=='PIPE'").groupby(['name','Hour']).agg({'Tra': 'sum'})

print(data3)

data3.plot(kind="bar",title='Graph')

 #Comparing the columns of two CSVs files

import os
import pandas
mylist=[]
source_folder = "./data"
for file in os.listdir(source_folder):
    if file.startswith("abc_")  and file.endswith(".csv"):
        #print(file)
        df=pandas.read_csv(".//data//"+file+"")
        mylist.append(df.columns)
#print(mylist[1])
res = [c == d for c, d in zip(mylist[0], mylist[1])]
print(all(res))