Saturday, 6 November 2021

Understanding Basic of Pandas

 Basic of Pandas data frame

import pandas

l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]

df = pandas.DataFrame(l, columns=["a", "b", "c"])
print(df)

Loading the data
df=pd.read_csv("D:\\Pandas\\data.csv")

Printing its column
print(df.columns)

Query the columns
  result_df = df.query("colname=='valueincolum'").groupby(['columnname']).agg({'columname': 'sum'})

  Example #sum_df = df.groupby(['year','time']).agg({'cricketscore': 'sum', 'Totalscore': 'sum'})

Combining the two dataframes

df3=pd.concat([df, result_df])

 Remaining the columns of a dataframe

 df.rename(columns={"Transport_Cost(PKR)":"TransferCostPKR","PerUnitPrice(PKR)":"UnitPrice_in_PKR"},inplace=True)
df.info()

 

Query the data set  using Pandas
import pandas as pd
df=pd.read_csv("data.csv")
df=df[["car_model_name","car_type_name","car_Speed_Miles"]]
df1=df[(df.car_model_name=='COONDAA') & (df.car_type_name.str.contains("CKIAA"))]
df1=df1.groupby(["car_model_name","car_type_name"]).agg({'car_Speed_Miles':'sum'})
print(df1)

Making Joins on two data set using Pandas
import pandas as pd
df1=pd.read_csv("1.csv")
df2=pd.read_csv("2.csv")
df3=pd.merge(df1,df2,on='common_column_name',how='inner')
print(df3)

Converting Object to Date and getting Month from the date in Pandas

 import pandas as pd
df=pd.read_csv("D:\\Pandas\\Practice\data.csv")
df["Date"]   =pd.to_datetime(df["Date"])
if df["Date"].dt.month==1:
    print("Jan")

Concatenating and Traverse a list  in Pandas

import pandas as pd
df=pd.read_csv('datalist.csv')
l=df['first_name']
names=''
start=0
end=start+4
for i in range(1,4):#1,2,3
    if i>1:
       start=end
       end= start+4
    
    for j in range(start,end):
        names=names+"'"+str(l[j])+"'"+","
        print(j)
print(names)

Taking the name one by one in  a list  in Pandas

sample name list:first_name_samples,Tahir,Art,Khalid,Lion,Simon,John,Lota,Sage,Kris,Minna,Table 


 import pandas as pd
store=[]
df=pd.read_csv('datalist.csv')
l=df['first_name']
names=''
start=0
p=0
end=start+4
for i in range(1,4):#1,2,3
     
    if i>1:
       start=end
       end= start+4
    
    for j in range(start,end):
        names=names+"'"+str(l[j])+"'"+","
    store.append(names)
    names=''       
print(store[2])

Reading Files name from the Folder

import os
y=[]
with os.scandir() as i:
    for entry in i:
        if entry.is_file():
            n=entry.name.split('.')
            y.append(n[0])
            print(n[0])
            print(len(y)) 

Applying SQLQueries on data frames using pandassql Package

import os
import pandas
import pandasql  
df=pandas.read_csv("D:\\Pandas\\data.csv")
query = "SELECT *  FROM df "
queryresult=pandasql.sqldf(query)
print(queryresult) 

 

Using List to Prepare a IN Clause String to Query  in SQL Script

import pandas as pd

msisdn_list=[]

batch1=''

query = """

SELECT  num

FROM tableA a,

             tableB b,

WHERE a.abc=b.abc

and num  in ({})

"""

 df = pd.read_csv('yourdata.csv')

df['num'] = df['num'].astype(str)

df['num']= df['num'].map(lambda m: m.split('.')[0])

num_list=df['num'].unique().tolist()

#num_list=str(num_list)[1:-1]

final_num_list=num_list

for j in range(0,999):

    batch1= batch1+"'"+str(final_num_list[j])+"'"+","

batch1=batch1[:-1]

print(batch1)

query= query.format(batch1)

Result= pd.read_sql(query, con)

Result.to_csv('./yourresult.csv', index=False, encoding='utf-8-sig')

 Adding File Name as column in data frame

import os
import pandas as pd
fname=[]
for f in os.listdir("D:\\New folder"):
    if f.endswith('csv'):
    print(f)
    fname.append(f)
df = pd.DataFrame()
for i in range(1,len(fname)):
    data=pd.read_csv("D:\\New folder\\"+fname[i]+" ", dtype=str)
    data['FileName']=fname[i]
    df = df.append(data)
print(df)
df.to_csv('.\\abc.csv',index=False)