Basic of Pandas data frame
import pandas
l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]df = pandas.DataFrame(l, columns=["a", "b", "c"])
print(df)
Loading the data
df=pd.read_csv("D:\\Pandas\\data.csv")
Printing its column
print(df.columns)
Query the columns
result_df = df.query("colname=='valueincolum'").groupby(['columnname']).agg({'columname': 'sum'})
Example #sum_df = df.groupby(['year','time']).agg({'cricketscore': 'sum', 'Totalscore': 'sum'})
Combining the two dataframes
df3=pd.concat([df, result_df])
Remaining the columns of a dataframe
df.rename(columns={"Transport_Cost(PKR)":"TransferCostPKR","PerUnitPrice(PKR)":"UnitPrice_in_PKR"},inplace=True)
df.info()
Query the data set using Pandas
import pandas as pd
df=pd.read_csv("data.csv")
df=df[["car_model_name","car_type_name","car_Speed_Miles"]]
df1=df[(df.car_model_name=='COONDAA') & (df.car_type_name.str.contains("CKIAA"))]
df1=df1.groupby(["car_model_name","car_type_name"]).agg({'car_Speed_Miles':'sum'})
print(df1)
Making Joins on two data set using Pandas
import pandas as pd
df1=pd.read_csv("1.csv")
df2=pd.read_csv("2.csv")
df3=pd.merge(df1,df2,on='common_column_name',how='inner')
print(df3)
Converting Object to Date and getting Month from the date in Pandas
import pandas as pd
df=pd.read_csv("D:\\Pandas\\Practice\data.csv")
df["Date"] =pd.to_datetime(df["Date"])
if df["Date"].dt.month==1:
print("Jan")
Concatenating and Traverse a list in Pandas
import pandas as pd
df=pd.read_csv('datalist.csv')
l=df['first_name']
names=''
start=0
end=start+4
for i in range(1,4):#1,2,3
if i>1:
start=end
end= start+4
for j in range(start,end):
names=names+"'"+str(l[j])+"'"+","
print(j)
print(names)
Taking the name one by one in a list in Pandas
sample name list:first_name_samples,Tahir,Art,Khalid,Lion,Simon,John,Lota,Sage,Kris,Minna,Table
import pandas as pd
store=[]
df=pd.read_csv('datalist.csv')
l=df['first_name']
names=''
start=0
p=0
end=start+4
for i in range(1,4):#1,2,3
if i>1:
start=end
end= start+4
for j in range(start,end):
names=names+"'"+str(l[j])+"'"+","
store.append(names)
names=''
print(store[2])
Reading Files name from the Folder
import os
y=[]
with os.scandir() as i:
for entry in i:
if entry.is_file():
n=entry.name.split('.')
y.append(n[0])
print(n[0])
print(len(y))
Applying SQLQueries on data frames using pandassql Package
import os
import pandas
import pandasql
df=pandas.read_csv("D:\\Pandas\\data.csv")
query = "SELECT * FROM df "
queryresult=pandasql.sqldf(query)
print(queryresult)
Using List to Prepare a IN Clause String to Query in SQL Script
import pandas as pd
msisdn_list=[]
batch1=''
query = """
SELECT num
FROM tableA a,
tableB b,
WHERE a.abc=b.abc
and num in ({})
"""
df = pd.read_csv('yourdata.csv')
df['num'] = df['num'].astype(str)
df['num']= df['num'].map(lambda m: m.split('.')[0])
num_list=df['num'].unique().tolist()
#num_list=str(num_list)[1:-1]
final_num_list=num_list
for j in range(0,999):
batch1= batch1+"'"+str(final_num_list[j])+"'"+","
batch1=batch1[:-1]
print(batch1)
query= query.format(batch1)
Result= pd.read_sql(query, con)
Result.to_csv('./yourresult.csv', index=False, encoding='utf-8-sig')
Adding File Name as column in data frame
import os
import pandas as pd
fname=[]
for f in os.listdir("D:\\New folder"):
if f.endswith('csv'):
print(f)
fname.append(f)
df = pd.DataFrame()
for i in range(1,len(fname)):
data=pd.read_csv("D:\\New folder\\"+fname[i]+" ", dtype=str)
data['FileName']=fname[i]
df = df.append(data)
print(df)
df.to_csv('.\\abc.csv',index=False)