62.Example Programs using numpy and pandas
Add two matrix and find the transpose of the result ( university question)
def readmatrix(x,r,c):
for i in range(r):
for j in range(c):
x[i][j]=int(input('enter elements row by row'))
import numpy as np
r1=int(input('rows of a'))
c1=int(input('columns of a'))
r2=int(input('rows of b'))
c2=int(input('columns of b'))
if r1!=r2 or c1!=c2:
print("cant add matrices")
Solving system of linear equations(university questions)
let 2x1+3x2 +5x3= 10
3x1-2x2+x3=3
x1+5x2+7x3=8
import numpy as np
A=np.array([[ 2 , 3, 5],
[ 3, -2 ,1],
[ 1, 5 , 7 ]])
b=np.array([10,3,8])
x=np.linalg.solve(A,b)
print(x)
Writing data to an excel file using pandas
# dictionary of lists
# creating a dataframe
df = pd.DataFrame([[1,' Linus Torvalds','Finland','Linux Kernel ',1991],
[2,'Tim Berners-Lee','England','World Wide Web',1990],
[3,'Guido van Rossum','Netherlands','Python',1991]],
columns=['SN','Name','Country','Contribution','Year'])
print("data frame with defaut index=",df)
df=df.set_index('SN')
print("data frame with SN as index=",df)
print(df)
df.to_csv('inventors.csv')
df = pd.read_csv("Automobile_data.csv",
na_values={'price':["?","n.a"],
'stroke':["?","n.a"],
'horsepower':["?","n.a"],
'peak-rpm':["?","n.a"],
'average-mileage':["?","n.a"]})
print (df)
df.to_csv("Automobile_data.csv")
def readmatrix(x,r,c):
for i in range(r):
for j in range(c):
x[i][j]=int(input('enter elements row by row'))
import numpy as np
r1=int(input('rows of a'))
c1=int(input('columns of a'))
r2=int(input('rows of b'))
c2=int(input('columns of b'))
if r1!=r2 or c1!=c2:
print("cant add matrices")
else:
A=np.zeros((r1,c1))
print("Enter the elements of A")
readmatrix(A,r1,c1)
B=np.zeros((r2,c2))
print("Enter the elements of B")
readmatrix(B,r2,c2)
print("Matrix A")
print(A)
print("Matrix B")
print(B)
C=A+B
print("sum")
print(C)
print("transpose of sum")
print(C.T)
A=np.zeros((r1,c1))
print("Enter the elements of A")
readmatrix(A,r1,c1)
B=np.zeros((r2,c2))
print("Enter the elements of B")
readmatrix(B,r2,c2)
print("Matrix A")
print(A)
print("Matrix B")
print(B)
C=A+B
print("sum")
print(C)
print("transpose of sum")
print(C.T)
let 2x1+3x2 +5x3= 10
3x1-2x2+x3=3
x1+5x2+7x3=8
import numpy as np
A=np.array([[ 2 , 3, 5],
[ 3, -2 ,1],
[ 1, 5 , 7 ]])
b=np.array([10,3,8])
x=np.linalg.solve(A,b)
print(x)
Find the eigen value and eigen vector of a matrix
import numpy as np
A=np.array([[ 2 , 3, 5],
[ 3, -2 ,1],
[ 1, 5 , 7 ]])
e,v=np.linalg.eig(A)
print(e)
[-2.81422161 0.49572305 9.31849856]print(v)
[[ 0.09368857 -0.64029415 0.61137707] [-0.89093813 -0.55826909 0.2289721 ]
[ 0.44435537 0.5275974 0.75748918]]
print(v[:,0]*e[0]) # eX
[-0.2636604 2.50729735 -1.25051449]print(A.dot(v[:,0])) # AX
[-0.2636604 2.50729735 -1.25051449]# Note that AX=eX
Creating a dataframe from a list of data and setting the index
import pandas as pd
#initialize a dataframe
df = pd.DataFrame(
[[21, 'Amol', 72, 67],
[23, 'Lini', 78, 69],
[32, 'Kiku', 74, 56],
[52, 'Ajit', 54, 76]],
columns=['rollno', 'name', 'physics', 'botony'])
print('DataFrame with default index\n', df)
#set column as index
df = df.set_index('rollno')
print('\nDataFrame with column as index\n',df)
DataFrame with default index rollno name physics botony 0 21 Amol 72 67 1 23 Lini 78 69 2 32 Kiku 74 56 3 52 Ajit 54 76 DataFrame with column as index name physics botony rollno 21 Amol 72 67 23 Lini 78 69 32 Kiku 74 56 52 Ajit 54 76
import pandas as pd
# create dataframe
df_marks = pd.DataFrame({'name': ['Somu', 'Kiku', 'Amol', 'Lini'],
'physics': [68, 74, 77, 78],
'chemistry': [84, 56, 73, 69],
'algebra': [78, 88, 82, 87]})
# create excel writer object
writer = pd.ExcelWriter('output.xlsx')
# write dataframe to excel
df_marks.to_excel(writer)
# save the excel
writer.save()
print('DataFrame is written successfully to Excel File.')
Reading data from excel file using xlrd package
# Program to extract a particular row value
import xlrd
loc = ("stud.xlsx")
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)
#extracting column names
print(sheet.cell_value(0, 0),sheet.cell_value(0, 1),sheet.cell_value(0, 2))
for i in range(1,sheet.nrows):
print(sheet.row_values(i))
Write Python program to write the data given below to a CSV file.(university question)
SN Name Country Contribution Year
1 Linus Torvalds Finland Linux Kernel 1991
2 Tim Berners-Lee England World Wide Web 1990
3 Guido van Rossum Netherlands Python 1991
# importing pandas as pd
import pandas as pd # dictionary of lists
# creating a dataframe
df = pd.DataFrame([[1,' Linus Torvalds','Finland','Linux Kernel ',1991],
[2,'Tim Berners-Lee','England','World Wide Web',1990],
[3,'Guido van Rossum','Netherlands','Python',1991]],
columns=['SN','Name','Country','Contribution','Year'])
print("data frame with defaut index=",df)
df=df.set_index('SN')
print("data frame with SN as index=",df)
print(df)
df.to_csv('inventors.csv')
Write Python program to write the data given below to a CSV file ( university question)
Reg_no Name Sub_Mark1 Sub_Mark2 Sub_Mark3
10001 Jack 76 88 76
10002 John 77 84 79
10003 Alex 74 79 81
# importing pandas as pd
import pandas as pd
df = pd.DataFrame([[10001,'Jack',76,88,76],
[10002,'John',77,84,79],
[10003,'Alex',74,79,81]],
columns=['Reg_no','Name','Sub_mark1','Sub_mark2','Sub_mark3'])
print("data frame with defaut index=",df)
df=df.set_index('Reg_no')
print("data frame with SN as index=",df)
print(df)
df.to_csv('student.csv')
Consider a CSV file ‘employee.csv’ with the following columns(name, gender, start_date ,salary, team).Write commands to do the following using panda library. ( university question)
1. print first 7 records from employees file
2. print all employee names in alphabetical order
3. find the name of the employee with highest salary
4. list the names of male employees
5. to which all teams employees belong
import pandas as pd
df=pd.read_csv('employee.csv')
print("1.print 7 records from employee file")
print(df.head(7))
print("2.employee name in alphabetical order")
print(df.sort_values(by='name')['name'])
print("3.name of employee with highest salary")
print(df.sort_values(by='salary',ascending=False).head(1)['name'])
print("4.list the name of male employees")
print(df[df['gender']=='M']['name'])
print("5.to which all team employee belongs")
print(df['team'].unique())
Create a data frame from the dictionary of lists
import pandas as pd
# dictionary of lists
dict = {'name':["aparna", "pankaj", "sudhir", "Geeku"],
'degree': ["MBA", "BCA", "M.Tech", "MBA"],
'score':[90, 40, 80, 98]}
# creating a dataframe from a dictionary
df = pd.DataFrame(dict)
print(df)
Given a file “auto.csv” of automobile data with the fields index, company, body-style, wheel-base, length, engine-type, num-of-cylinders, horsepower, average-mileage, and price, write Python codes using Pandas to
1) Clean and Update the CSV file
2)Find the most expensive car company name
3)Print all toyota car details
4) Print total cars of all companies
5) Find the highest priced car of all companies
6)Find the average mileage of all companies
7)Sort all cars by Price columnReading the data file and showing the first five records
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.head(5)
index | company | body-style | wheel-base | length | engine-type | num-of-cylinders | horsepower | average-mileage | price | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | alfa-romero | convertible | 88.6 | 168.8 | dohc | four | 111 | 21 | 13495.0 |
1 | 1 | alfa-romero | convertible | 88.6 | 168.8 | dohc | four | 111 | 21 | 16500.0 |
2 | 2 | alfa-romero | hatchback | 94.5 | 171.2 | ohcv | six | 154 | 19 | 16500.0 |
3 | 3 | audi | sedan | 99.8 | 176.6 | ohc | four | 102 | 24 | 13950.0 |
4 | 4 | audi | sedan | 99.4 | 176.6 | ohc | five | 115 | 18 | 17450.0 |
This will show last 7 rows
df.tail(7)1) Clean and Update the CSV file
Consider NaN values for n.a and ?
import pandas as pddf = pd.read_csv("Automobile_data.csv",
na_values={'price':["?","n.a"],
'stroke':["?","n.a"],
'horsepower':["?","n.a"],
'peak-rpm':["?","n.a"],
'average-mileage':["?","n.a"]})
print (df)
df.to_csv("Automobile_data.csv")
2)Find the most expensive car company name
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df = df [['company','price']][df.price==df['price'].max()]
df
df = pd.read_csv("stud.csv")
print(df)
rollno name place mark
0 101 binu ernkulam 45
1 103 ashik alleppey 35
2 102 faisal kollam 48
3 105 biju kotayam 25
4 106 anu thrisur 25
5 107 padma kylm 25
df = pd.read_csv("Automobile_data.csv")
df = df [['company','price']][df.price==df['price'].max()]
df
output
company price
35 mercedes-benz 45400.0
3) Print all toyota car details
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
print(df[df['company']=='toyota'])
OR
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
car_Manufacturers = df.groupby('company')
toyotaDf = car_Manufacturers.get_group('toyota')
toyotaDf
4)Print total cars of all companies
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.groupby('company')['company'].count()
OR
import pandas as pd
df['company'].value_counts()
5) Find the highest priced car of all companies
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.groupby('company')[['company','price']].max()
6)Find the average mileage of all companies
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.groupby('company')[['company','average-mileage']].mean()
7)Sort all cars by Price column
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.sort_values(by=['price', 'horsepower'], ascending=False)[['company','price']]
Create a stud.csv file containing rollno,name,place and mark of students. Use this file and do the following ( university question)
Read and display the file
contents
df = pd.read_csv("stud.csv")
print(df)
rollno name place mark
0 101 binu ernkulam 45
1 103 ashik alleppey 35
2 102 faisal kollam 48
3 105 biju kotayam 25
4 106 anu thrisur 25
5 107 padma kylm 25
display the top 10 rows
df.head(10)
display the bottom 5 rows
df.tail(5)
Set rollno as index
df=df.set_index('rollno')
print(df)
name place mark
rollno
101 binu ernkulam 45
103 ashik alleppey 35
102 faisal kollam 48
105 biju kotayam 25
106 anu thrisur 25
107 padma kylm 25
df=df.to_string(index=False)
print(df)
name mark
binu 45
ashik 35
faisal 48
biju 25
anu 25
padma 25
df=df.sort_values('name')
print(df)
name mark
rollno
106 anu 25
103 ashik 35
105 biju 25
101 binu 45
102 faisal 48
107 padma 25
df=df.set_index('rollno')
print(df)
name place mark
rollno
101 binu ernkulam 45
103 ashik alleppey 35
102 faisal kollam 48
105 biju kotayam 25
106 anu thrisur 25
107 padma kylm 25
Display name and mark
df=df.to_string(index=False)
print(df)
name mark
binu 45
ashik 35
faisal 48
biju 25
anu 25
padma 25
rollno,Name and mark in the order
of name
df=df.sort_values('name')
print(df)
name mark
rollno
106 anu 25
103 ashik 35
105 biju 25
101 binu 45
102 faisal 48
107 padma 25
Display the rollno,name, mark in the descending order of mark
df=df.sort_values(by='mark',ascending=False)
print(df)
name mark
rollno
102 faisal 48
101 binu 45
103 ashik 35
106 anu 25
105 biju 25
107 padma 25
Find the average mark,median and mode
print(df['mark'].mean())
print(df['mark'].median())
print(df['mark'].mode())
33.833333333333336
30.0
25
df=df.sort_values(by='mark',ascending=False)
print(df)
name mark
rollno
102 faisal 48
101 binu 45
103 ashik 35
106 anu 25
105 biju 25
107 padma 25
Find the average mark,median and mode
print(df['mark'].mean())
print(df['mark'].median())
print(df['mark'].mode())
33.833333333333336
30.0
25
Find minimum and maximum marks
print(df['mark'].min())
print(df['mark'].max())
25
48
print(df['mark'].min())
print(df['mark'].max())
25
48
variance and standard deviation of marks
print(df['mark'].var())
print(df['mark'].std())
112.16666666666667 10.59087657687817
details of passed students mark>=25
df[df['mark']>25]display the histogram of marks
import matplotlib.pyplot as plt
plt.hist(df['mark'])
or
df.hist(column='mark')
remove the place column
df.drop(['place'],axis=1,inplace=True)
print(df)
rollno name mark 0 101 binu 45
1 103 ashik 35
2 102 faisal 48
3 105 biju 25
4 106 ann 25
5 107 padma 25
Set the mark of student with rollno 103 to 45
df.at[df['rollno']==103,'mark']=45
df.loc[df['rollno']==103,'mark']=45
display the column name and types
print(df.dtypes)
display the column names
print(df.columns)
Find the number of rows and columns
print(df.shape)
Write Python program to write the following University topper data of CSE branch to a CSV file.( University Question)
RegNo Name Semester College CGPA
ABC123 Ganesh Kumar S8 ABC 9.8
ECH265 John Mathew S7 ECH 9.9
FET345 Reena K S6 FET 9.7
GMT734 Adil M S5 GMT 9.75
import pandas as pd
# dictionary of lists
# creating a dataframe
df = pd.DataFrame([['ABC123','Ganesh Kumar','S8','ABC',9.8],
['ECH265','John Mathew','S7','ECH',9.9],
['FET345','Reena K','S6','FET',9.7],
['GMT734','Adil M','S5','GMT',9.75]],
columns=['Roll.No','Name','Semester','College','CGPA'])
print("Data frame with defaut index")
print(df)
df=df.set_index('Roll.No')
print("Data frame with Roll.No as index")
print(df)
df.to_csv('toppers.csv')
Output:
Data frame with defaut index Roll.No Name Semester College CGPA 0 ABC123 Ganesh Kumar S8 ABC 9.80 1 ECH265 John Mathew S7 ECH 9.90 2 FET345 Reena K S6 FET 9.70 3 GMT734 Adil M S5 GMT 9.75 Data frame with Roll.No as index Name Semester College CGPA Roll.No ABC123 Ganesh Kumar S8 ABC 9.80 ECH265 John Mathew S7 ECH 9.90 FET345 Reena K S6 FET 9.70 GMT734 Adil M S5 GMT 9.75
This can also be done with csv package
import csv
datarows =[['ABC123','Ganesh Kumar','S8','ABC',9.8],
['ECH265','John Mathew','S7','ECH',9.9],
['FET345','Reena K','S6','FET',9.7],
['GMT734','Adil M','S5','GMT',9.75]]
titles=['Roll.No','Name','Semester','College','CGPA']
file=open('cstoppers.csv',"w")
csvwriter=csv.writer(file)
csvwriter.writerow(titles)
csvwriter.writerows(datarows)
print("CSV file cstoppers.csv created")
file.close()
Write a Pandas program to read a CSV file named ‘Diamond.csv’ with fields carat, cut, color, clarity, depth, and price and to print the following: ( University Question)
1. Number of rows and columns
2. First five rows
import pandas as pd
df=pd.read_csv('Diamond.csv')
print("Number of rows and Columns")
print(df.shape)
print("First Five rows")
print(df.head(5))
Temperature(°C) on different dates is stored in a CSV file as ‘Weather_data.csv’ with the fields date, temperature and humidity. ( University question)
1.Draw a plot of the weather report with date as the x-axis and temperature as the y-axis.
2. Draw a scatter plot of the weather report with date as the x-axis and humidity as the y-axis.
Give appropriate titles and labels in the plot.
import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv('weather_data.csv')
print(df)
plt.plot(df['date'],df['temperature'])
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.title("Date Vs Temp")
plt.show()
plt.scatter(df['date'],df['humidity'])
plt.xlabel("Date")
plt.ylabel("Humidity")
plt.title("Date Vs Humidity")
plt.show()
Note:You can also use plot() method of dataframe object
import pandas as pd
df=pd.read_csv('weather_data.csv')
print(df)
df.plot(x='date',y='Temperature')
df.plot(kind='scatter',x='date',y='humidity')
Write Python program to write the data given below to a CSV file named student.csv(University Question)
fields = ['Name', 'Branch', 'Year', 'CGPA']
rows = [ ['Nikhil', 'CSE', '2', '8.0'],
['Sanchit', 'CSE', '2', '9.1'],
['Aditya', 'IT', '2', '9.3'],
['Sagar', 'IT', '1', '9.5']]
import csv
fields = ['Name', 'Branch', 'Year', 'CGPA']
rows = [ ['Nikhil', 'CSE', '2', '8.0'],
['Sanchit', 'CSE', '2', '9.1'],
['Aditya', 'IT', '2', '9.3'],
['Sagar', 'IT', '1', '9.5']]
file=open('student.csv',"w")
csvwriter=csv.writer(file)
csvwriter.writerow(fields)
csvwriter.writerows(rows)
print("CSV file student.csv created")
file.close()
Consider the above student.csv file with fields Name, Branch, Year, CGPA . Write python code using pandas to
1) To find the average CGPA of the students
2) To display the details of all students having CGPA > 9
3) To display the details of all CSE students with CGPA > 9
4) To display the details of student with maximum CGPA
5) To display average CGPA of each branch
import pandas as pd
data=pd.read_csv('student.csv')
meangrade=data['CGPA'].mean()
print(data)
print("1.Average Grade=",meangrade)
print("2.CGPA>9",data[data['CGPA']>9])
print("3.CS CGPA>9",data[ (data['Branch']=='CSE') & (data['CGPA']>=9)])
max=data['CGPA'].max()
print("4.mac cgpa",data[data['CGPA']==max])
x=data.groupby('Branch')['CGPA'].mean()
print("5.Average cgpa of each branch",x)
Comments
Post a Comment