62.Example Programs using numpy and pandas

Add two matrix and find the transpose of the result ( university question)
def readmatrix(x,r,c):
   for i in range(r):
        for j in range(c):
            x[i][j]=int(input('enter elements row by row'))
import numpy as np
r1=int(input('rows of a'))
c1=int(input('columns of a'))
r2=int(input('rows of b'))
c2=int(input('columns of b'))
if r1!=r2 or c1!=c2:
   print("cant add matrices")

else:
   A=np.zeros((r1,c1))
   print("Enter the elements of A")
   readmatrix(A,r1,c1)
   B=np.zeros((r2,c2))
   print("Enter the elements of B")
   readmatrix(B,r2,c2)
   print("Matrix A")
   print(A)
   print("Matrix B")
   print(B)
   C=A+B
   print("sum")
   print(C)
   print("transpose of sum")
   print(C.T)

Solving system of linear equations(university questions)
let 2x1+3x2 +5x3= 10
3x1-2x2+x3=3
x1+5x2+7x3=8

import numpy as np
A=np.array([[ 2 , 3, 5],
[ 3, -2 ,1],
[ 1, 5 , 7 ]])
b=np.array([10,3,8])
x=np.linalg.solve(A,b)
print(x)

Find the eigen value and eigen vector of a matrix

import numpy as np

A=np.array([[ 2 , 3, 5],

[ 3, -2 ,1],

[ 1, 5 , 7 ]])

e,v=np.linalg.eig(A)

print(e)

[-2.81422161 0.49572305 9.31849856]

print(v)

[[ 0.09368857 -0.64029415 0.61137707]

[-0.89093813 -0.55826909 0.2289721 ]

[ 0.44435537 0.5275974 0.75748918]]

print(v[:,0]*e[0]) # eX

[-0.2636604 2.50729735 -1.25051449]

print(A.dot(v[:,0])) # AX

[-0.2636604 2.50729735 -1.25051449]

# Note that AX=eX

Creating a dataframe from a list of data and setting the index

import pandas as pd

#initialize a dataframe

df = pd.DataFrame(

[[21, 'Amol', 72, 67],

[23, 'Lini', 78, 69],

[32, 'Kiku', 74, 56],

[52, 'Ajit', 54, 76]],

columns=['rollno', 'name', 'physics', 'botony'])

print('DataFrame with default index\n', df)

#set column as index

df = df.set_index('rollno')

print('\nDataFrame with column as index\n',df)

DataFrame with default index
    rollno  name  physics  botony
0      21  Amol       72      67
1      23  Lini       78      69
2      32  Kiku       74      56
3      52  Ajit       54      76

DataFrame with column as index
         name  physics  botony
rollno                       
21      Amol       72      67
23      Lini       78      69
32      Kiku       74      56
52      Ajit       54      76

Writing data to an excel file using pandas

import pandas as pd

# create dataframe

df_marks = pd.DataFrame({'name': ['Somu', 'Kiku', 'Amol', 'Lini'],

'physics': [68, 74, 77, 78],

'chemistry': [84, 56, 73, 69],

'algebra': [78, 88, 82, 87]})

# create excel writer object

writer = pd.ExcelWriter('output.xlsx')

# write dataframe to excel

df_marks.to_excel(writer)

# save the excel

writer.save()

print('DataFrame is written successfully to Excel File.')

Reading data from excel file using xlrd package

# Program to extract a particular row value

import xlrd

loc = ("stud.xlsx")

wb = xlrd.open_workbook(loc)

sheet = wb.sheet_by_index(0)

#extracting column names

print(sheet.cell_value(0, 0),sheet.cell_value(0, 1),sheet.cell_value(0, 2))

for i in range(1,sheet.nrows):

print(sheet.row_values(i))

Write Python program to write the data given below to a CSV file.(university question)

SN Name Country Contribution Year

1 Linus Torvalds Finland Linux Kernel 1991

2 Tim Berners-Lee England World Wide Web 1990

3 Guido van Rossum Netherlands Python 1991

# importing pandas as pd

import pandas as pd
# dictionary of lists
# creating a dataframe
df = pd.DataFrame([[1,' Linus Torvalds','Finland','Linux Kernel ',1991],
   [2,'Tim Berners-Lee','England','World Wide Web',1990],
   [3,'Guido van Rossum','Netherlands','Python',1991]],
   columns=['SN','Name','Country','Contribution','Year'])
print("data frame with defaut index=",df)
df=df.set_index('SN')
print("data frame with SN as index=",df)
print(df)
df.to_csv('inventors.csv')

Write Python program to write the data given below to a CSV file ( university question)

Reg_no Name Sub_Mark1 Sub_Mark2 Sub_Mark3

10001 Jack 76 88 76

10002 John 77 84 79

10003 Alex 74 79 81

# importing pandas as pd

import pandas as pd

df = pd.DataFrame([[10001,'Jack',76,88,76],

[10002,'John',77,84,79],

[10003,'Alex',74,79,81]],

columns=['Reg_no','Name','Sub_mark1','Sub_mark2','Sub_mark3'])

print("data frame with defaut index=",df)

df=df.set_index('Reg_no')

print("data frame with SN as index=",df)

print(df)

df.to_csv('student.csv')

Consider a CSV file ‘employee.csv’ with the following columns(name, gender, start_date ,salary, team).Write commands to do the following using panda library. ( university question)

1. print first 7 records from employees file

2. print all employee names in alphabetical order

3. find the name of the employee with highest salary

4. list the names of male employees

5. to which all teams employees belong

import pandas as pd

df=pd.read_csv('employee.csv')

print("1.print 7 records from employee file")

print(df.head(7))

print("2.employee name in alphabetical order")

print(df.sort_values(by='name')['name'])

print("3.name of employee with highest salary")

print(df.sort_values(by='salary',ascending=False).head(1)['name'])

print("4.list the name of male employees")

print(df[df['gender']=='M']['name'])

print("5.to which all team employee belongs")

print(df['team'].unique())

Create a data frame from the dictionary of lists

import pandas as pd

# dictionary of lists

dict = {'name':["aparna", "pankaj", "sudhir", "Geeku"],

'degree': ["MBA", "BCA", "M.Tech", "MBA"],

'score':[90, 40, 80, 98]}

# creating a dataframe from a dictionary

df = pd.DataFrame(dict)

print(df)

Given a file “auto.csv” of automobile data with the fields index, company, body-style, wheel-base, length, engine-type, num-of-cylinders, horsepower, average-mileage, and price, write Python codes using Pandas to

1) Clean and Update the CSV file

2)Find the most expensive car company name

3)Print all toyota car details

4) Print total cars of all companies

5) Find the highest priced car of all companies

6)Find the average mileage of all companies

7)Sort all cars by Price column

Reading the data file and showing the first five records

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

df.head(5)

index	company	body-style	wheel-base	length	engine-type	num-of-cylinders	horsepower	average-mileage	price
0	0	alfa-romero	convertible	88.6	168.8	dohc	four	111	21	13495.0
1	1	alfa-romero	convertible	88.6	168.8	dohc	four	111	21	16500.0
2	2	alfa-romero	hatchback	94.5	171.2	ohcv	six	154	19	16500.0
3	3	audi	sedan	99.8	176.6	ohc	four	102	24	13950.0
4	4	audi	sedan	99.4	176.6	ohc	five	115	18	17450.0

This will show last 7 rows

df.tail(7)

1) Clean and Update the CSV file

Consider NaN values for n.a and ?

import pandas as pd
df = pd.read_csv("Automobile_data.csv",
na_values={'price':["?","n.a"],
'stroke':["?","n.a"],
'horsepower':["?","n.a"],
'peak-rpm':["?","n.a"],
'average-mileage':["?","n.a"]})
print (df)
df.to_csv("Automobile_data.csv")

2)Find the most expensive car company name

import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df = df [['company','price']][df.price==df['price'].max()]
df

output

company price

35 mercedes-benz 45400.0

3) Print all toyota car details

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

print(df[df['company']=='toyota'])

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

car_Manufacturers = df.groupby('company')

toyotaDf = car_Manufacturers.get_group('toyota')

toyotaDf

4)Print total cars of all companies

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

df.groupby('company')['company'].count()

import pandas as pd

df['company'].value_counts()

5) Find the highest priced car of all companies

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

df.groupby('company')[['company','price']].max()

6)Find the average mileage of all companies

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

df.groupby('company')[['company','average-mileage']].mean()

7)Sort all cars by Price column

import pandas as pd

df = pd.read_csv("Automobile_data.csv")

df.sort_values(by=['price', 'horsepower'], ascending=False)[['company','price']]

Create a stud.csv file containing rollno,name,place and mark of students. Use this file and do the following ( university question)

Read and display the file contents

import pandas as pd
df = pd.read_csv("stud.csv")
print(df)
rollno name place mark
0 101 binu ernkulam 45
1 103 ashik alleppey 35
2 102 faisal kollam 48
3 105 biju kotayam 25
4 106 anu thrisur 25
5 107 padma kylm 25

display the top 10 rows

df.head(10)

display the bottom 5 rows

df.tail(5)

Set rollno as index
df=df.set_index('rollno')
print(df)
               name place mark
rollno
101        binu ernkulam 45
103        ashik alleppey 35
102          faisal kollam 48
105          biju kotayam 25
106       anu thrisur 25
107       padma kylm 25

Display name and mark

df=df[['name','mark']]
df=df.to_string(index=False)
print(df)
name        mark
binu           45
ashik             35
faisal         48
biju          25
anu          25
padma       25

rollno,Name and mark in the order of name

df=df[['name','mark']]
df=df.sort_values('name')
print(df)
           name mark
rollno
106    anu 25
103    ashik 35
105   biju    25
101    binu   45
102    faisal 48
107    padma 25

Display the rollno,name, mark in the descending order of mark
df=df.sort_values(by='mark',ascending=False)
print(df)
               name mark
rollno
102       faisal   48
101        binu 45
103        ashik    35
106        anu    25
105        biju        25
107        padma   25

Find the average mark,median and mode
print(df['mark'].mean())
print(df['mark'].median())
print(df['mark'].mode())
33.833333333333336
30.0
25

Find minimum and maximum marks
print(df['mark'].min())
print(df['mark'].max())
25
48

variance and standard deviation of marks

print(df['mark'].var())

print(df['mark'].std())

112.16666666666667

10.59087657687817

details of passed students mark>=25

df[df['mark']>25]

display the histogram of marks

import matplotlib.pyplot as plt

plt.hist(df['mark'])

df.hist(column='mark')

remove the place column

df.drop(['place'],axis=1,inplace=True)

print(df)

rollno name mark

0 101 binu 45

1 103 ashik 35

2 102 faisal 48

3 105 biju 25

4 106 ann 25

5 107 padma 25

Set the mark of student with rollno 103 to 45

df.at[df['rollno']==103,'mark']=45

df.loc[df['rollno']==103,'mark']=45

display the column name and types

print(df.dtypes)

display the column names

print(df.columns)

Find the number of rows and columns

print(df.shape)

Write Python program to write the following University topper data of CSE branch to a CSV file.( University Question)

RegNo Name Semester College CGPA

ABC123 Ganesh Kumar S8 ABC 9.8

ECH265 John Mathew S7 ECH 9.9

FET345 Reena K S6 FET 9.7

GMT734 Adil M S5 GMT 9.75

import pandas as pd

# dictionary of lists

# creating a dataframe

df = pd.DataFrame([['ABC123','Ganesh Kumar','S8','ABC',9.8],

['ECH265','John Mathew','S7','ECH',9.9],

['FET345','Reena K','S6','FET',9.7],

['GMT734','Adil M','S5','GMT',9.75]],

columns=['Roll.No','Name','Semester','College','CGPA'])

print("Data frame with defaut index")

print(df)

df=df.set_index('Roll.No')

print("Data frame with Roll.No as index")

print(df)

df.to_csv('toppers.csv')

Output:

Data frame with defaut index
  Roll.No          Name Semester College  CGPA
0  ABC123  Ganesh Kumar       S8     ABC  9.80
1  ECH265   John Mathew       S7     ECH  9.90
2  FET345       Reena K       S6     FET  9.70
3  GMT734        Adil M       S5     GMT  9.75
Data frame with Roll.No as index
                 Name Semester College  CGPA
Roll.No                                     
ABC123   Ganesh Kumar       S8     ABC  9.80
ECH265    John Mathew       S7     ECH  9.90
FET345        Reena K       S6     FET  9.70
GMT734         Adil M       S5     GMT  9.75

This can also be done with csv package

import csv

datarows =[['ABC123','Ganesh Kumar','S8','ABC',9.8],

['ECH265','John Mathew','S7','ECH',9.9],

['FET345','Reena K','S6','FET',9.7],

['GMT734','Adil M','S5','GMT',9.75]]

titles=['Roll.No','Name','Semester','College','CGPA']

file=open('cstoppers.csv',"w")

csvwriter=csv.writer(file)

csvwriter.writerow(titles)

csvwriter.writerows(datarows)

print("CSV file cstoppers.csv created")

file.close()

Write a Pandas program to read a CSV file named ‘Diamond.csv’ with fields carat, cut, color, clarity, depth, and price and to print the following: ( University Question)

1. Number of rows and columns

2. First five rows

import pandas as pd

df=pd.read_csv('Diamond.csv')

print("Number of rows and Columns")

print(df.shape)

print("First Five rows")

print(df.head(5))

Temperature(°C) on different dates is stored in a CSV file as ‘Weather_data.csv’ with the fields date, temperature and humidity. ( University question)

1.Draw a plot of the weather report with date as the x-axis and temperature as the y-axis.

2. Draw a scatter plot of the weather report with date as the x-axis and humidity as the y-axis.

Give appropriate titles and labels in the plot.

import pandas as pd

import matplotlib.pyplot as plt

df=pd.read_csv('weather_data.csv')

print(df)

plt.plot(df['date'],df['temperature'])

plt.xlabel("Date")

plt.ylabel("Temperature")

plt.title("Date Vs Temp")

plt.show()

plt.scatter(df['date'],df['humidity'])

plt.xlabel("Date")

plt.ylabel("Humidity")

plt.title("Date Vs Humidity")

plt.show()

Note:You can also use plot() method of dataframe object

import pandas as pd

df=pd.read_csv('weather_data.csv')

print(df)

df.plot(x='date',y='Temperature')

df.plot(kind='scatter',x='date',y='humidity')

Write Python program to write the data given below to a CSV file named student.csv(University Question)

fields = ['Name', 'Branch', 'Year', 'CGPA']

rows = [ ['Nikhil', 'CSE', '2', '8.0'],

['Sanchit', 'CSE', '2', '9.1'],

['Aditya', 'IT', '2', '9.3'],

['Sagar', 'IT', '1', '9.5']]

import csv

fields = ['Name', 'Branch', 'Year', 'CGPA']

rows = [ ['Nikhil', 'CSE', '2', '8.0'],

['Sanchit', 'CSE', '2', '9.1'],

['Aditya', 'IT', '2', '9.3'],

['Sagar', 'IT', '1', '9.5']]

file=open('student.csv',"w")

csvwriter=csv.writer(file)

csvwriter.writerow(fields)

csvwriter.writerows(rows)

print("CSV file student.csv created")

file.close()

Consider the above student.csv file with fields Name, Branch, Year, CGPA . Write python code using pandas to

1) To find the average CGPA of the students

2) To display the details of all students having CGPA > 9

3) To display the details of all CSE students with CGPA > 9

4) To display the details of student with maximum CGPA

5) To display average CGPA of each branch

import pandas as pd

data=pd.read_csv('student.csv')

meangrade=data['CGPA'].mean()

print(data)

print("1.Average Grade=",meangrade)

print("2.CGPA>9",data[data['CGPA']>9])

print("3.CS CGPA>9",data[ (data['Branch']=='CSE') & (data['CGPA']>=9)])

max=data['CGPA'].max()

print("4.mac cgpa",data[data['CGPA']==max])

x=data.groupby('Branch')['CGPA'].mean()

print("5.Average cgpa of each branch",x)

Search This Blog

Python for Machine Learning - CST 283 KTU Minor- Dr Binu V P

62.Example Programs using numpy and pandas

Comments

Post a Comment

Popular posts from this blog

Python For Machine Learning - CST 283 - KTU Minor Notes- Dr Binu V P

KTU Python for machine learning Sample Question Paper and Answer Key Dec 2020

1.Python Introduction