62.Example Programs using numpy and pandas

Add two matrix and find the transpose of the result ( university question)
def readmatrix(x,r,c):
    for i in range(r):
        for j in range(c):
            x[i][j]=int(input('enter elements row by row'))
import numpy as np
r1=int(input('rows of a'))
c1=int(input('columns of a'))
r2=int(input('rows of b'))
c2=int(input('columns of b'))
if r1!=r2 or c1!=c2:
    print("cant add matrices")
else:
    A=np.zeros((r1,c1))
    print("Enter the elements of A")
    readmatrix(A,r1,c1)
    B=np.zeros((r2,c2))
    print("Enter the elements of B")
    readmatrix(B,r2,c2)
    print("Matrix A")
    print(A)
    print("Matrix B")
    print(B)
    C=A+B
    print("sum")
    print(C)
    print("transpose of sum")
    print(C.T)

Solving system of linear equations(university questions)
let 2x1+3x2 +5x3= 10
3x1-2x2+x3=3
x1+5x2+7x3=8


import numpy as np
A=np.array([[ 2 , 3, 5],
[ 3, -2 ,1],
[ 1, 5 , 7 ]])
b=np.array([10,3,8])
x=np.linalg.solve(A,b)
print(x)

Find the eigen value and eigen vector of a matrix
import numpy as np
A=np.array([[ 2 , 3, 5],
[ 3, -2 ,1],
[ 1, 5 , 7 ]])
e,v=np.linalg.eig(A)
print(e)
[-2.81422161 0.49572305 9.31849856]
print(v)
[[ 0.09368857 -0.64029415 0.61137707] 
[-0.89093813 -0.55826909 0.2289721 ] 
[ 0.44435537 0.5275974 0.75748918]]
print(v[:,0]*e[0]) # eX
[-0.2636604 2.50729735 -1.25051449]
print(A.dot(v[:,0])) # AX
[-0.2636604 2.50729735 -1.25051449]
# Note that AX=eX


Creating a dataframe from a list of data and setting the index
import pandas as pd
#initialize a dataframe
df = pd.DataFrame(
[[21, 'Amol', 72, 67],
[23, 'Lini', 78, 69],
[32, 'Kiku', 74, 56],
[52, 'Ajit', 54, 76]],
columns=['rollno', 'name', 'physics', 'botony'])
print('DataFrame with default index\n', df)
#set column as index
df = df.set_index('rollno')
print('\nDataFrame with column as index\n',df)
DataFrame with default index
    rollno  name  physics  botony
0      21  Amol       72      67
1      23  Lini       78      69
2      32  Kiku       74      56
3      52  Ajit       54      76

DataFrame with column as index
         name  physics  botony
rollno                       
21      Amol       72      67
23      Lini       78      69
32      Kiku       74      56
52      Ajit       54      76

Writing data to an excel file using pandas
import pandas as pd
# create dataframe
df_marks = pd.DataFrame({'name': ['Somu', 'Kiku', 'Amol', 'Lini'],
     'physics': [68, 74, 77, 78],
     'chemistry': [84, 56, 73, 69],
     'algebra': [78, 88, 82, 87]})

# create excel writer object
writer = pd.ExcelWriter('output.xlsx')
# write dataframe to excel
df_marks.to_excel(writer)
# save the excel
writer.save()
print('DataFrame is written successfully to Excel File.')

Reading data from excel file using xlrd package
# Program to extract a particular row value
import xlrd
loc = ("stud.xlsx")
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(0)
#extracting column names
print(sheet.cell_value(0, 0),sheet.cell_value(0, 1),sheet.cell_value(0, 2))
for i in range(1,sheet.nrows):
    print(sheet.row_values(i))

Write Python program to write the data given below to a CSV file.(university question)
SN Name                         Country          Contribution              Year
1     Linus Torvalds          Finland         Linux Kernel              1991
2     Tim Berners-Lee       England        World Wide Web       1990
3     Guido van Rossum    Netherlands   Python                      1991

# importing pandas as pd 
import pandas as pd
# dictionary of lists
# creating a dataframe
df = pd.DataFrame([[1,' Linus Torvalds','Finland','Linux Kernel ',1991],
    [2,'Tim Berners-Lee','England','World Wide Web',1990],
    [3,'Guido van Rossum','Netherlands','Python',1991]],
    columns=['SN','Name','Country','Contribution','Year'])
print("data frame with defaut index=",df)
df=df.set_index('SN')
print("data frame with SN as index=",df)
print(df)
df.to_csv('inventors.csv')

Write Python program to write the data given below to a CSV file ( university question)
Reg_no     Name Sub_Mark1 Sub_Mark2 Sub_Mark3
10001         Jack     76                 88                     76
10002         John     77                 84                     79
10003         Alex     74                 79                     81

# importing pandas as pd 
import pandas as pd
df = pd.DataFrame([[10001,'Jack',76,88,76],
      [10002,'John',77,84,79],
      [10003,'Alex',74,79,81]],
columns=['Reg_no','Name','Sub_mark1','Sub_mark2','Sub_mark3'])
print("data frame with defaut index=",df)
df=df.set_index('Reg_no')
print("data frame with SN as index=",df)
print(df)
df.to_csv('student.csv')

Consider a CSV file ‘employee.csv’ with the following columns(name, gender, start_date ,salary, team).Write commands to do the following using panda library. ( university question)
1. print first 7 records from employees file
2. print all employee names in alphabetical order
3. find the name of the employee with highest salary
4. list the names of male employees
5. to which all teams employees belong
import pandas as pd
df=pd.read_csv('employee.csv')
print("1.print 7 records from  employee file")
print(df.head(7))
print("2.employee name in alphabetical order")
print(df.sort_values(by='name')['name'])
print("3.name of employee with highest salary")
print(df.sort_values(by='salary',ascending=False).head(1)['name'])
print("4.list the name of male employees")
print(df[df['gender']=='M']['name'])
print("5.to which all team employee belongs")
print(df['team'].unique())

Create a data frame from the dictionary of lists
import pandas as pd 
# dictionary of lists 
dict = {'name':["aparna", "pankaj", "sudhir", "Geeku"],
 'degree': ["MBA", "BCA", "M.Tech", "MBA"], 
'score':[90, 40, 80, 98]} 
# creating a dataframe from a dictionary 
df = pd.DataFrame(dict) 
print(df)

Given a file “auto.csv” of automobile data with the fields index, company, body-style, wheel-base, length, engine-type, num-of-cylinders, horsepower, average-mileage, and price, write Python codes using Pandas to
1) Clean and Update the CSV file
2)Find the most expensive car company name
3)Print all toyota car details
4) Print total cars of all companies
5) Find the highest priced car of all companies
6)Find the average mileage of all companies
7)Sort all cars by Price column

Reading the data file and showing the first five records
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.head(5)
indexcompanybody-stylewheel-baselengthengine-typenum-of-cylindershorsepoweraverage-mileageprice
00alfa-romeroconvertible88.6168.8dohcfour1112113495.0
11alfa-romeroconvertible88.6168.8dohcfour1112116500.0
22alfa-romerohatchback94.5171.2ohcvsix1541916500.0
33audisedan99.8176.6ohcfour1022413950.0
44audisedan99.4176.6ohcfive1151817450.0
This will show last 7 rows
df.tail(7)

1) Clean and Update the CSV file
Consider NaN values for n.a and  ?
import pandas as pd
df = pd.read_csv("Automobile_data.csv",
na_values={'price':["?","n.a"],
'stroke':["?","n.a"],
'horsepower':["?","n.a"],
'peak-rpm':["?","n.a"],
'average-mileage':["?","n.a"]})
print (df)
df.to_csv("Automobile_data.csv")

2)Find the most expensive car company name
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df = df [['company','price']][df.price==df['price'].max()]
df
output
        company            price
35     mercedes-benz     45400.0

3) Print all toyota car details
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
print(df[df['company']=='toyota'])
  OR
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
car_Manufacturers = df.groupby('company')
toyotaDf = car_Manufacturers.get_group('toyota')
toyotaDf

4)Print total cars of all companies
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.groupby('company')['company'].count()
        OR
import pandas as pd
df['company'].value_counts()

5) Find the highest priced car of all companies
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.groupby('company')[['company','price']].max()

6)Find the average mileage of all companies
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.groupby('company')[['company','average-mileage']].mean()

7)Sort all cars by Price column
import pandas as pd
df = pd.read_csv("Automobile_data.csv")
df.sort_values(by=['price', 'horsepower'], ascending=False)[['company','price']]

Create a stud.csv file containing rollno,name,place and mark of students. Use this file and do the following ( university question)

Read and display the file contents

import pandas as pd
df = pd.read_csv("stud.csv")
print(df)
rollno name place mark
0 101 binu ernkulam 45
1 103 ashik alleppey 35
2 102 faisal kollam 48
3 105 biju kotayam 25
4 106 anu thrisur 25
5 107 padma kylm 25

display the top 10 rows
df.head(10)

display the bottom 5 rows
df.tail(5)

Set rollno as index
df=df.set_index('rollno')
print(df)
                name place mark
rollno
101          binu ernkulam 45
103          ashik alleppey 35
102          faisal kollam 48
105          biju kotayam 25
106          anu thrisur 25
107         padma kylm 25

Display name and mark 

df=df[['name','mark']]
df=df.to_string(index=False)
print(df)
name          mark
binu               45
ashik             35
faisal             48
biju                25
anu                25
padma           25

rollno,Name and mark in the order of name

df=df[['name','mark']]
df=df.sort_values('name')
print(df)
            name mark
rollno
106     anu       25
103     ashik    35
105     biju      25
101     binu     45
102     faisal    48
107     padma  25

Display the rollno,name, mark in the descending order of mark
df=df.sort_values(by='mark',ascending=False)
print(df)
                name mark
rollno
102          faisal     48
101          binu       45
103          ashik     35
106          anu        25
105          biju        25
107          padma    25

Find the average mark,median and mode
print(df['mark'].mean())
print(df['mark'].median())
print(df['mark'].mode())
33.833333333333336
30.0
25

Find minimum and maximum marks
print(df['mark'].min())
print(df['mark'].max())
25
48

variance and standard deviation of marks  
print(df['mark'].var())
print(df['mark'].std())
112.16666666666667 
10.59087657687817

details of passed students mark>=25
df[df['mark']>25]

display the histogram of marks
import matplotlib.pyplot as plt
plt.hist(df['mark'])
or
df.hist(column='mark')


remove the place column
df.drop(['place'],axis=1,inplace=True)
print(df)
rollno name mark 
0 101 binu 45 
1 103 ashik 35 
2 102 faisal 48 
3 105 biju 25 
4 106 ann 25 
5 107 padma 25

Set the mark of student with rollno 103 to 45
df.at[df['rollno']==103,'mark']=45
df.loc[df['rollno']==103,'mark']=45

display the column name and types
print(df.dtypes)

display the column names
print(df.columns)

Find the number of rows and columns
print(df.shape)

Write Python program to write the following University topper data of CSE branch to a CSV file.( University Question)

RegNo         Name                      Semester     College      CGPA
ABC123    Ganesh Kumar        S8                ABC            9.8
ECH265    John Mathew           S7                ECH           9.9
FET345     Reena K                    S6                FET            9.7
GMT734   Adil M                       S5                GMT           9.75

import pandas as pd 
# dictionary of lists 
# creating a dataframe 
df = pd.DataFrame([['ABC123','Ganesh Kumar','S8','ABC',9.8],
      ['ECH265','John Mathew','S7','ECH',9.9],
      ['FET345','Reena K','S6','FET',9.7],
      ['GMT734','Adil M','S5','GMT',9.75]],
columns=['Roll.No','Name','Semester','College','CGPA'])
print("Data frame with defaut index")
print(df)
df=df.set_index('Roll.No')
print("Data frame with Roll.No as index")
print(df)
df.to_csv('toppers.csv')

Output:
Data frame with defaut index
  Roll.No          Name Semester College  CGPA
0  ABC123  Ganesh Kumar       S8     ABC  9.80
1  ECH265   John Mathew       S7     ECH  9.90
2  FET345       Reena K       S6     FET  9.70
3  GMT734        Adil M       S5     GMT  9.75
Data frame with Roll.No as index
                 Name Semester College  CGPA
Roll.No                                     
ABC123   Ganesh Kumar       S8     ABC  9.80
ECH265    John Mathew       S7     ECH  9.90
FET345        Reena K       S6     FET  9.70
GMT734         Adil M       S5     GMT  9.75

This can also be done with csv package

import csv
datarows =[['ABC123','Ganesh Kumar','S8','ABC',9.8],
          ['ECH265','John Mathew','S7','ECH',9.9],
          ['FET345','Reena K','S6','FET',9.7],
          ['GMT734','Adil M','S5','GMT',9.75]]
titles=['Roll.No','Name','Semester','College','CGPA']
file=open('cstoppers.csv',"w")
csvwriter=csv.writer(file)
csvwriter.writerow(titles)
csvwriter.writerows(datarows)
print("CSV file cstoppers.csv created")
file.close()

Write a Pandas program to read a CSV file named ‘Diamond.csv’ with fields carat, cut, color, clarity, depth, and price and to print the following: ( University Question)
1. Number of rows and columns
2. First five rows

import pandas as pd
df=pd.read_csv('Diamond.csv')
print("Number of rows and Columns")
print(df.shape)

print("First Five rows")
print(df.head(5))

Temperature(°C) on different dates is stored in a CSV file as ‘Weather_data.csv’ with the fields date, temperature and humidity. ( University question)
1.Draw a plot of the weather report with date as the x-axis and temperature as the y-axis.
2. Draw a scatter plot of the weather report with date as the x-axis and humidity as the y-axis.
Give appropriate titles and labels in the plot.

import pandas as pd
import matplotlib.pyplot as plt
df=pd.read_csv('weather_data.csv')
print(df)

plt.plot(df['date'],df['temperature'])
plt.xlabel("Date")
plt.ylabel("Temperature")
plt.title("Date Vs Temp")
plt.show()

plt.scatter(df['date'],df['humidity'])
plt.xlabel("Date")
plt.ylabel("Humidity")
plt.title("Date Vs Humidity")
plt.show()

Note:You can also use plot() method of dataframe object

import pandas as pd
df=pd.read_csv('weather_data.csv')
print(df)
df.plot(x='date',y='Temperature')

df.plot(kind='scatter',x='date',y='humidity')

Write Python program to write the data given below to a CSV file named student.csv(University Question)
fields = ['Name', 'Branch', 'Year', 'CGPA']
rows = [ ['Nikhil', 'CSE', '2', '8.0'],
['Sanchit', 'CSE', '2', '9.1'],
['Aditya', 'IT', '2', '9.3'],
['Sagar', 'IT', '1', '9.5']]

import csv
fields = ['Name', 'Branch', 'Year', 'CGPA']
rows = [ ['Nikhil', 'CSE', '2', '8.0'],
['Sanchit', 'CSE', '2', '9.1'],
['Aditya', 'IT', '2', '9.3'],
['Sagar', 'IT', '1', '9.5']]
file=open('student.csv',"w")
csvwriter=csv.writer(file)
csvwriter.writerow(fields)
csvwriter.writerows(rows)
print("CSV file student.csv created")
file.close()

Consider the above student.csv file with fields Name, Branch, Year, CGPA . Write python code using pandas to
1) To find the average CGPA of the students
2) To display the details of all students having CGPA > 9
3) To display the details of all CSE students with CGPA > 9
4) To display the details of student with maximum CGPA
5) To display average CGPA of each branch

import pandas as pd 
data=pd.read_csv('student.csv') 
meangrade=data['CGPA'].mean()
print(data)
print("1.Average Grade=",meangrade)
print("2.CGPA>9",data[data['CGPA']>9])
print("3.CS CGPA>9",data[ (data['Branch']=='CSE') & (data['CGPA']>=9)])
max=data['CGPA'].max()
print("4.mac cgpa",data[data['CGPA']==max])
x=data.groupby('Branch')['CGPA'].mean()
print("5.Average cgpa of each branch",x)

Comments

Popular posts from this blog

Python For Machine Learning - CST 283 - KTU Minor Notes- Dr Binu V P

46.Classes and Objects in Python- Accessors and mutators

KTU Python for machine learning Sample Question Paper and Answer Key Dec 2020