Site icon Machine Learning Projects

Python Programs to join, merge and concatenate Dataframes – 2023

Machine Learning Projects

Hey guys, in this blog we will see Python Programs to join, merge, and concatenate Dataframes.

Joining Dataframes

Joining on Index

#Joining
# importing pandas module
import pandas as pd 
  
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32]} 
    
# Define a dictionary containing employee data 
data2 = {'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons']} 
  
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1,index=['K0', 'K1', 'K2', 'K3'])
  
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2, index=['K0', 'K2', 'K3', 'K4'])
 
 
print(df1, "\n\n", df2)  

# joining dataframe
df = df1.join(df2)
df

Here we have simply used df1.join(df2) which says that join df2 with df1. Keep all the data of df1 and only matching indices from df2.

Using the “how” parameter

We will use how = ‘outer’ in order to get a union of the two dataframes.

# joining dataframe
df = df1.join(df2,how='outer')

We will use how = ‘inner’ in order to get an intersection of the two dataframes.

# joining dataframe
df = df1.join(df2,how='inner')

Using the “on” parameter

Joining dataframe using on in an argument.

# importing pandas module
import pandas as pd 
  
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],
        'Key':['K0', 'K1', 'K2', 'K3']} 
    
# Define a dictionary containing employee data 
data2 = {'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons']} 
  
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
  
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2, index=['K0', 'K2', 'K3', 'K4'])
 
 
print(df1, "\n\n", df2) 

# using on argument in join
df = df1.join(df2, on='Key')
 
df

Merging Dataframes

Example 1: Merging dataframe using one join key

# Merging
# importing pandas module
import pandas as pd 
 
    
# Define a dictionary containing employee data 
data1 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],} 
   
    
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
print(df1)
print('\n\n')


# Define a dictionary containing employee data 
data2 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 


# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2) 
print(df2)
print('\n\n')


df = pd.merge(df1,df2,on='key')
df

Here we are using pandas.merge(df1,df2,on=’key’) which says that joining both the dataframes df1 and df2 on the key column.

Example 2: Merging dataframe using multiple join keys

# Merging on multiple keys
# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K1', 'K0', 'K1'],
         'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],} 
   
# Define a dictionary containing employee data 
data2 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K0', 'K0', 'K0'],
         'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
 
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2) 
  
 
print(df1, "\n\n", df2) 

# merging dataframe using multiple keys
df = pd.merge(df1, df2, on=['key', 'key1'])
 
df

Types of Merges

MERGE METHODJOIN NAMEDESCRIPTION
leftLEFT OUTER JOINUse keys from the left frame only
rightRIGHT OUTER JOINUse keys from the right frame only
outerFULL OUTER JOINUse union of keys from both frames
innerINNER JOINUse intersection of keys from both frames
# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K1', 'K0', 'K1'],
         'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32],} 
   
# Define a dictionary containing employee data 
data2 = {'key': ['K0', 'K1', 'K2', 'K3'],
         'key1': ['K0', 'K0', 'K0', 'K0'],
         'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1)
 
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2) 
  

Left Merge

 # using keys from left frame
df = pd.merge(df1, df2, how='left', on=['key', 'key1'])
 

Right Merge

 # using keys from right frame
df = pd.merge(df1, df2, how='right', on=['key', 'key1'])
 

Outer Merge

# getting union  of keys
df = pd.merge(df1, df2, how='outer', on=['key', 'key1'])
 

Inner Merge

# getting intersection of keys
df = pd.merge(df1, df2, how='inner', on=['key', 'key1'])

Concatenating Dataframes

# Concatenating
# importing pandas module
import pandas as pd 
 
# Define a dictionary 1 containing employee data 
data1 = {'Name':['Abhishek', 'Shubham', 'Aman', 'Vivek'], 
        'Age':[22, 24, 26, 28], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 

# Convert the dictionary 1 into DataFrame  
df1 = pd.DataFrame(data1,index=[0, 1, 2, 3])
print(df1)
print('\n\n')

# Define a dictionary 2 containing employee data 
data2 = {'Name':['Nishant', 'Sanchit', 'Aniket', 'Amit'], 
        'Age':[19, 15, 15, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']} 
 
# Convert the dictionary 2 into DataFrame  
df2 = pd.DataFrame(data2, index=[4, 5, 6, 7])
print(df2)
print('\n\n')


df = pd.concat([df1,df2])
df

Here we are using pd.concat([df1,df2]) which says that concatenate both the dataframes row wise (default axis=0).

Concatenating DataFrame by setting logic on axes

# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd'],
        'Mobile No': [97, 91, 58, 76]} 
   
# Define a dictionary containing employee data 
data2 = {'Name':['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'], 
        'Age':[22, 32, 12, 52], 
        'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons'],
        'Salary':[1000, 2000, 3000, 4000]} 
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data1,index=[0, 1, 2, 3])
 
# Convert the dictionary into DataFrame  
df2 = pd.DataFrame(data2, index=[2, 3, 6, 7]) 
 
print(df1, "\n\n", df2) 

# applying concat with axes
# join = 'inner'
df = pd.concat([df1, df2], axis=1, join='inner')
 
df
# using a .concat for
# union of dataframe
res2 = pd.concat([df, df1], axis=1, sort=False)
 
res2

Concatenating DataFrame by ignoring indexes

# importing pandas module
import pandas as pd 
  
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd'],
        'Mobile No': [97, 91, 58, 76]} 
    
# Define a dictionary containing employee data 
data2 = {'Name':['Gaurav', 'Anuj', 'Dhiraj', 'Hitesh'], 
        'Age':[22, 32, 12, 52], 
        'Address':['Allahabad', 'Kannuaj', 'Allahabad', 'Kannuaj'], 
        'Qualification':['MCA', 'Phd', 'Bcom', 'B.hons'],
        'Salary':[1000, 2000, 3000, 4000]} 
  
# Convert the dictionary into DataFrame  
df = pd.DataFrame(data1,index=[0, 1, 2, 3])
  
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data2, index=[2, 3, 6, 7]) 
  
  
print(df, "\n\n", df1) 

# using ignore_index
res = pd.concat([df, df1], ignore_index=True)
 
res

Concatenating DataFrame with group keys

# importing pandas module
import pandas as pd 
 
# Define a dictionary containing employee data 
data1 = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'], 
        'Age':[27, 24, 22, 32], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']} 
   
# Define a dictionary containing employee data 
data2 = {'Name':['Abhi', 'Ayushi', 'Dhiraj', 'Hitesh'], 
        'Age':[17, 14, 12, 52], 
        'Address':['Nagpur', 'Kanpur', 'Allahabad', 'Kannuaj'], 
        'Qualification':['Btech', 'B.A', 'Bcom', 'B.hons']} 
 
# Convert the dictionary into DataFrame  
df = pd.DataFrame(data1,index=[0, 1, 2, 3])
 
# Convert the dictionary into DataFrame  
df1 = pd.DataFrame(data2, index=[4, 5, 6, 7])
 
print(df, "\n\n", df1) 

# using keys 
frames = [df, df1 ]
 
res = pd.concat(frames, keys=['x', 'y'])
res

Check out our other python programming examples

Exit mobile version