OpenPyXL

Installing OpenPyXL module:

pip install openpyxl

Work with Excel Documents via Python and OpenPyXL module

import openpyxl
wb = openpyxl.load_workbook('tmp.xlsx')
wb.get_sheet_names() # ['Sheet1', 'Sheet2']
sheet = wb.get_sheet_by_name('Sheet1')
print(sheet) # <Worksheet "Sheet1">
print(type(sheet)) # <class 'openpyxl.worksheet.worksheet.Worksheet'>
print(sheet.title) # 'Sheet1'
anotherSheet = wb.get_active_sheet()
print(anotherSheet) # <Worksheet "Sheet1">
print(sheet['A1']) # <Cell Sheet1.A1>
print(sheet['A1'].value) # 'aaa111'
c = sheet['B1']
val = 'Row ' + str(c.row) + ', Column ' + c.column + ' is ' + c.value
print(val) # Row 1, Column B is b11b1b1
print('Cell ' + c.coordinate + ' is ' + c.value) # Cell B1 is b11b1b1

print(sheet.cell(row=1, column=2)) # <Cell Sheet1.B1>
print(sheet.cell(row=1, column=2).value) # 'b11b1b1'
for i in range(1, 4):
    print(i, sheet.cell(row=i, column=2).value)
# (1, u'b11b1b1')
# (2, u'b2222')
# (3, u'b3333')

Working with xlsx file:

import openpyxl, pprint
print('Opening workbook...')
wb = openpyxl.load_workbook('censuspopdata.xlsx')
# sheet = wb.get_sheet_by_name('Population by Census Tract')
sheet = wb.worksheets[0]
row_count = sheet.max_row
column_count = sheet.max_column
countyData = {}
# Fill in countyData with each county's population and tracts.
print('Reading rows...')
for row in range(2, row_count):
    # Each row in the spreadsheet has data for one census tract.
    state  = sheet['B' + str(row)].value
    county = sheet['C' + str(row)].value
    pop    = sheet['D' + str(row)].value

    # Make sure the key for this state exists.
    countyData.setdefault(state, {})
    # Make sure the key for this county in this state exists.
    countyData[state].setdefault(county, {'tracts': 0, 'pop': 0})

    # Each row represents one census tract, so increment by one.
    countyData[state][county]['tracts'] += 1
    # Increase the county pop by the pop in this census tract.
    countyData[state][county]['pop'] += int(pop)

# Open a new text file and write the contents of countyData to it.
print('Writing results...')
resultFile = open('census2010.py', 'w')
resultFile.write('allData = ' + pprint.pformat(countyData))
resultFile.close()
print('Done.')

Updating xlsx file:

import openpyxl
wb = openpyxl.load_workbook('data.xlsx')
sheet = wb.get_active_sheet()
sheet.title = 'Updated Sheet Title'

wb.create_sheet(index=2, title='Sheet 2')
sheet2 = wb.get_sheet_by_name('Sheet 2')
sheet2['A1'] = 22
sheet2['B1'] = 33
sheet2['C1'] = int(sheet2['A1'].value) + int(sheet2['B1'].value)
wb.save('data_copy.xlsx')

Loop thru rows of xlsx file:

import openpyxl
wb = openpyxl.load_workbook('data_list.xlsx')
sheet = wb.get_sheet_by_name('Sheet1')

PRICE_UPDATES = {'Lemon': 15, 'Orange': 25}

# Loop through the rows and update the prices in cells
# for row_num in range(2, sheet.get_highest_row()): # for Python 3
for row_num in range(2, sheet.max_row): # Skip the first row
    product_name = sheet.cell(row=row_num, column=1).value
    if product_name in PRICE_UPDATES:
        sheet.cell(row=row_num, column=2).value = PRICE_UPDATES[product_name]

wb.save('data_list_copy.xlsx')

Leave a Comment