Installing OpenPyXL module:
pip install openpyxl
Work with Excel Documents via Python and OpenPyXL module
import openpyxl wb = openpyxl.load_workbook('tmp.xlsx') wb.get_sheet_names() # ['Sheet1', 'Sheet2'] sheet = wb.get_sheet_by_name('Sheet1') print(sheet) # <Worksheet "Sheet1"> print(type(sheet)) # <class 'openpyxl.worksheet.worksheet.Worksheet'> print(sheet.title) # 'Sheet1' anotherSheet = wb.get_active_sheet() print(anotherSheet) # <Worksheet "Sheet1"> print(sheet['A1']) # <Cell Sheet1.A1> print(sheet['A1'].value) # 'aaa111' c = sheet['B1'] val = 'Row ' + str(c.row) + ', Column ' + c.column + ' is ' + c.value print(val) # Row 1, Column B is b11b1b1 print('Cell ' + c.coordinate + ' is ' + c.value) # Cell B1 is b11b1b1 print(sheet.cell(row=1, column=2)) # <Cell Sheet1.B1> print(sheet.cell(row=1, column=2).value) # 'b11b1b1' for i in range(1, 4): print(i, sheet.cell(row=i, column=2).value) # (1, u'b11b1b1') # (2, u'b2222') # (3, u'b3333')
Working with xlsx file:
import openpyxl, pprint print('Opening workbook...') wb = openpyxl.load_workbook('censuspopdata.xlsx') # sheet = wb.get_sheet_by_name('Population by Census Tract') sheet = wb.worksheets[0] row_count = sheet.max_row column_count = sheet.max_column countyData = {} # Fill in countyData with each county's population and tracts. print('Reading rows...') for row in range(2, row_count): # Each row in the spreadsheet has data for one census tract. state = sheet['B' + str(row)].value county = sheet['C' + str(row)].value pop = sheet['D' + str(row)].value # Make sure the key for this state exists. countyData.setdefault(state, {}) # Make sure the key for this county in this state exists. countyData[state].setdefault(county, {'tracts': 0, 'pop': 0}) # Each row represents one census tract, so increment by one. countyData[state][county]['tracts'] += 1 # Increase the county pop by the pop in this census tract. countyData[state][county]['pop'] += int(pop) # Open a new text file and write the contents of countyData to it. print('Writing results...') resultFile = open('census2010.py', 'w') resultFile.write('allData = ' + pprint.pformat(countyData)) resultFile.close() print('Done.')
Updating xlsx file:
import openpyxl wb = openpyxl.load_workbook('data.xlsx') sheet = wb.get_active_sheet() sheet.title = 'Updated Sheet Title' wb.create_sheet(index=2, title='Sheet 2') sheet2 = wb.get_sheet_by_name('Sheet 2') sheet2['A1'] = 22 sheet2['B1'] = 33 sheet2['C1'] = int(sheet2['A1'].value) + int(sheet2['B1'].value) wb.save('data_copy.xlsx')
Loop thru rows of xlsx file:
import openpyxl wb = openpyxl.load_workbook('data_list.xlsx') sheet = wb.get_sheet_by_name('Sheet1') PRICE_UPDATES = {'Lemon': 15, 'Orange': 25} # Loop through the rows and update the prices in cells # for row_num in range(2, sheet.get_highest_row()): # for Python 3 for row_num in range(2, sheet.max_row): # Skip the first row product_name = sheet.cell(row=row_num, column=1).value if product_name in PRICE_UPDATES: sheet.cell(row=row_num, column=2).value = PRICE_UPDATES[product_name] wb.save('data_list_copy.xlsx')