Thursday, January 7, 2016

Python + xlwt + xlrd: Auto-formatting specific text in an Excel document

A short Python script I wrote for a friend which highlights (in red) all text in an Excel file which matches a certain regular expression. Tested with: Python 2.7.10, xlrd 0.9.4, xlwt 1.0.0
import xlrd
import xlwt
import re

book = xlrd.open_workbook("input.xls")
sh = book.sheet_by_index(0)

wb = xlwt.Workbook()
ws = wb.add_sheet('Sheet1')

red_font = xlwt.easyfont('color_index red')
normal_font = xlwt.easyfont('')
style = xlwt.XFStyle()
style.alignment.wrap = 1

for rx in range(sh.nrows):
 cell = sh.cell(rx, 0) # Text is contained in Column 0 (first column)
 text = cell.value

 strings = list()
 match_count = 0
 pattern = '\\bwenn\\b|\\bWenn\\b'
 indexes = [m.start() for m in re.finditer(pattern, text)]
 matches = [m.group(0) for m in re.finditer(pattern, text)]
 # Word found
 if len(indexes) > 0:
  prevIndex = 0
  for i, c in enumerate(text):
   # Are we at the start of matched text?
   if i in indexes:
    match_length = len(matches[match_count])
    strings.append(((text[prevIndex:i]), normal_font))
    strings.append((text[i:i + match_length], red_font))
    prevIndex = i + match_length
    match_count += 1
  if prevIndex > 0 and prevIndex <= (len(text) - 1):
   strings.append((text[prevIndex:len(text)], normal_font))
 # Word not found in cell
 else:
  strings.append((text, normal_font))

 # Write data
 ws.write_rich_text(rx, 0, strings, style)

first_col = ws.col(0)
first_col.width = 256 * 100 # 100 characters wide 
wb.save('output.xls')

No comments:

Post a Comment