# Update to Perforce WIP, added and removed files.
#============================================================================
#Name : readHTML.py
#Part of : Helium
#Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
#All rights reserved.
#This component and the accompanying materials are made available
#under the terms of the License "Eclipse Public License v1.0"
#which accompanies this distribution, and is available
#at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
#Initial Contributors:
#Nokia Corporation - initial contribution.
#
#Contributors:
#
#Description:
#===============================================================================
# Name: readHTML.py
# Synopsis: This script creates a CSV file from the Log File Summary (.html)
import htmllib
import sys
import formatter
import re
class HTMLComponent(object):
    """
    Represents a component in the log file summary

    Stores a component name together with its error and warning counts and
    renders them as one comma-separated line via getCSV(). The three values
    are exposed as the read/write properties name, errorCount and
    warningCount.
    """
    def __init__(self):
        self._name = ''
        self._errorCount = 0
        self._warningCount = 0

    def getCSV(self):
        """
        Return "name,errorCount,warningCount" as a single string.

        The counts go through str(), so they may be stored either as numbers
        or as digit strings. The name is emitted as-is; no CSV quoting or
        escaping is applied.
        """
        return ','.join([self._name,
                         str(self._errorCount),
                         str(self._warningCount)])

    def __getName(self):
        return self._name

    def __setName(self, n):
        self._name = n

    def __getErrorCount(self):
        return self._errorCount

    def __setErrorCount(self, n):
        self._errorCount = n

    def __getWarningCount(self):
        return self._warningCount

    def __setWarningCount(self, n):
        self._warningCount = n

    # The properties used to be write-only (no getter), so reading e.g.
    # component.name raised AttributeError. Assignment behaves as before.
    name = property(__getName, __setName)
    errorCount = property(__getErrorCount, __setErrorCount)
    warningCount = property(__getWarningCount, __setWarningCount)
class LogHTMLParser(htmllib.HTMLParser):
    """
    Parse the scan2log HTML file into CSV

    The parser walks the text cells between the 'Component' cell and the
    'By Command' cell, treating them as rows of COLUMNS_PER_ROW columns:
    column 0 is taken as the component name, column 2 as the error count and
    column 3 as the warning count. The 'Component' cell itself is processed
    as column 0 of the first row.

    After feeding a document:
      errorCount / warningCount -- sums of every numeric column 2 / column 3
                                   cell seen, whether or not the row was kept
      components                -- HTMLComponent objects for the rows whose
                                   error count is >= ERROR_THRESHOLD or whose
                                   warning count is >= WARNING_THRESHOLD
    """
    # Plain-text link markers such as "[9]". Compiled once for the class
    # instead of on every handle_data() call.
    _LINK_MARKER = re.compile(r'\[[0-9]*\]')

    # A row is recorded when it reaches at least one of these counts.
    ERROR_THRESHOLD = 5
    WARNING_THRESHOLD = 50

    # Number of text cells that make up one table row.
    COLUMNS_PER_ROW = 6

    def __init__(self, verbose=0):
        self.anchors = {}
        f = formatter.NullFormatter()
        htmllib.HTMLParser.__init__(self, f, verbose)
        self.state = -1  # represents column, 0 is first; -1 means outside the table
        self.printFlag = False  # True once the current row crossed a threshold
        self.errorCount = 0
        self.warningCount = 0
        self._components = []
        self.component = None  # HTMLComponent for the row being read

    def __getComponents(self):
        return self._components

    components = property(__getComponents)

    def handle_data(self, text):
        """
        Consume one chunk of document text and advance the column counter.

        Empty chunks and chunks starting with a link marker like "[9]" are
        skipped without advancing the column.
        """
        text = text.strip()
        # ignore plain text links that appear eg. [9]
        if not text or self._LINK_MARKER.match(text):
            return

        # start of area to parse
        if text == 'Component':
            self.state = 0
        # end of area to parse
        if text == 'By Command':
            self.state = -1

        if self.state == 0:
            self.component = HTMLComponent()
            self.component.name = text

        if text.isdigit():
            count = int(text)
            if self.state == 2:
                self.errorCount += count
                self.component.errorCount = text
                if count >= self.ERROR_THRESHOLD:
                    self.printFlag = True
            elif self.state == 3:
                self.warningCount += count
                self.component.warningCount = text
                if count >= self.WARNING_THRESHOLD:
                    self.printFlag = True

        # last column of the row: keep the component if it was flagged
        if self.state == self.COLUMNS_PER_ROW - 1:
            if self.printFlag:
                self._components.append(self.component)
            self.printFlag = False

        # Only advance while inside the table; -1 must stay -1
        # (in Python, -1 % 6 would otherwise become 5).
        if self.state >= 0:
            self.state = (self.state + 1) % self.COLUMNS_PER_ROW
def main():
    """
    Convert a log file summary (.html) into a CSV file.

    Expects exactly two command line arguments: the input HTML file and the
    output CSV file. With any other argument count a usage line is printed
    and the process exits with status 1.

    The CSV holds a header line, one line per component reported by
    LogHTMLParser.components, and a final "Total" line carrying the parser's
    overall error and warning counts.
    """
    if len(sys.argv) != 3:
        print("Usage: readHTML.py LogFile.html errors.csv")
        sys.exit(1)

    parser = LogHTMLParser()
    inputFile = open(sys.argv[1], 'rb')
    try:
        parser.feed(inputFile.read())
    finally:
        inputFile.close()
    # close() forces processing of any data the parser still has buffered,
    # so it must run before the results are read, not after they are written.
    parser.close()

    outFile = open(sys.argv[2], 'w')
    try:
        # NOTE(review): the labels say "more than" while LogHTMLParser records
        # rows at >= 5 errors / >= 50 warnings; header text left unchanged.
        outFile.write("Component,Errors (more than 5),Warnings (more than 50)\n")
        for c in parser.components:
            outFile.write(c.getCSV() + "\n")
        outFile.write("Total," + str(parser.errorCount) + "," + str(parser.warningCount) + "\n")
    finally:
        outFile.close()
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()