WebKitTools/Scripts/webkitpy/style/checkers/cpp.py
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 #!/usr/bin/python
       
     2 # -*- coding: utf-8 -*-
       
     3 #
       
     4 # Copyright (C) 2009 Google Inc. All rights reserved.
       
     5 # Copyright (C) 2009 Torch Mobile Inc.
       
     6 # Copyright (C) 2009 Apple Inc. All rights reserved.
       
     7 # Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
       
     8 #
       
     9 # Redistribution and use in source and binary forms, with or without
       
    10 # modification, are permitted provided that the following conditions are
       
    11 # met:
       
    12 #
       
    13 #    * Redistributions of source code must retain the above copyright
       
    14 # notice, this list of conditions and the following disclaimer.
       
    15 #    * Redistributions in binary form must reproduce the above
       
    16 # copyright notice, this list of conditions and the following disclaimer
       
    17 # in the documentation and/or other materials provided with the
       
    18 # distribution.
       
    19 #    * Neither the name of Google Inc. nor the names of its
       
    20 # contributors may be used to endorse or promote products derived from
       
    21 # this software without specific prior written permission.
       
    22 #
       
    23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
       
    24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
       
    25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
       
    26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
       
    27 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
       
    28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
       
    29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
       
    30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
       
    33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    34 
       
    35 # This is the modified version of Google's cpplint. The original code is
       
    36 # http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py
       
    37 
       
    38 """Support for check-webkit-style."""
       
    39 
       
    40 import codecs
       
    41 import math  # for log
       
    42 import os
       
    43 import os.path
       
    44 import re
       
    45 import sre_compile
       
    46 import string
       
    47 import sys
       
    48 import unicodedata
       
    49 
       
    50 
       
    51 # Headers that we consider STL headers.
       
    52 _STL_HEADERS = frozenset([
       
    53     'algobase.h', 'algorithm', 'alloc.h', 'bitset', 'deque', 'exception',
       
    54     'function.h', 'functional', 'hash_map', 'hash_map.h', 'hash_set',
       
    55     'hash_set.h', 'iterator', 'list', 'list.h', 'map', 'memory', 'pair.h',
       
    56     'pthread_alloc', 'queue', 'set', 'set.h', 'sstream', 'stack',
       
    57     'stl_alloc.h', 'stl_relops.h', 'type_traits.h',
       
    58     'utility', 'vector', 'vector.h',
       
    59     ])
       
    60 
       
    61 
       
    62 # Non-STL C++ system headers.
       
    63 _CPP_HEADERS = frozenset([
       
    64     'algo.h', 'builtinbuf.h', 'bvector.h', 'cassert', 'cctype',
       
    65     'cerrno', 'cfloat', 'ciso646', 'climits', 'clocale', 'cmath',
       
    66     'complex', 'complex.h', 'csetjmp', 'csignal', 'cstdarg', 'cstddef',
       
    67     'cstdio', 'cstdlib', 'cstring', 'ctime', 'cwchar', 'cwctype',
       
    68     'defalloc.h', 'deque.h', 'editbuf.h', 'exception', 'fstream',
       
    69     'fstream.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip',
       
    70     'iomanip.h', 'ios', 'iosfwd', 'iostream', 'iostream.h', 'istream.h',
       
    71     'iterator.h', 'limits', 'map.h', 'multimap.h', 'multiset.h',
       
    72     'numeric', 'ostream.h', 'parsestream.h', 'pfstream.h', 'PlotFile.h',
       
    73     'procbuf.h', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h',
       
    74     'SFile.h', 'slist', 'slist.h', 'stack.h', 'stdexcept',
       
    75     'stdiostream.h', 'streambuf.h', 'stream.h', 'strfile.h', 'string',
       
    76     'strstream', 'strstream.h', 'tempbuf.h', 'tree.h', 'typeinfo', 'valarray',
       
    77     ])
       
    78 
       
    79 
       
    80 # Assertion macros.  These are defined in base/logging.h and
       
    81 # testing/base/gunit.h.  Note that the _M versions need to come first
       
    82 # for substring matching to work.
       
    83 _CHECK_MACROS = [
       
    84     'DCHECK', 'CHECK',
       
    85     'EXPECT_TRUE_M', 'EXPECT_TRUE',
       
    86     'ASSERT_TRUE_M', 'ASSERT_TRUE',
       
    87     'EXPECT_FALSE_M', 'EXPECT_FALSE',
       
    88     'ASSERT_FALSE_M', 'ASSERT_FALSE',
       
    89     ]
       
    90 
       
    91 # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE
       
    92 _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS])
       
    93 
       
    94 for op, replacement in [('==', 'EQ'), ('!=', 'NE'),
       
    95                         ('>=', 'GE'), ('>', 'GT'),
       
    96                         ('<=', 'LE'), ('<', 'LT')]:
       
    97     _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement
       
    98     _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement
       
    99     _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement
       
   100     _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement
       
   101     _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement
       
   102     _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement
       
   103 
       
   104 for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'),
       
   105                             ('>=', 'LT'), ('>', 'LE'),
       
   106                             ('<=', 'GT'), ('<', 'GE')]:
       
   107     _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement
       
   108     _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement
       
   109     _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement
       
   110     _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement
       
   111 
       
   112 
       
   113 # These constants define types of headers for use with
       
   114 # _IncludeState.check_next_include_order().
       
   115 _CONFIG_HEADER = 0
       
   116 _PRIMARY_HEADER = 1
       
   117 _OTHER_HEADER = 2
       
   118 _MOC_HEADER = 3
       
   119 
       
   120 
       
   121 # The regexp compilation caching is inlined in all regexp functions for
       
   122 # performance reasons; factoring it out into a separate function turns out
       
   123 # to be noticeably expensive.
       
   124 _regexp_compile_cache = {}
       
   125 
       
   126 
       
   127 def match(pattern, s):
       
   128     """Matches the string with the pattern, caching the compiled regexp."""
       
   129     if not pattern in _regexp_compile_cache:
       
   130         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
       
   131     return _regexp_compile_cache[pattern].match(s)
       
   132 
       
   133 
       
   134 def search(pattern, s):
       
   135     """Searches the string for the pattern, caching the compiled regexp."""
       
   136     if not pattern in _regexp_compile_cache:
       
   137         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
       
   138     return _regexp_compile_cache[pattern].search(s)
       
   139 
       
   140 
       
   141 def sub(pattern, replacement, s):
       
   142     """Substitutes occurrences of a pattern, caching the compiled regexp."""
       
   143     if not pattern in _regexp_compile_cache:
       
   144         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
       
   145     return _regexp_compile_cache[pattern].sub(replacement, s)
       
   146 
       
   147 
       
   148 def subn(pattern, replacement, s):
       
   149     """Substitutes occurrences of a pattern, caching the compiled regexp."""
       
   150     if not pattern in _regexp_compile_cache:
       
   151         _regexp_compile_cache[pattern] = sre_compile.compile(pattern)
       
   152     return _regexp_compile_cache[pattern].subn(replacement, s)
       
   153 
       
   154 
       
   155 def up_to_unmatched_closing_paren(s):
       
   156     """Splits a string into two parts up to first unmatched ')'.
       
   157 
       
   158     Args:
       
   159       s: a string which is a substring of line after '('
       
   160       (e.g., "a == (b + c))").
       
   161 
       
   162     Returns:
       
   163       A pair of strings (prefix before first unmatched ')',
       
   164       remainder of s after first unmatched ')'), e.g.,
       
   165       up_to_unmatched_closing_paren("a == (b + c)) { ")
       
   166       returns "a == (b + c)", " {".
       
   167       Returns None, None if there is no unmatched ')'
       
   168 
       
   169     """
       
   170     i = 1
       
   171     for pos, c in enumerate(s):
       
   172       if c == '(':
       
   173         i += 1
       
   174       elif c == ')':
       
   175         i -= 1
       
   176         if i == 0:
       
   177           return s[:pos], s[pos + 1:]
       
   178     return None, None
       
   179 
       
   180 class _IncludeState(dict):
       
   181     """Tracks line numbers for includes, and the order in which includes appear.
       
   182 
       
   183     As a dict, an _IncludeState object serves as a mapping between include
       
   184     filename and line number on which that file was included.
       
   185 
       
   186     Call check_next_include_order() once for each header in the file, passing
       
   187     in the type constants defined above. Calls in an illegal order will
       
   188     raise an _IncludeError with an appropriate error message.
       
   189 
       
   190     """
       
   191     # self._section will move monotonically through this set. If it ever
       
   192     # needs to move backwards, check_next_include_order will raise an error.
       
   193     _INITIAL_SECTION = 0
       
   194     _CONFIG_SECTION = 1
       
   195     _PRIMARY_SECTION = 2
       
   196     _OTHER_SECTION = 3
       
   197 
       
   198     _TYPE_NAMES = {
       
   199         _CONFIG_HEADER: 'WebCore config.h',
       
   200         _PRIMARY_HEADER: 'header this file implements',
       
   201         _OTHER_HEADER: 'other header',
       
   202         _MOC_HEADER: 'moc file',
       
   203         }
       
   204     _SECTION_NAMES = {
       
   205         _INITIAL_SECTION: "... nothing.",
       
   206         _CONFIG_SECTION: "WebCore config.h.",
       
   207         _PRIMARY_SECTION: 'a header this file implements.',
       
   208         _OTHER_SECTION: 'other header.',
       
   209         }
       
   210 
       
   211     def __init__(self):
       
   212         dict.__init__(self)
       
   213         self._section = self._INITIAL_SECTION
       
   214         self._visited_primary_section = False
       
   215         self.header_types = dict();
       
   216 
       
   217     def visited_primary_section(self):
       
   218         return self._visited_primary_section
       
   219 
       
   220     def check_next_include_order(self, header_type, file_is_header):
       
   221         """Returns a non-empty error message if the next header is out of order.
       
   222 
       
   223         This function also updates the internal state to be ready to check
       
   224         the next include.
       
   225 
       
   226         Args:
       
   227           header_type: One of the _XXX_HEADER constants defined above.
       
   228           file_is_header: Whether the file that owns this _IncludeState is itself a header
       
   229 
       
   230         Returns:
       
   231           The empty string if the header is in the right order, or an
       
   232           error message describing what's wrong.
       
   233 
       
   234         """
       
   235         if header_type == _CONFIG_HEADER and file_is_header:
       
   236             return 'Header file should not contain WebCore config.h.'
       
   237         if header_type == _PRIMARY_HEADER and file_is_header:
       
   238             return 'Header file should not contain itself.'
       
   239         if header_type == _MOC_HEADER:
       
   240             return ''
       
   241 
       
   242         error_message = ''
       
   243         if self._section != self._OTHER_SECTION:
       
   244             before_error_message = ('Found %s before %s' %
       
   245                                     (self._TYPE_NAMES[header_type],
       
   246                                      self._SECTION_NAMES[self._section + 1]))
       
   247         after_error_message = ('Found %s after %s' %
       
   248                                 (self._TYPE_NAMES[header_type],
       
   249                                  self._SECTION_NAMES[self._section]))
       
   250 
       
   251         if header_type == _CONFIG_HEADER:
       
   252             if self._section >= self._CONFIG_SECTION:
       
   253                 error_message = after_error_message
       
   254             self._section = self._CONFIG_SECTION
       
   255         elif header_type == _PRIMARY_HEADER:
       
   256             if self._section >= self._PRIMARY_SECTION:
       
   257                 error_message = after_error_message
       
   258             elif self._section < self._CONFIG_SECTION:
       
   259                 error_message = before_error_message
       
   260             self._section = self._PRIMARY_SECTION
       
   261             self._visited_primary_section = True
       
   262         else:
       
   263             assert header_type == _OTHER_HEADER
       
   264             if not file_is_header and self._section < self._PRIMARY_SECTION:
       
   265                 error_message = before_error_message
       
   266             self._section = self._OTHER_SECTION
       
   267 
       
   268         return error_message
       
   269 
       
   270 
       
   271 class _FunctionState(object):
       
   272     """Tracks current function name and the number of lines in its body.
       
   273 
       
   274     Attributes:
       
   275       min_confidence: The minimum confidence level to use while checking style.
       
   276 
       
   277     """
       
   278 
       
   279     _NORMAL_TRIGGER = 250  # for --v=0, 500 for --v=1, etc.
       
   280     _TEST_TRIGGER = 400    # about 50% more than _NORMAL_TRIGGER.
       
   281 
       
   282     def __init__(self, min_confidence):
       
   283         self.min_confidence = min_confidence
       
   284         self.current_function = ''
       
   285         self.in_a_function = False
       
   286         self.lines_in_function = 0
       
   287 
       
   288     def begin(self, function_name):
       
   289         """Start analyzing function body.
       
   290 
       
   291         Args:
       
   292             function_name: The name of the function being tracked.
       
   293         """
       
   294         self.in_a_function = True
       
   295         self.lines_in_function = 0
       
   296         self.current_function = function_name
       
   297 
       
   298     def count(self):
       
   299         """Count line in current function body."""
       
   300         if self.in_a_function:
       
   301             self.lines_in_function += 1
       
   302 
       
   303     def check(self, error, line_number):
       
   304         """Report if too many lines in function body.
       
   305 
       
   306         Args:
       
   307           error: The function to call with any errors found.
       
   308           line_number: The number of the line to check.
       
   309         """
       
   310         if match(r'T(EST|est)', self.current_function):
       
   311             base_trigger = self._TEST_TRIGGER
       
   312         else:
       
   313             base_trigger = self._NORMAL_TRIGGER
       
   314         trigger = base_trigger * 2 ** self.min_confidence
       
   315 
       
   316         if self.lines_in_function > trigger:
       
   317             error_level = int(math.log(self.lines_in_function / base_trigger, 2))
       
   318             # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ...
       
   319             if error_level > 5:
       
   320                 error_level = 5
       
   321             error(line_number, 'readability/fn_size', error_level,
       
   322                   'Small and focused functions are preferred:'
       
   323                   ' %s has %d non-comment lines'
       
   324                   ' (error triggered by exceeding %d lines).'  % (
       
   325                       self.current_function, self.lines_in_function, trigger))
       
   326 
       
   327     def end(self):
       
   328         """Stop analizing function body."""
       
   329         self.in_a_function = False
       
   330 
       
   331 
       
   332 class _IncludeError(Exception):
       
   333     """Indicates a problem with the include order in a file."""
       
   334     pass
       
   335 
       
   336 
       
   337 def is_c_or_objective_c(file_extension):
       
   338    """Return whether the file extension corresponds to C or Objective-C.
       
   339 
       
   340    Args:
       
   341      file_extension: The file extension without the leading dot.
       
   342 
       
   343    """
       
   344    return file_extension in ['c', 'm']
       
   345 
       
   346 
       
   347 class FileInfo:
       
   348     """Provides utility functions for filenames.
       
   349 
       
   350     FileInfo provides easy access to the components of a file's path
       
   351     relative to the project root.
       
   352     """
       
   353 
       
   354     def __init__(self, filename):
       
   355         self._filename = filename
       
   356 
       
   357     def full_name(self):
       
   358         """Make Windows paths like Unix."""
       
   359         return os.path.abspath(self._filename).replace('\\', '/')
       
   360 
       
   361     def repository_name(self):
       
   362         """Full name after removing the local path to the repository.
       
   363 
       
   364         If we have a real absolute path name here we can try to do something smart:
       
   365         detecting the root of the checkout and truncating /path/to/checkout from
       
   366         the name so that we get header guards that don't include things like
       
   367         "C:\Documents and Settings\..." or "/home/username/..." in them and thus
       
   368         people on different computers who have checked the source out to different
       
   369         locations won't see bogus errors.
       
   370         """
       
   371         fullname = self.full_name()
       
   372 
       
   373         if os.path.exists(fullname):
       
   374             project_dir = os.path.dirname(fullname)
       
   375 
       
   376             if os.path.exists(os.path.join(project_dir, ".svn")):
       
   377                 # If there's a .svn file in the current directory, we
       
   378                 # recursively look up the directory tree for the top
       
   379                 # of the SVN checkout
       
   380                 root_dir = project_dir
       
   381                 one_up_dir = os.path.dirname(root_dir)
       
   382                 while os.path.exists(os.path.join(one_up_dir, ".svn")):
       
   383                     root_dir = os.path.dirname(root_dir)
       
   384                     one_up_dir = os.path.dirname(one_up_dir)
       
   385 
       
   386                 prefix = os.path.commonprefix([root_dir, project_dir])
       
   387                 return fullname[len(prefix) + 1:]
       
   388 
       
   389             # Not SVN? Try to find a git top level directory by
       
   390             # searching up from the current path.
       
   391             root_dir = os.path.dirname(fullname)
       
   392             while (root_dir != os.path.dirname(root_dir)
       
   393                    and not os.path.exists(os.path.join(root_dir, ".git"))):
       
   394                 root_dir = os.path.dirname(root_dir)
       
   395                 if os.path.exists(os.path.join(root_dir, ".git")):
       
   396                     prefix = os.path.commonprefix([root_dir, project_dir])
       
   397                     return fullname[len(prefix) + 1:]
       
   398 
       
   399         # Don't know what to do; header guard warnings may be wrong...
       
   400         return fullname
       
   401 
       
   402     def split(self):
       
   403         """Splits the file into the directory, basename, and extension.
       
   404 
       
   405         For 'chrome/browser/browser.cpp', Split() would
       
   406         return ('chrome/browser', 'browser', '.cpp')
       
   407 
       
   408         Returns:
       
   409           A tuple of (directory, basename, extension).
       
   410         """
       
   411 
       
   412         googlename = self.repository_name()
       
   413         project, rest = os.path.split(googlename)
       
   414         return (project,) + os.path.splitext(rest)
       
   415 
       
   416     def base_name(self):
       
   417         """File base name - text after the final slash, before the final period."""
       
   418         return self.split()[1]
       
   419 
       
   420     def extension(self):
       
   421         """File extension - text following the final period."""
       
   422         return self.split()[2]
       
   423 
       
   424     def no_extension(self):
       
   425         """File has no source file extension."""
       
   426         return '/'.join(self.split()[0:2])
       
   427 
       
   428     def is_source(self):
       
   429         """File has a source file extension."""
       
   430         return self.extension()[1:] in ('c', 'cc', 'cpp', 'cxx')
       
   431 
       
   432 
       
# Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard.
_RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile(
    r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)')
# Matches strings.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"')
# Matches characters.  Escape codes should already be removed by ESCAPES.
_RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'")
# Matches C-style /* ... */ comments that begin and end on the same line.
# This RE is a little bit more complicated than one might expect, because it
# also has to decide which of the surrounding spaces to remove, so that
# comments inside statements are handled better.
# The current rule is: We only clear spaces from both sides when we're at the
# end of the line. Otherwise, we try to remove spaces from the right side;
# if this doesn't work we try on the left side, but only if there's a
# non-word character on the right.
_RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile(
    r"""(\s*/\*.*\*/\s*$|
            /\*.*\*/\s+|
         \s+/\*.*\*/(?=\W)|
            /\*.*\*/)""", re.VERBOSE)
       
   453 
       
   454 
       
   455 def is_cpp_string(line):
       
   456     """Does line terminate so, that the next symbol is in string constant.
       
   457 
       
   458     This function does not consider single-line nor multi-line comments.
       
   459 
       
   460     Args:
       
   461       line: is a partial line of code starting from the 0..n.
       
   462 
       
   463     Returns:
       
   464       True, if next character appended to 'line' is inside a
       
   465       string constant.
       
   466     """
       
   467 
       
   468     line = line.replace(r'\\', 'XX')  # after this, \\" does not match to \"
       
   469     return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1
       
   470 
       
   471 
       
   472 def find_next_multi_line_comment_start(lines, line_index):
       
   473     """Find the beginning marker for a multiline comment."""
       
   474     while line_index < len(lines):
       
   475         if lines[line_index].strip().startswith('/*'):
       
   476             # Only return this marker if the comment goes beyond this line
       
   477             if lines[line_index].strip().find('*/', 2) < 0:
       
   478                 return line_index
       
   479         line_index += 1
       
   480     return len(lines)
       
   481 
       
   482 
       
   483 def find_next_multi_line_comment_end(lines, line_index):
       
   484     """We are inside a comment, find the end marker."""
       
   485     while line_index < len(lines):
       
   486         if lines[line_index].strip().endswith('*/'):
       
   487             return line_index
       
   488         line_index += 1
       
   489     return len(lines)
       
   490 
       
   491 
       
   492 def remove_multi_line_comments_from_range(lines, begin, end):
       
   493     """Clears a range of lines for multi-line comments."""
       
   494     # Having // dummy comments makes the lines non-empty, so we will not get
       
   495     # unnecessary blank line warnings later in the code.
       
   496     for i in range(begin, end):
       
   497         lines[i] = '// dummy'
       
   498 
       
   499 
       
   500 def remove_multi_line_comments(lines, error):
       
   501     """Removes multiline (c-style) comments from lines."""
       
   502     line_index = 0
       
   503     while line_index < len(lines):
       
   504         line_index_begin = find_next_multi_line_comment_start(lines, line_index)
       
   505         if line_index_begin >= len(lines):
       
   506             return
       
   507         line_index_end = find_next_multi_line_comment_end(lines, line_index_begin)
       
   508         if line_index_end >= len(lines):
       
   509             error(line_index_begin + 1, 'readability/multiline_comment', 5,
       
   510                   'Could not find end of multi-line comment')
       
   511             return
       
   512         remove_multi_line_comments_from_range(lines, line_index_begin, line_index_end + 1)
       
   513         line_index = line_index_end + 1
       
   514 
       
   515 
       
   516 def cleanse_comments(line):
       
   517     """Removes //-comments and single-line C-style /* */ comments.
       
   518 
       
   519     Args:
       
   520       line: A line of C++ source.
       
   521 
       
   522     Returns:
       
   523       The line with single-line comments removed.
       
   524     """
       
   525     comment_position = line.find('//')
       
   526     if comment_position != -1 and not is_cpp_string(line[:comment_position]):
       
   527         line = line[:comment_position]
       
   528     # get rid of /* ... */
       
   529     return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line)
       
   530 
       
   531 
       
   532 class CleansedLines(object):
       
   533     """Holds 3 copies of all lines with different preprocessing applied to them.
       
   534 
       
   535     1) elided member contains lines without strings and comments,
       
   536     2) lines member contains lines without comments, and
       
   537     3) raw member contains all the lines without processing.
       
   538     All these three members are of <type 'list'>, and of the same length.
       
   539     """
       
   540 
       
   541     def __init__(self, lines):
       
   542         self.elided = []
       
   543         self.lines = []
       
   544         self.raw_lines = lines
       
   545         self._num_lines = len(lines)
       
   546         for line_number in range(len(lines)):
       
   547             self.lines.append(cleanse_comments(lines[line_number]))
       
   548             elided = self.collapse_strings(lines[line_number])
       
   549             self.elided.append(cleanse_comments(elided))
       
   550 
       
   551     def num_lines(self):
       
   552         """Returns the number of lines represented."""
       
   553         return self._num_lines
       
   554 
       
   555     @staticmethod
       
   556     def collapse_strings(elided):
       
   557         """Collapses strings and chars on a line to simple "" or '' blocks.
       
   558 
       
   559         We nix strings first so we're not fooled by text like '"http://"'
       
   560 
       
   561         Args:
       
   562           elided: The line being processed.
       
   563 
       
   564         Returns:
       
   565           The line with collapsed strings.
       
   566         """
       
   567         if not _RE_PATTERN_INCLUDE.match(elided):
       
   568             # Remove escaped characters first to make quote/single quote collapsing
       
   569             # basic.  Things that look like escaped characters shouldn't occur
       
   570             # outside of strings and chars.
       
   571             elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided)
       
   572             elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided)
       
   573             elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided)
       
   574         return elided
       
   575 
       
   576 
       
   577 def close_expression(clean_lines, line_number, pos):
       
   578     """If input points to ( or { or [, finds the position that closes it.
       
   579 
       
   580     If lines[line_number][pos] points to a '(' or '{' or '[', finds the the
       
   581     line_number/pos that correspond to the closing of the expression.
       
   582 
       
   583     Args:
       
   584       clean_lines: A CleansedLines instance containing the file.
       
   585       line_number: The number of the line to check.
       
   586       pos: A position on the line.
       
   587 
       
   588     Returns:
       
   589       A tuple (line, line_number, pos) pointer *past* the closing brace, or
       
   590       (line, len(lines), -1) if we never find a close.  Note we ignore
       
   591       strings and comments when matching; and the line we return is the
       
   592       'cleansed' line at line_number.
       
   593     """
       
   594 
       
   595     line = clean_lines.elided[line_number]
       
   596     start_character = line[pos]
       
   597     if start_character not in '({[':
       
   598         return (line, clean_lines.num_lines(), -1)
       
   599     if start_character == '(':
       
   600         end_character = ')'
       
   601     if start_character == '[':
       
   602         end_character = ']'
       
   603     if start_character == '{':
       
   604         end_character = '}'
       
   605 
       
   606     num_open = line.count(start_character) - line.count(end_character)
       
   607     while line_number < clean_lines.num_lines() and num_open > 0:
       
   608         line_number += 1
       
   609         line = clean_lines.elided[line_number]
       
   610         num_open += line.count(start_character) - line.count(end_character)
       
   611     # OK, now find the end_character that actually got us back to even
       
   612     endpos = len(line)
       
   613     while num_open >= 0:
       
   614         endpos = line.rfind(')', 0, endpos)
       
   615         num_open -= 1                 # chopped off another )
       
   616     return (line, line_number, endpos + 1)
       
   617 
       
   618 
       
   619 def check_for_copyright(lines, error):
       
   620     """Logs an error if no Copyright message appears at the top of the file."""
       
   621 
       
   622     # We'll say it should occur by line 10. Don't forget there's a
       
   623     # dummy line at the front.
       
   624     for line in xrange(1, min(len(lines), 11)):
       
   625         if re.search(r'Copyright', lines[line], re.I):
       
   626             break
       
   627     else:                       # means no copyright line was found
       
   628         error(0, 'legal/copyright', 5,
       
   629               'No copyright message found.  '
       
   630               'You should have a line: "Copyright [year] <Copyright Owner>"')
       
   631 
       
   632 
       
   633 def get_header_guard_cpp_variable(filename):
       
   634     """Returns the CPP variable that should be used as a header guard.
       
   635 
       
   636     Args:
       
   637       filename: The name of a C++ header file.
       
   638 
       
   639     Returns:
       
   640       The CPP variable that should be used as a header guard in the
       
   641       named file.
       
   642 
       
   643     """
       
   644 
       
   645     # Restores original filename in case that style checker is invoked from Emacs's
       
   646     # flymake.
       
   647     filename = re.sub(r'_flymake\.h$', '.h', filename)
       
   648 
       
   649     return sub(r'[-.\s]', '_', os.path.basename(filename))
       
   650 
       
   651 
       
   652 def check_for_header_guard(filename, lines, error):
       
   653     """Checks that the file contains a header guard.
       
   654 
       
   655     Logs an error if no #ifndef header guard is present.  For other
       
   656     headers, checks that the full pathname is used.
       
   657 
       
   658     Args:
       
   659       filename: The name of the C++ header file.
       
   660       lines: An array of strings, each representing a line of the file.
       
   661       error: The function to call with any errors found.
       
   662     """
       
   663 
       
   664     cppvar = get_header_guard_cpp_variable(filename)
       
   665 
       
   666     ifndef = None
       
   667     ifndef_line_number = 0
       
   668     define = None
       
   669     for line_number, line in enumerate(lines):
       
   670         line_split = line.split()
       
   671         if len(line_split) >= 2:
       
   672             # find the first occurrence of #ifndef and #define, save arg
       
   673             if not ifndef and line_split[0] == '#ifndef':
       
   674                 # set ifndef to the header guard presented on the #ifndef line.
       
   675                 ifndef = line_split[1]
       
   676                 ifndef_line_number = line_number
       
   677             if not define and line_split[0] == '#define':
       
   678                 define = line_split[1]
       
   679             if define and ifndef:
       
   680                 break
       
   681 
       
   682     if not ifndef or not define or ifndef != define:
       
   683         error(0, 'build/header_guard', 5,
       
   684               'No #ifndef header guard found, suggested CPP variable is: %s' %
       
   685               cppvar)
       
   686         return
       
   687 
       
   688     # The guard should be File_h.
       
   689     if ifndef != cppvar:
       
   690         error(ifndef_line_number, 'build/header_guard', 5,
       
   691               '#ifndef header guard has wrong style, please use: %s' % cppvar)
       
   692 
       
   693 
       
   694 def check_for_unicode_replacement_characters(lines, error):
       
   695     """Logs an error for each line containing Unicode replacement characters.
       
   696 
       
   697     These indicate that either the file contained invalid UTF-8 (likely)
       
   698     or Unicode replacement characters (which it shouldn't).  Note that
       
   699     it's possible for this to throw off line numbering if the invalid
       
   700     UTF-8 occurred adjacent to a newline.
       
   701 
       
   702     Args:
       
   703       lines: An array of strings, each representing a line of the file.
       
   704       error: The function to call with any errors found.
       
   705     """
       
   706     for line_number, line in enumerate(lines):
       
   707         if u'\ufffd' in line:
       
   708             error(line_number, 'readability/utf8', 5,
       
   709                   'Line contains invalid UTF-8 (or Unicode replacement character).')
       
   710 
       
   711 
       
   712 def check_for_new_line_at_eof(lines, error):
       
   713     """Logs an error if there is no newline char at the end of the file.
       
   714 
       
   715     Args:
       
   716       lines: An array of strings, each representing a line of the file.
       
   717       error: The function to call with any errors found.
       
   718     """
       
   719 
       
   720     # The array lines() was created by adding two newlines to the
       
   721     # original file (go figure), then splitting on \n.
       
   722     # To verify that the file ends in \n, we just have to make sure the
       
   723     # last-but-two element of lines() exists and is empty.
       
   724     if len(lines) < 3 or lines[-2]:
       
   725         error(len(lines) - 2, 'whitespace/ending_newline', 5,
       
   726               'Could not find a newline character at the end of the file.')
       
   727 
       
   728 
       
   729 def check_for_multiline_comments_and_strings(clean_lines, line_number, error):
       
   730     """Logs an error if we see /* ... */ or "..." that extend past one line.
       
   731 
       
   732     /* ... */ comments are legit inside macros, for one line.
       
   733     Otherwise, we prefer // comments, so it's ok to warn about the
       
   734     other.  Likewise, it's ok for strings to extend across multiple
       
   735     lines, as long as a line continuation character (backslash)
       
   736     terminates each line. Although not currently prohibited by the C++
       
   737     style guide, it's ugly and unnecessary. We don't do well with either
       
   738     in this lint program, so we warn about both.
       
   739 
       
   740     Args:
       
   741       clean_lines: A CleansedLines instance containing the file.
       
   742       line_number: The number of the line to check.
       
   743       error: The function to call with any errors found.
       
   744     """
       
   745     line = clean_lines.elided[line_number]
       
   746 
       
   747     # Remove all \\ (escaped backslashes) from the line. They are OK, and the
       
   748     # second (escaped) slash may trigger later \" detection erroneously.
       
   749     line = line.replace('\\\\', '')
       
   750 
       
   751     if line.count('/*') > line.count('*/'):
       
   752         error(line_number, 'readability/multiline_comment', 5,
       
   753               'Complex multi-line /*...*/-style comment found. '
       
   754               'Lint may give bogus warnings.  '
       
   755               'Consider replacing these with //-style comments, '
       
   756               'with #if 0...#endif, '
       
   757               'or with more clearly structured multi-line comments.')
       
   758 
       
   759     if (line.count('"') - line.count('\\"')) % 2:
       
   760         error(line_number, 'readability/multiline_string', 5,
       
   761               'Multi-line string ("...") found.  This lint script doesn\'t '
       
   762               'do well with such strings, and may give bogus warnings.  They\'re '
       
   763               'ugly and unnecessary, and you should use concatenation instead".')
       
   764 
       
   765 
       
   766 _THREADING_LIST = (
       
   767     ('asctime(', 'asctime_r('),
       
   768     ('ctime(', 'ctime_r('),
       
   769     ('getgrgid(', 'getgrgid_r('),
       
   770     ('getgrnam(', 'getgrnam_r('),
       
   771     ('getlogin(', 'getlogin_r('),
       
   772     ('getpwnam(', 'getpwnam_r('),
       
   773     ('getpwuid(', 'getpwuid_r('),
       
   774     ('gmtime(', 'gmtime_r('),
       
   775     ('localtime(', 'localtime_r('),
       
   776     ('rand(', 'rand_r('),
       
   777     ('readdir(', 'readdir_r('),
       
   778     ('strtok(', 'strtok_r('),
       
   779     ('ttyname(', 'ttyname_r('),
       
   780     )
       
   781 
       
   782 
       
   783 def check_posix_threading(clean_lines, line_number, error):
       
   784     """Checks for calls to thread-unsafe functions.
       
   785 
       
   786     Much code has been originally written without consideration of
       
   787     multi-threading. Also, engineers are relying on their old experience;
       
   788     they have learned posix before threading extensions were added. These
       
   789     tests guide the engineers to use thread-safe functions (when using
       
   790     posix directly).
       
   791 
       
   792     Args:
       
   793       clean_lines: A CleansedLines instance containing the file.
       
   794       line_number: The number of the line to check.
       
   795       error: The function to call with any errors found.
       
   796     """
       
   797     line = clean_lines.elided[line_number]
       
   798     for single_thread_function, multithread_safe_function in _THREADING_LIST:
       
   799         index = line.find(single_thread_function)
       
   800         # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
       
   801         if index >= 0 and (index == 0 or (not line[index - 1].isalnum()
       
   802                                           and line[index - 1] not in ('_', '.', '>'))):
       
   803             error(line_number, 'runtime/threadsafe_fn', 2,
       
   804                   'Consider using ' + multithread_safe_function +
       
   805                   '...) instead of ' + single_thread_function +
       
   806                   '...) for improved thread safety.')
       
   807 
       
   808 
       
   809 # Matches invalid increment: *count++, which moves pointer instead of
       
   810 # incrementing a value.
       
   811 _RE_PATTERN_INVALID_INCREMENT = re.compile(
       
   812     r'^\s*\*\w+(\+\+|--);')
       
   813 
       
   814 
       
   815 def check_invalid_increment(clean_lines, line_number, error):
       
   816     """Checks for invalid increment *count++.
       
   817 
       
   818     For example following function:
       
   819     void increment_counter(int* count) {
       
   820         *count++;
       
   821     }
       
   822     is invalid, because it effectively does count++, moving pointer, and should
       
   823     be replaced with ++*count, (*count)++ or *count += 1.
       
   824 
       
   825     Args:
       
   826       clean_lines: A CleansedLines instance containing the file.
       
   827       line_number: The number of the line to check.
       
   828       error: The function to call with any errors found.
       
   829     """
       
   830     line = clean_lines.elided[line_number]
       
   831     if _RE_PATTERN_INVALID_INCREMENT.match(line):
       
   832         error(line_number, 'runtime/invalid_increment', 5,
       
   833               'Changing pointer instead of value (or unused value of operator*).')
       
   834 
       
   835 
       
   836 class _ClassInfo(object):
       
   837     """Stores information about a class."""
       
   838 
       
   839     def __init__(self, name, line_number):
       
   840         self.name = name
       
   841         self.line_number = line_number
       
   842         self.seen_open_brace = False
       
   843         self.is_derived = False
       
   844         self.virtual_method_line_number = None
       
   845         self.has_virtual_destructor = False
       
   846         self.brace_depth = 0
       
   847 
       
   848 
       
   849 class _ClassState(object):
       
   850     """Holds the current state of the parse relating to class declarations.
       
   851 
       
   852     It maintains a stack of _ClassInfos representing the parser's guess
       
   853     as to the current nesting of class declarations. The innermost class
       
   854     is at the top (back) of the stack. Typically, the stack will either
       
   855     be empty or have exactly one entry.
       
   856     """
       
   857 
       
   858     def __init__(self):
       
   859         self.classinfo_stack = []
       
   860 
       
   861     def check_finished(self, error):
       
   862         """Checks that all classes have been completely parsed.
       
   863 
       
   864         Call this when all lines in a file have been processed.
       
   865         Args:
       
   866           error: The function to call with any errors found.
       
   867         """
       
   868         if self.classinfo_stack:
       
   869             # Note: This test can result in false positives if #ifdef constructs
       
   870             # get in the way of brace matching. See the testBuildClass test in
       
   871             # cpp_style_unittest.py for an example of this.
       
   872             error(self.classinfo_stack[0].line_number, 'build/class', 5,
       
   873                   'Failed to find complete declaration of class %s' %
       
   874                   self.classinfo_stack[0].name)
       
   875 
       
   876 
       
   877 class _FileState(object):
       
   878     def __init__(self):
       
   879         self._did_inside_namespace_indent_warning = False
       
   880 
       
   881     def set_did_inside_namespace_indent_warning(self):
       
   882         self._did_inside_namespace_indent_warning = True
       
   883 
       
   884     def did_inside_namespace_indent_warning(self):
       
   885         return self._did_inside_namespace_indent_warning
       
   886 
       
   887 def check_for_non_standard_constructs(clean_lines, line_number,
       
   888                                       class_state, error):
       
   889     """Logs an error if we see certain non-ANSI constructs ignored by gcc-2.
       
   890 
       
   891     Complain about several constructs which gcc-2 accepts, but which are
       
   892     not standard C++.  Warning about these in lint is one way to ease the
       
   893     transition to new compilers.
       
   894     - put storage class first (e.g. "static const" instead of "const static").
       
   895     - "%lld" instead of %qd" in printf-type functions.
       
   896     - "%1$d" is non-standard in printf-type functions.
       
   897     - "\%" is an undefined character escape sequence.
       
   898     - text after #endif is not allowed.
       
   899     - invalid inner-style forward declaration.
       
   900     - >? and <? operators, and their >?= and <?= cousins.
       
   901     - classes with virtual methods need virtual destructors (compiler warning
       
   902         available, but not turned on yet.)
       
   903 
       
   904     Additionally, check for constructor/destructor style violations as it
       
   905     is very convenient to do so while checking for gcc-2 compliance.
       
   906 
       
   907     Args:
       
   908       clean_lines: A CleansedLines instance containing the file.
       
   909       line_number: The number of the line to check.
       
   910       class_state: A _ClassState instance which maintains information about
       
   911                    the current stack of nested class declarations being parsed.
       
   912       error: A callable to which errors are reported, which takes parameters:
       
   913              line number, error level, and message
       
   914     """
       
   915 
       
   916     # Remove comments from the line, but leave in strings for now.
       
   917     line = clean_lines.lines[line_number]
       
   918 
       
   919     if search(r'printf\s*\(.*".*%[-+ ]?\d*q', line):
       
   920         error(line_number, 'runtime/printf_format', 3,
       
   921               '%q in format strings is deprecated.  Use %ll instead.')
       
   922 
       
   923     if search(r'printf\s*\(.*".*%\d+\$', line):
       
   924         error(line_number, 'runtime/printf_format', 2,
       
   925               '%N$ formats are unconventional.  Try rewriting to avoid them.')
       
   926 
       
   927     # Remove escaped backslashes before looking for undefined escapes.
       
   928     line = line.replace('\\\\', '')
       
   929 
       
   930     if search(r'("|\').*\\(%|\[|\(|{)', line):
       
   931         error(line_number, 'build/printf_format', 3,
       
   932               '%, [, (, and { are undefined character escapes.  Unescape them.')
       
   933 
       
   934     # For the rest, work with both comments and strings removed.
       
   935     line = clean_lines.elided[line_number]
       
   936 
       
   937     if search(r'\b(const|volatile|void|char|short|int|long'
       
   938               r'|float|double|signed|unsigned'
       
   939               r'|schar|u?int8|u?int16|u?int32|u?int64)'
       
   940               r'\s+(auto|register|static|extern|typedef)\b',
       
   941               line):
       
   942         error(line_number, 'build/storage_class', 5,
       
   943               'Storage class (static, extern, typedef, etc) should be first.')
       
   944 
       
   945     if match(r'\s*#\s*endif\s*[^/\s]+', line):
       
   946         error(line_number, 'build/endif_comment', 5,
       
   947               'Uncommented text after #endif is non-standard.  Use a comment.')
       
   948 
       
   949     if match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line):
       
   950         error(line_number, 'build/forward_decl', 5,
       
   951               'Inner-style forward declarations are invalid.  Remove this line.')
       
   952 
       
   953     if search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line):
       
   954         error(line_number, 'build/deprecated', 3,
       
   955               '>? and <? (max and min) operators are non-standard and deprecated.')
       
   956 
       
   957     # Track class entry and exit, and attempt to find cases within the
       
   958     # class declaration that don't meet the C++ style
       
   959     # guidelines. Tracking is very dependent on the code matching Google
       
   960     # style guidelines, but it seems to perform well enough in testing
       
   961     # to be a worthwhile addition to the checks.
       
   962     classinfo_stack = class_state.classinfo_stack
       
   963     # Look for a class declaration
       
   964     class_decl_match = match(
       
   965         r'\s*(template\s*<[\w\s<>,:]*>\s*)?(class|struct)\s+(\w+(::\w+)*)', line)
       
   966     if class_decl_match:
       
   967         classinfo_stack.append(_ClassInfo(class_decl_match.group(3), line_number))
       
   968 
       
   969     # Everything else in this function uses the top of the stack if it's
       
   970     # not empty.
       
   971     if not classinfo_stack:
       
   972         return
       
   973 
       
   974     classinfo = classinfo_stack[-1]
       
   975 
       
   976     # If the opening brace hasn't been seen look for it and also
       
   977     # parent class declarations.
       
   978     if not classinfo.seen_open_brace:
       
   979         # If the line has a ';' in it, assume it's a forward declaration or
       
   980         # a single-line class declaration, which we won't process.
       
   981         if line.find(';') != -1:
       
   982             classinfo_stack.pop()
       
   983             return
       
   984         classinfo.seen_open_brace = (line.find('{') != -1)
       
   985         # Look for a bare ':'
       
   986         if search('(^|[^:]):($|[^:])', line):
       
   987             classinfo.is_derived = True
       
   988         if not classinfo.seen_open_brace:
       
   989             return  # Everything else in this function is for after open brace
       
   990 
       
   991     # The class may have been declared with namespace or classname qualifiers.
       
   992     # The constructor and destructor will not have those qualifiers.
       
   993     base_classname = classinfo.name.split('::')[-1]
       
   994 
       
   995     # Look for single-argument constructors that aren't marked explicit.
       
   996     # Technically a valid construct, but against style.
       
   997     args = match(r'(?<!explicit)\s+%s\s*\(([^,()]+)\)'
       
   998                  % re.escape(base_classname),
       
   999                  line)
       
  1000     if (args
       
  1001         and args.group(1) != 'void'
       
  1002         and not match(r'(const\s+)?%s\s*&' % re.escape(base_classname),
       
  1003                       args.group(1).strip())):
       
  1004         error(line_number, 'runtime/explicit', 5,
       
  1005               'Single-argument constructors should be marked explicit.')
       
  1006 
       
  1007     # Look for methods declared virtual.
       
  1008     if search(r'\bvirtual\b', line):
       
  1009         classinfo.virtual_method_line_number = line_number
       
  1010         # Only look for a destructor declaration on the same line. It would
       
  1011         # be extremely unlikely for the destructor declaration to occupy
       
  1012         # more than one line.
       
  1013         if search(r'~%s\s*\(' % base_classname, line):
       
  1014             classinfo.has_virtual_destructor = True
       
  1015 
       
  1016     # Look for class end.
       
  1017     brace_depth = classinfo.brace_depth
       
  1018     brace_depth = brace_depth + line.count('{') - line.count('}')
       
  1019     if brace_depth <= 0:
       
  1020         classinfo = classinfo_stack.pop()
       
  1021         # Try to detect missing virtual destructor declarations.
       
  1022         # For now, only warn if a non-derived class with virtual methods lacks
       
  1023         # a virtual destructor. This is to make it less likely that people will
       
  1024         # declare derived virtual destructors without declaring the base
       
  1025         # destructor virtual.
       
  1026         if ((classinfo.virtual_method_line_number is not None)
       
  1027             and (not classinfo.has_virtual_destructor)
       
  1028             and (not classinfo.is_derived)):  # Only warn for base classes
       
  1029             error(classinfo.line_number, 'runtime/virtual', 4,
       
  1030                   'The class %s probably needs a virtual destructor due to '
       
  1031                   'having virtual method(s), one declared at line %d.'
       
  1032                   % (classinfo.name, classinfo.virtual_method_line_number))
       
  1033     else:
       
  1034         classinfo.brace_depth = brace_depth
       
  1035 
       
  1036 
       
  1037 def check_spacing_for_function_call(line, line_number, error):
       
  1038     """Checks for the correctness of various spacing around function calls.
       
  1039 
       
  1040     Args:
       
  1041       line: The text of the line to check.
       
  1042       line_number: The number of the line to check.
       
  1043       error: The function to call with any errors found.
       
  1044     """
       
  1045 
       
  1046     # Since function calls often occur inside if/for/foreach/while/switch
       
  1047     # expressions - which have their own, more liberal conventions - we
       
  1048     # first see if we should be looking inside such an expression for a
       
  1049     # function call, to which we can apply more strict standards.
       
  1050     function_call = line    # if there's no control flow construct, look at whole line
       
  1051     for pattern in (r'\bif\s*\((.*)\)\s*{',
       
  1052                     r'\bfor\s*\((.*)\)\s*{',
       
  1053                     r'\bforeach\s*\((.*)\)\s*{',
       
  1054                     r'\bwhile\s*\((.*)\)\s*[{;]',
       
  1055                     r'\bswitch\s*\((.*)\)\s*{'):
       
  1056         matched = search(pattern, line)
       
  1057         if matched:
       
  1058             function_call = matched.group(1)    # look inside the parens for function calls
       
  1059             break
       
  1060 
       
  1061     # Except in if/for/foreach/while/switch, there should never be space
       
  1062     # immediately inside parens (eg "f( 3, 4 )").  We make an exception
       
  1063     # for nested parens ( (a+b) + c ).  Likewise, there should never be
       
  1064     # a space before a ( when it's a function argument.  I assume it's a
       
  1065     # function argument when the char before the whitespace is legal in
       
  1066     # a function name (alnum + _) and we're not starting a macro. Also ignore
       
  1067     # pointers and references to arrays and functions coz they're too tricky:
       
  1068     # we use a very simple way to recognize these:
       
  1069     # " (something)(maybe-something)" or
       
  1070     # " (something)(maybe-something," or
       
  1071     # " (something)[something]"
       
  1072     # Note that we assume the contents of [] to be short enough that
       
  1073     # they'll never need to wrap.
       
  1074     if (  # Ignore control structures.
       
  1075         not search(r'\b(if|for|foreach|while|switch|return|new|delete)\b', function_call)
       
  1076         # Ignore pointers/references to functions.
       
  1077         and not search(r' \([^)]+\)\([^)]*(\)|,$)', function_call)
       
  1078         # Ignore pointers/references to arrays.
       
  1079         and not search(r' \([^)]+\)\[[^\]]+\]', function_call)):
       
  1080         if search(r'\w\s*\([ \t](?!\s*\\$)', function_call):      # a ( used for a fn call
       
  1081             error(line_number, 'whitespace/parens', 4,
       
  1082                   'Extra space after ( in function call')
       
  1083         elif search(r'\([ \t]+(?!(\s*\\)|\()', function_call):
       
  1084             error(line_number, 'whitespace/parens', 2,
       
  1085                   'Extra space after (')
       
  1086         if (search(r'\w\s+\(', function_call)
       
  1087             and not search(r'#\s*define|typedef', function_call)):
       
  1088             error(line_number, 'whitespace/parens', 4,
       
  1089                   'Extra space before ( in function call')
       
  1090         # If the ) is followed only by a newline or a { + newline, assume it's
       
  1091         # part of a control statement (if/while/etc), and don't complain
       
  1092         if search(r'[^)\s]\s+\)(?!\s*$|{\s*$)', function_call):
       
  1093             error(line_number, 'whitespace/parens', 2,
       
  1094                   'Extra space before )')
       
  1095 
       
  1096 
       
  1097 def is_blank_line(line):
       
  1098     """Returns true if the given line is blank.
       
  1099 
       
  1100     We consider a line to be blank if the line is empty or consists of
       
  1101     only white spaces.
       
  1102 
       
  1103     Args:
       
  1104       line: A line of a string.
       
  1105 
       
  1106     Returns:
       
  1107       True, if the given line is blank.
       
  1108     """
       
  1109     return not line or line.isspace()
       
  1110 
       
  1111 
       
  1112 def check_for_function_lengths(clean_lines, line_number, function_state, error):
       
  1113     """Reports for long function bodies.
       
  1114 
       
  1115     For an overview why this is done, see:
       
  1116     http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions
       
  1117 
       
  1118     Uses a simplistic algorithm assuming other style guidelines
       
  1119     (especially spacing) are followed.
       
  1120     Only checks unindented functions, so class members are unchecked.
       
  1121     Trivial bodies are unchecked, so constructors with huge initializer lists
       
  1122     may be missed.
       
  1123     Blank/comment lines are not counted so as to avoid encouraging the removal
       
  1124     of vertical space and commments just to get through a lint check.
       
  1125     NOLINT *on the last line of a function* disables this check.
       
  1126 
       
  1127     Args:
       
  1128       clean_lines: A CleansedLines instance containing the file.
       
  1129       line_number: The number of the line to check.
       
  1130       function_state: Current function name and lines in body so far.
       
  1131       error: The function to call with any errors found.
       
  1132     """
       
  1133     lines = clean_lines.lines
       
  1134     line = lines[line_number]
       
  1135     raw = clean_lines.raw_lines
       
  1136     raw_line = raw[line_number]
       
  1137     joined_line = ''
       
  1138 
       
  1139     starting_func = False
       
  1140     regexp = r'(\w(\w|::|\*|\&|\s)*)\('  # decls * & space::name( ...
       
  1141     match_result = match(regexp, line)
       
  1142     if match_result:
       
  1143         # If the name is all caps and underscores, figure it's a macro and
       
  1144         # ignore it, unless it's TEST or TEST_F.
       
  1145         function_name = match_result.group(1).split()[-1]
       
  1146         if function_name == 'TEST' or function_name == 'TEST_F' or (not match(r'[A-Z_]+$', function_name)):
       
  1147             starting_func = True
       
  1148 
       
  1149     if starting_func:
       
  1150         body_found = False
       
  1151         for start_line_number in xrange(line_number, clean_lines.num_lines()):
       
  1152             start_line = lines[start_line_number]
       
  1153             joined_line += ' ' + start_line.lstrip()
       
  1154             if search(r'(;|})', start_line):  # Declarations and trivial functions
       
  1155                 body_found = True
       
  1156                 break                              # ... ignore
       
  1157             if search(r'{', start_line):
       
  1158                 body_found = True
       
  1159                 function = search(r'((\w|:)*)\(', line).group(1)
       
  1160                 if match(r'TEST', function):    # Handle TEST... macros
       
  1161                     parameter_regexp = search(r'(\(.*\))', joined_line)
       
  1162                     if parameter_regexp:             # Ignore bad syntax
       
  1163                         function += parameter_regexp.group(1)
       
  1164                 else:
       
  1165                     function += '()'
       
  1166                 function_state.begin(function)
       
  1167                 break
       
  1168         if not body_found:
       
  1169             # No body for the function (or evidence of a non-function) was found.
       
  1170             error(line_number, 'readability/fn_size', 5,
       
  1171                   'Lint failed to find start of function body.')
       
  1172     elif match(r'^\}\s*$', line):  # function end
       
  1173         if not search(r'\bNOLINT\b', raw_line):
       
  1174             function_state.check(error, line_number)
       
  1175         function_state.end()
       
  1176     elif not match(r'^\s*$', line):
       
  1177         function_state.count()  # Count non-blank/non-comment lines.
       
  1178 
       
  1179 
       
def check_spacing(file_extension, clean_lines, line_number, error):
    """Checks for the correctness of various spacing issues in the code.

    Things we check for: spaces around operators, spaces after
    if/for/while/switch, no spaces around parens in function calls, one
    space between code and an end-of-line comment, a space after '//',
    a space after commas, pointer/reference placement in declarations,
    spaces before braces and brackets, stray semicolons, don't start a
    block with a blank line, and don't end a block with a blank line.

    The blank-line and comment checks look at the raw line; every later
    check looks at the elided line (comments and strings removed).  The
    function returns early, skipping all operator and later checks, for
    #include and #import lines.

    Args:
      file_extension: The current file extension, without the leading dot.
                      Only 'cpp' and 'c' select extra declaration checks.
      clean_lines: A CleansedLines instance containing the file.
      line_number: The number of the line to check.
      error: The function to call with any errors found.  It is called as
             error(line_number, category, level, message), where level is
             an integer (presumably a confidence value -- confirm against
             the error handler).
    """

    raw = clean_lines.raw_lines
    line = raw[line_number]

    # Before nixing comments, check if the line is blank for no good
    # reason.  This includes the first line after a block is opened, and
    # blank lines at the end of a function (ie, right before a line like '}').
    if is_blank_line(line):
        elided = clean_lines.elided
        # NOTE(review): for line_number == 0 this index is -1, i.e. the last
        # line of the file -- confirm callers never pass a blank first line.
        previous_line = elided[line_number - 1]
        previous_brace = previous_line.rfind('{')
        # FIXME: Don't complain if line before blank line, and line after,
        #        both start with alnums and are indented the same amount.
        #        This ignores whitespace at the start of a namespace block
        #        because those are not usually indented.
        if (previous_brace != -1 and previous_line[previous_brace:].find('}') == -1
            and previous_line[:previous_brace].find('namespace') == -1):
            # OK, we have a blank line at the start of a code block.  Before we
            # complain, we check if it is an exception to the rule: The previous
            # non-empty line has the parameters of a function header that are indented
            # 4 spaces (because they did not fit in a 80 column line when placed on
            # the same line as the function name).  We also check for the case where
            # the previous line is indented 6 spaces, which may happen when the
            # initializers of a constructor do not fit into a 80 column line.
            exception = False
            if match(r' {6}\w', previous_line):  # Initializer list?
                # We are looking for the opening column of initializer list, which
                # should be indented 4 spaces to cause 6 space indentation afterwards.
                # Walk upwards past every consecutive 6-space-indented line.
                search_position = line_number - 2
                while (search_position >= 0
                       and match(r' {6}\w', elided[search_position])):
                    search_position -= 1
                exception = (search_position >= 0
                             and elided[search_position][:5] == '    :')
            else:
                # Search for the function arguments or an initializer list.  We use a
                # simple heuristic here: If the line is indented 4 spaces; and we have a
                # closing paren, without the opening paren, followed by an opening brace
                # or colon (for initializer lists) we assume that it is the last line of
                # a function header.  If we have a colon indented 4 spaces, it is an
                # initializer list.
                exception = (match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)',
                                   previous_line)
                             or match(r' {4}:', previous_line))

            if not exception:
                error(line_number, 'whitespace/blank_line', 2,
                      'Blank line at the start of a code block.  Is this needed?')
        # This doesn't ignore whitespace at the end of a namespace block
        # because that is too hard without pairing open/close braces;
        # however, a special exception is made for namespace closing
        # brackets which have a comment containing "namespace".
        #
        # Also, ignore blank lines at the end of a block in a long if-else
        # chain, like this:
        #   if (condition1) {
        #     // Something followed by a blank line
        #
        #   } else if (condition2) {
        #     // Something else
        #   }
        if line_number + 1 < clean_lines.num_lines():
            # The raw (not elided) next line is used so that a trailing
            # "// namespace" comment is still visible to the check below.
            next_line = raw[line_number + 1]
            if (next_line
                and match(r'\s*}', next_line)
                and next_line.find('namespace') == -1
                and next_line.find('} else ') == -1):
                error(line_number, 'whitespace/blank_line', 3,
                      'Blank line at the end of a code block.  Is this needed?')

    # Next, we complain if there's a comment too near the text
    comment_position = line.find('//')
    if comment_position != -1:
        # Check if the // may be in quotes.  If so, ignore it
        # (an even number of unescaped double quotes before the // means
        # the // is outside any string literal).
        # Comparisons made explicit for clarity -- pylint: disable-msg=C6403
        if (line.count('"', 0, comment_position) - line.count('\\"', 0, comment_position)) % 2 == 0:   # not in quotes
            # Allow one space before end of line comment.
            # The error fires when code precedes the comment and the comment
            # is preceded by either no whitespace or by two or more
            # whitespace characters.
            if (not match(r'^\s*$', line[:comment_position])
                and (comment_position >= 1
                and ((line[comment_position - 1] not in string.whitespace)
                     or (comment_position >= 2
                         and line[comment_position - 2] in string.whitespace)))):
                error(line_number, 'whitespace/comments', 5,
                      'One space before end of line comments')
            # There should always be a space between the // and the comment
            commentend = comment_position + 2
            if commentend < len(line) and not line[commentend] == ' ':
                # but some lines are exceptions -- e.g. if they're big
                # comment delimiters like:
                # //----------------------------------------------------------
                # or they begin with multiple slashes followed by a space:
                # //////// Header comment
                matched = (search(r'[=/-]{4,}\s*$', line[commentend:])
                           or search(r'^/+ ', line[commentend:]))
                if not matched:
                    error(line_number, 'whitespace/comments', 4,
                          'Should have a space between // and comment')

    # From here on every check works on the elided line.
    line = clean_lines.elided[line_number]  # get rid of comments and strings

    # Don't try to do spacing checks for operator methods
    # NOTE(review): the replacement text is 'operator\(' with a backslash;
    # whether the backslash survives depends on how sub() treats unknown
    # escapes -- confirm this is intended.
    line = sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line)
    # Don't try to do spacing checks for #include or #import statements at
    # minimum because it messes up checks for spacing around /
    if match(r'\s*#\s*(?:include|import)', line):
        return
    if search(r'[\w.]=[\w.]', line):
        error(line_number, 'whitespace/operators', 4,
              'Missing spaces around =')

    # FIXME: It's not ok to have spaces around binary operators like .

    # You should always have whitespace around binary operators.
    # Alas, we can't test < or > because they're legitimately used sans spaces
    # (a->b, vector<int> a).  The only time we can tell is a < with no >, and
    # only if it's not template params list spilling into the next line.
    matched = search(r'[^<>=!\s](==|!=|\+=|-=|\*=|/=|/|\|=|&=|<<=|>>=|<=|>=|\|\||\||&&|>>|<<)[^<>=!\s]', line)
    if not matched:
        # Note that while it seems that the '<[^<]*' term in the following
        # regexp could be simplified to '<.*', which would indeed match
        # the same class of strings, the [^<] means that searching for the
        # regexp takes linear rather than quadratic time.
        if not search(r'<[^<]*,\s*$', line):  # template params spill
            matched = search(r'[^<>=!\s](<)[^<>=!\s]([^>]|->)*$', line)
    if matched:
        # group(1) is the offending operator in both regexps above.
        error(line_number, 'whitespace/operators', 3,
              'Missing spaces around %s' % matched.group(1))

    # There shouldn't be space around unary operators
    matched = search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line)
    if matched:
        error(line_number, 'whitespace/operators', 4,
              'Extra space for operator %s' % matched.group(1))

    # A pet peeve of mine: no spaces after an if, while, switch, or for
    matched = search(r' (if\(|for\(|foreach\(|while\(|switch\()', line)
    if matched:
        error(line_number, 'whitespace/parens', 5,
              'Missing space before ( in %s' % matched.group(1))

    # For if/for/foreach/while/switch, the left and right parens should be
    # consistent about how many spaces are inside the parens, and
    # there should either be zero or one spaces inside the parens.
    # We don't want: "if ( foo)" or "if ( foo   )".
    # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
    matched = search(r'\b(?P<statement>if|for|foreach|while|switch)\s*\((?P<remainder>.*)$', line)
    if matched:
        statement = matched.group('statement')
        # condition is the text inside the parens (None when no closing
        # paren is found on this line); rest is what follows the paren.
        condition, rest = up_to_unmatched_closing_paren(matched.group('remainder'))
        if condition is not None:
            condition_match = search(r'(?P<leading>[ ]*)(?P<separator>.).*[^ ]+(?P<trailing>[ ]*)', condition)
            if condition_match:
                n_leading = len(condition_match.group('leading'))
                n_trailing = len(condition_match.group('trailing'))
                if n_leading != 0:
                    for_exception = statement == 'for' and condition.startswith(' ;')
                    if not for_exception:
                        error(line_number, 'whitespace/parens', 5,
                              'Extra space after ( in %s' % statement)
                if n_trailing != 0:
                    for_exception = statement == 'for' and condition.endswith('; ')
                    if not for_exception:
                        error(line_number, 'whitespace/parens', 5,
                              'Extra space before ) in %s' % statement)

            # Do not check for more than one command in macros
            # After the closing paren only '{', '{ }', ';' or a line
            # continuation backslash may follow.
            in_macro = match(r'\s*#define', line)
            if not in_macro and not match(r'((\s*{\s*}?)|(\s*;?))\s*\\?$', rest):
                error(line_number, 'whitespace/parens', 4,
                      'More than one command on the same line in %s' % statement)

    # You should always have a space after a comma (either as fn arg or operator)
    if search(r',[^\s]', line):
        error(line_number, 'whitespace/comma', 3,
              'Missing space after ,')

    # Two or more whitespace characters between the first two tokens of a line.
    matched = search(r'^\s*(?P<token1>[a-zA-Z0-9_\*&]+)\s\s+(?P<token2>[a-zA-Z0-9_\*&]+)', line)
    if matched:
        error(line_number, 'whitespace/declaration', 3,
              'Extra space between %s and %s' % (matched.group('token1'), matched.group('token2')))

    if file_extension == 'cpp':
        # C++ should have the & or * beside the type not the variable name.
        # The lookbehind keeps "return *foo" and "delete *foo" from matching.
        matched = match(r'\s*\w+(?<!\breturn|\bdelete)\s+(?P<pointer_operator>\*|\&)\w+', line)
        if matched:
            error(line_number, 'whitespace/declaration', 3,
                  'Declaration has space between type name and %s in %s' % (matched.group('pointer_operator'), matched.group(0).strip()))

    elif file_extension == 'c':
        # C Pointer declaration should have the * beside the variable not the type name.
        matched = search(r'^\s*\w+\*\s+\w+', line)
        if matched:
            error(line_number, 'whitespace/declaration', 3,
                  'Declaration has space between * and variable name in %s' % matched.group(0).strip())

    # Next we will look for issues with function calls.
    check_spacing_for_function_call(line, line_number, error)

    # Except after an opening paren, you should have spaces before your braces.
    # And since you should never have braces at the beginning of a line, this is
    # an easy test.
    if search(r'[^ ({]{', line):
        error(line_number, 'whitespace/braces', 5,
              'Missing space before {')

    # Make sure '} else {' has spaces.
    if search(r'}else', line):
        error(line_number, 'whitespace/braces', 5,
              'Missing space before else')

    # You shouldn't have spaces before your brackets, except maybe after
    # 'delete []' or 'new char * []'.
    if search(r'\w\s+\[', line) and not search(r'delete\s+\[', line):
        error(line_number, 'whitespace/braces', 5,
              'Extra space before [')

    # You shouldn't have a space before a semicolon at the end of the line.
    # There's a special case for "for" since the style guide allows space before
    # the semicolon there.
    # Only the first matching branch of this chain reports an error.
    if search(r':\s*;\s*$', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Semicolon defining empty statement. Use { } instead.')
    elif search(r'^\s*;\s*$', line):
        error(line_number, 'whitespace/semicolon', 5,
              'Line contains only semicolon. If this should be an empty statement, '
              'use { } instead.')
    elif (search(r'\s+;\s*$', line) and not search(r'\bfor\b', line)):
        error(line_number, 'whitespace/semicolon', 5,
              'Extra space before last semicolon. If this should be an empty '
              'statement, use { } instead.')
    elif (search(r'\b(for|while)\s*\(.*\)\s*;\s*$', line)
          and line.count('(') == line.count(')')
          # Allow do {} while();
          and not search(r'}\s*while', line)):
        error(line_number, 'whitespace/semicolon', 5,
              'Semicolon defining empty statement for this loop. Use { } instead.')
       
  1431 
       
  1432 
       
  1433 def get_previous_non_blank_line(clean_lines, line_number):
       
  1434     """Return the most recent non-blank line and its line number.
       
  1435 
       
  1436     Args:
       
  1437       clean_lines: A CleansedLines instance containing the file contents.
       
  1438       line_number: The number of the line to check.
       
  1439 
       
  1440     Returns:
       
  1441       A tuple with two elements.  The first element is the contents of the last
       
  1442       non-blank line before the current line, or the empty string if this is the
       
  1443       first non-blank line.  The second is the line number of that line, or -1
       
  1444       if this is the first non-blank line.
       
  1445     """
       
  1446 
       
  1447     previous_line_number = line_number - 1
       
  1448     while previous_line_number >= 0:
       
  1449         previous_line = clean_lines.elided[previous_line_number]
       
  1450         if not is_blank_line(previous_line):     # if not a blank line...
       
  1451             return (previous_line, previous_line_number)
       
  1452         previous_line_number -= 1
       
  1453     return ('', -1)
       
  1454 
       
  1455 
       
  1456 def check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error):
       
  1457     """Looks for indentation errors inside of namespaces.
       
  1458 
       
  1459     Args:
       
  1460       clean_lines: A CleansedLines instance containing the file.
       
  1461       line_number: The number of the line to check.
       
  1462       file_extension: The extension (dot not included) of the file.
       
  1463       file_state: A _FileState instance which maintains information about
       
  1464                   the state of things in the file.
       
  1465       error: The function to call with any errors found.
       
  1466     """
       
  1467 
       
  1468     line = clean_lines.elided[line_number] # Get rid of comments and strings.
       
  1469 
       
  1470     namespace_match = match(r'(?P<namespace_indentation>\s*)namespace\s+\S+\s*{\s*$', line)
       
  1471     if not namespace_match:
       
  1472         return
       
  1473 
       
  1474     current_indentation_level = len(namespace_match.group('namespace_indentation'))
       
  1475     if current_indentation_level > 0:
       
  1476         # Don't warn about an indented namespace if we already warned about indented code.
       
  1477         if not file_state.did_inside_namespace_indent_warning():
       
  1478             error(line_number, 'whitespace/indent', 4,
       
  1479                   'namespace should never be indented.')
       
  1480         return
       
  1481     looking_for_semicolon = False;
       
  1482     line_offset = 0
       
  1483     in_preprocessor_directive = False;
       
  1484     for current_line in clean_lines.elided[line_number + 1:]:
       
  1485         line_offset += 1
       
  1486         if not current_line.strip():
       
  1487             continue
       
  1488         if not current_indentation_level:
       
  1489             if not (in_preprocessor_directive or looking_for_semicolon):
       
  1490                 if not match(r'\S', current_line) and not file_state.did_inside_namespace_indent_warning():
       
  1491                     file_state.set_did_inside_namespace_indent_warning()
       
  1492                     error(line_number + line_offset, 'whitespace/indent', 4,
       
  1493                           'Code inside a namespace should not be indented.')
       
  1494             if in_preprocessor_directive or (current_line.strip()[0] == '#'): # This takes care of preprocessor directive syntax.
       
  1495                 in_preprocessor_directive = current_line[-1] == '\\'
       
  1496             else:
       
  1497                 looking_for_semicolon = ((current_line.find(';') == -1) and (current_line.strip()[-1] != '}')) or (current_line[-1] == '\\')
       
  1498         else:
       
  1499             looking_for_semicolon = False; # If we have a brace we may not need a semicolon.
       
  1500         current_indentation_level += current_line.count('{') - current_line.count('}')
       
  1501         if current_indentation_level < 0:
       
  1502             break;
       
  1503 
       
  1504 def check_using_std(file_extension, clean_lines, line_number, error):
       
  1505     """Looks for 'using std::foo;' statements which should be replaced with 'using namespace std;'.
       
  1506 
       
  1507     Args:
       
  1508       file_extension: The extension of the current file, without the leading dot.
       
  1509       clean_lines: A CleansedLines instance containing the file.
       
  1510       line_number: The number of the line to check.
       
  1511       error: The function to call with any errors found.
       
  1512     """
       
  1513 
       
  1514     # This check doesn't apply to C or Objective-C implementation files.
       
  1515     if is_c_or_objective_c(file_extension):
       
  1516         return
       
  1517 
       
  1518     line = clean_lines.elided[line_number] # Get rid of comments and strings.
       
  1519 
       
  1520     using_std_match = match(r'\s*using\s+std::(?P<method_name>\S+)\s*;\s*$', line)
       
  1521     if not using_std_match:
       
  1522         return
       
  1523 
       
  1524     method_name = using_std_match.group('method_name')
       
  1525     error(line_number, 'build/using_std', 4,
       
  1526           "Use 'using namespace std;' instead of 'using std::%s;'." % method_name)
       
  1527 
       
  1528 
       
  1529 def check_max_min_macros(file_extension, clean_lines, line_number, error):
       
  1530     """Looks use of MAX() and MIN() macros that should be replaced with std::max() and std::min().
       
  1531 
       
  1532     Args:
       
  1533       file_extension: The extension of the current file, without the leading dot.
       
  1534       clean_lines: A CleansedLines instance containing the file.
       
  1535       line_number: The number of the line to check.
       
  1536       error: The function to call with any errors found.
       
  1537     """
       
  1538 
       
  1539     # This check doesn't apply to C or Objective-C implementation files.
       
  1540     if is_c_or_objective_c(file_extension):
       
  1541         return
       
  1542 
       
  1543     line = clean_lines.elided[line_number] # Get rid of comments and strings.
       
  1544 
       
  1545     max_min_macros_search = search(r'\b(?P<max_min_macro>(MAX|MIN))\s*\(', line)
       
  1546     if not max_min_macros_search:
       
  1547         return
       
  1548 
       
  1549     max_min_macro = max_min_macros_search.group('max_min_macro')
       
  1550     max_min_macro_lower = max_min_macro.lower()
       
  1551     error(line_number, 'runtime/max_min_macros', 4,
       
  1552           'Use std::%s() or std::%s<type>() instead of the %s() macro.'
       
  1553           % (max_min_macro_lower, max_min_macro_lower, max_min_macro))
       
  1554 
       
  1555 
       
  1556 def check_switch_indentation(clean_lines, line_number, error):
       
  1557     """Looks for indentation errors inside of switch statements.
       
  1558 
       
  1559     Args:
       
  1560       clean_lines: A CleansedLines instance containing the file.
       
  1561       line_number: The number of the line to check.
       
  1562       error: The function to call with any errors found.
       
  1563     """
       
  1564 
       
  1565     line = clean_lines.elided[line_number] # Get rid of comments and strings.
       
  1566 
       
  1567     switch_match = match(r'(?P<switch_indentation>\s*)switch\s*\(.+\)\s*{\s*$', line)
       
  1568     if not switch_match:
       
  1569         return
       
  1570 
       
  1571     switch_indentation = switch_match.group('switch_indentation')
       
  1572     inner_indentation = switch_indentation + ' ' * 4
       
  1573     line_offset = 0
       
  1574     encountered_nested_switch = False
       
  1575 
       
  1576     for current_line in clean_lines.elided[line_number + 1:]:
       
  1577         line_offset += 1
       
  1578 
       
  1579         # Skip not only empty lines but also those with preprocessor directives.
       
  1580         if current_line.strip() == '' or current_line.startswith('#'):
       
  1581             continue
       
  1582 
       
  1583         if match(r'\s*switch\s*\(.+\)\s*{\s*$', current_line):
       
  1584             # Complexity alarm - another switch statement nested inside the one
       
  1585             # that we're currently testing. We'll need to track the extent of
       
  1586             # that inner switch if the upcoming label tests are still supposed
       
  1587             # to work correctly. Let's not do that; instead, we'll finish
       
  1588             # checking this line, and then leave it like that. Assuming the
       
  1589             # indentation is done consistently (even if incorrectly), this will
       
  1590             # still catch all indentation issues in practice.
       
  1591             encountered_nested_switch = True
       
  1592 
       
  1593         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
       
  1594         current_indentation = current_indentation_match.group('indentation')
       
  1595         remaining_line = current_indentation_match.group('remaining_line')
       
  1596 
       
  1597         # End the check at the end of the switch statement.
       
  1598         if remaining_line.startswith('}') and current_indentation == switch_indentation:
       
  1599             break
       
  1600         # Case and default branches should not be indented. The regexp also
       
  1601         # catches single-line cases like "default: break;" but does not trigger
       
  1602         # on stuff like "Document::Foo();".
       
  1603         elif match(r'(default|case\s+.*)\s*:([^:].*)?$', remaining_line):
       
  1604             if current_indentation != switch_indentation:
       
  1605                 error(line_number + line_offset, 'whitespace/indent', 4,
       
  1606                       'A case label should not be indented, but line up with its switch statement.')
       
  1607                 # Don't throw an error for multiple badly indented labels,
       
  1608                 # one should be enough to figure out the problem.
       
  1609                 break
       
  1610         # We ignore goto labels at the very beginning of a line.
       
  1611         elif match(r'\w+\s*:\s*$', remaining_line):
       
  1612             continue
       
  1613         # It's not a goto label, so check if it's indented at least as far as
       
  1614         # the switch statement plus one more level of indentation.
       
  1615         elif not current_indentation.startswith(inner_indentation):
       
  1616             error(line_number + line_offset, 'whitespace/indent', 4,
       
  1617                   'Non-label code inside switch statements should be indented.')
       
  1618             # Don't throw an error for multiple badly indented statements,
       
  1619             # one should be enough to figure out the problem.
       
  1620             break
       
  1621 
       
  1622         if encountered_nested_switch:
       
  1623             break
       
  1624 
       
  1625 
       
  1626 def check_braces(clean_lines, line_number, error):
       
  1627     """Looks for misplaced braces (e.g. at the end of line).
       
  1628 
       
  1629     Args:
       
  1630       clean_lines: A CleansedLines instance containing the file.
       
  1631       line_number: The number of the line to check.
       
  1632       error: The function to call with any errors found.
       
  1633     """
       
  1634 
       
  1635     line = clean_lines.elided[line_number] # Get rid of comments and strings.
       
  1636 
       
  1637     if match(r'\s*{\s*$', line):
       
  1638         # We allow an open brace to start a line in the case where someone
       
  1639         # is using braces for function definition or in a block to
       
  1640         # explicitly create a new scope, which is commonly used to control
       
  1641         # the lifetime of stack-allocated variables.  We don't detect this
       
  1642         # perfectly: we just don't complain if the last non-whitespace
       
  1643         # character on the previous non-blank line is ';', ':', '{', '}',
       
  1644         # ')', or ') const' and doesn't begin with 'if|for|while|switch|else'.
       
  1645         # We also allow '#' for #endif and '=' for array initialization.
       
  1646         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
       
  1647         if ((not search(r'[;:}{)=]\s*$|\)\s*const\s*$', previous_line)
       
  1648              or search(r'\b(if|for|foreach|while|switch|else)\b', previous_line))
       
  1649             and previous_line.find('#') < 0):
       
  1650             error(line_number, 'whitespace/braces', 4,
       
  1651                   'This { should be at the end of the previous line')
       
  1652     elif (search(r'\)\s*(const\s*)?{\s*$', line)
       
  1653           and line.count('(') == line.count(')')
       
  1654           and not search(r'\b(if|for|foreach|while|switch)\b', line)
       
  1655           and not match(r'\s+[A-Z_][A-Z_0-9]+\b', line)):
       
  1656         error(line_number, 'whitespace/braces', 4,
       
  1657               'Place brace on its own line for function definitions.')
       
  1658 
       
  1659     if (match(r'\s*}\s*(else\s*({\s*)?)?$', line) and line_number > 1):
       
  1660         # We check if a closed brace has started a line to see if a
       
  1661         # one line control statement was previous.
       
  1662         previous_line = clean_lines.elided[line_number - 2]
       
  1663         if (previous_line.find('{') > 0 and previous_line.find('}') < 0
       
  1664             and search(r'\b(if|for|foreach|while|else)\b', previous_line)):
       
  1665             error(line_number, 'whitespace/braces', 4,
       
  1666                   'One line control clauses should not use braces.')
       
  1667 
       
  1668     # An else clause should be on the same line as the preceding closing brace.
       
  1669     if match(r'\s*else\s*', line):
       
  1670         previous_line = get_previous_non_blank_line(clean_lines, line_number)[0]
       
  1671         if match(r'\s*}\s*$', previous_line):
       
  1672             error(line_number, 'whitespace/newline', 4,
       
  1673                   'An else should appear on the same line as the preceding }')
       
  1674 
       
  1675     # Likewise, an else should never have the else clause on the same line
       
  1676     if search(r'\belse [^\s{]', line) and not search(r'\belse if\b', line):
       
  1677         error(line_number, 'whitespace/newline', 4,
       
  1678               'Else clause should never be on same line as else (use 2 lines)')
       
  1679 
       
  1680     # In the same way, a do/while should never be on one line
       
  1681     if match(r'\s*do [^\s{]', line):
       
  1682         error(line_number, 'whitespace/newline', 4,
       
  1683               'do/while clauses should not be on a single line')
       
  1684 
       
  1685     # Braces shouldn't be followed by a ; unless they're defining a struct
       
  1686     # or initializing an array.
       
  1687     # We can't tell in general, but we can for some common cases.
       
  1688     previous_line_number = line_number
       
  1689     while True:
       
  1690         (previous_line, previous_line_number) = get_previous_non_blank_line(clean_lines, previous_line_number)
       
  1691         if match(r'\s+{.*}\s*;', line) and not previous_line.count(';'):
       
  1692             line = previous_line + line
       
  1693         else:
       
  1694             break
       
  1695     if (search(r'{.*}\s*;', line)
       
  1696         and line.count('{') == line.count('}')
       
  1697         and not search(r'struct|class|enum|\s*=\s*{', line)):
       
  1698         error(line_number, 'readability/braces', 4,
       
  1699               "You don't need a ; after a }")
       
  1700 
       
  1701 
       
  1702 def check_exit_statement_simplifications(clean_lines, line_number, error):
       
  1703     """Looks for else or else-if statements that should be written as an
       
  1704     if statement when the prior if concludes with a return, break, continue or
       
  1705     goto statement.
       
  1706 
       
  1707     Args:
       
  1708       clean_lines: A CleansedLines instance containing the file.
       
  1709       line_number: The number of the line to check.
       
  1710       error: The function to call with any errors found.
       
  1711     """
       
  1712 
       
  1713     line = clean_lines.elided[line_number] # Get rid of comments and strings.
       
  1714 
       
  1715     else_match = match(r'(?P<else_indentation>\s*)(\}\s*)?else(\s+if\s*\(|(?P<else>\s*(\{\s*)?\Z))', line)
       
  1716     if not else_match:
       
  1717         return
       
  1718 
       
  1719     else_indentation = else_match.group('else_indentation')
       
  1720     inner_indentation = else_indentation + ' ' * 4
       
  1721 
       
  1722     previous_lines = clean_lines.elided[:line_number]
       
  1723     previous_lines.reverse()
       
  1724     line_offset = 0
       
  1725     encountered_exit_statement = False
       
  1726 
       
  1727     for current_line in previous_lines:
       
  1728         line_offset -= 1
       
  1729 
       
  1730         # Skip not only empty lines but also those with preprocessor directives
       
  1731         # and goto labels.
       
  1732         if current_line.strip() == '' or current_line.startswith('#') or match(r'\w+\s*:\s*$', current_line):
       
  1733             continue
       
  1734 
       
  1735         # Skip lines with closing braces on the original indentation level.
       
  1736         # Even though the styleguide says they should be on the same line as
       
  1737         # the "else if" statement, we also want to check for instances where
       
  1738         # the current code does not comply with the coding style. Thus, ignore
       
  1739         # these lines and proceed to the line before that.
       
  1740         if current_line == else_indentation + '}':
       
  1741             continue
       
  1742 
       
  1743         current_indentation_match = match(r'(?P<indentation>\s*)(?P<remaining_line>.*)$', current_line);
       
  1744         current_indentation = current_indentation_match.group('indentation')
       
  1745         remaining_line = current_indentation_match.group('remaining_line')
       
  1746 
       
  1747         # As we're going up the lines, the first real statement to encounter
       
  1748         # has to be an exit statement (return, break, continue or goto) -
       
  1749         # otherwise, this check doesn't apply.
       
  1750         if not encountered_exit_statement:
       
  1751             # We only want to find exit statements if they are on exactly
       
  1752             # the same level of indentation as expected from the code inside
       
  1753             # the block. If the indentation doesn't strictly match then we
       
  1754             # might have a nested if or something, which must be ignored.
       
  1755             if current_indentation != inner_indentation:
       
  1756                 break
       
  1757             if match(r'(return(\W+.*)|(break|continue)\s*;|goto\s*\w+;)$', remaining_line):
       
  1758                 encountered_exit_statement = True
       
  1759                 continue
       
  1760             break
       
  1761 
       
  1762         # When code execution reaches this point, we've found an exit statement
       
  1763         # as last statement of the previous block. Now we only need to make
       
  1764         # sure that the block belongs to an "if", then we can throw an error.
       
  1765 
       
  1766         # Skip lines with opening braces on the original indentation level,
       
  1767         # similar to the closing braces check above. ("if (condition)\n{")
       
  1768         if current_line == else_indentation + '{':
       
  1769             continue
       
  1770 
       
  1771         # Skip everything that's further indented than our "else" or "else if".
       
  1772         if current_indentation.startswith(else_indentation) and current_indentation != else_indentation:
       
  1773             continue
       
  1774 
       
  1775         # So we've got a line with same (or less) indentation. Is it an "if"?
       
  1776         # If yes: throw an error. If no: don't throw an error.
       
  1777         # Whatever the outcome, this is the end of our loop.
       
  1778         if match(r'if\s*\(', remaining_line):
       
  1779             if else_match.start('else') != -1:
       
  1780                 error(line_number + line_offset, 'readability/control_flow', 4,
       
  1781                       'An else statement can be removed when the prior "if" '
       
  1782                       'concludes with a return, break, continue or goto statement.')
       
  1783             else:
       
  1784                 error(line_number + line_offset, 'readability/control_flow', 4,
       
  1785                       'An else if statement should be written as an if statement '
       
  1786                       'when the prior "if" concludes with a return, break, '
       
  1787                       'continue or goto statement.')
       
  1788         break
       
  1789 
       
  1790 
       
  1791 def replaceable_check(operator, macro, line):
       
  1792     """Determine whether a basic CHECK can be replaced with a more specific one.
       
  1793 
       
  1794     For example suggest using CHECK_EQ instead of CHECK(a == b) and
       
  1795     similarly for CHECK_GE, CHECK_GT, CHECK_LE, CHECK_LT, CHECK_NE.
       
  1796 
       
  1797     Args:
       
  1798       operator: The C++ operator used in the CHECK.
       
  1799       macro: The CHECK or EXPECT macro being called.
       
  1800       line: The current source line.
       
  1801 
       
  1802     Returns:
       
  1803       True if the CHECK can be replaced with a more specific one.
       
  1804     """
       
  1805 
       
  1806     # This matches decimal and hex integers, strings, and chars (in that order).
       
  1807     match_constant = r'([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')'
       
  1808 
       
  1809     # Expression to match two sides of the operator with something that
       
  1810     # looks like a literal, since CHECK(x == iterator) won't compile.
       
  1811     # This means we can't catch all the cases where a more specific
       
  1812     # CHECK is possible, but it's less annoying than dealing with
       
  1813     # extraneous warnings.
       
  1814     match_this = (r'\s*' + macro + r'\((\s*' +
       
  1815                   match_constant + r'\s*' + operator + r'[^<>].*|'
       
  1816                   r'.*[^<>]' + operator + r'\s*' + match_constant +
       
  1817                   r'\s*\))')
       
  1818 
       
  1819     # Don't complain about CHECK(x == NULL) or similar because
       
  1820     # CHECK_EQ(x, NULL) won't compile (requires a cast).
       
  1821     # Also, don't complain about more complex boolean expressions
       
  1822     # involving && or || such as CHECK(a == b || c == d).
       
  1823     return match(match_this, line) and not search(r'NULL|&&|\|\|', line)
       
  1824 
       
  1825 
       
  1826 def check_check(clean_lines, line_number, error):
       
  1827     """Checks the use of CHECK and EXPECT macros.
       
  1828 
       
  1829     Args:
       
  1830       clean_lines: A CleansedLines instance containing the file.
       
  1831       line_number: The number of the line to check.
       
  1832       error: The function to call with any errors found.
       
  1833     """
       
  1834 
       
  1835     # Decide the set of replacement macros that should be suggested
       
  1836     raw_lines = clean_lines.raw_lines
       
  1837     current_macro = ''
       
  1838     for macro in _CHECK_MACROS:
       
  1839         if raw_lines[line_number].find(macro) >= 0:
       
  1840             current_macro = macro
       
  1841             break
       
  1842     if not current_macro:
       
  1843         # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
       
  1844         return
       
  1845 
       
  1846     line = clean_lines.elided[line_number]        # get rid of comments and strings
       
  1847 
       
  1848     # Encourage replacing plain CHECKs with CHECK_EQ/CHECK_NE/etc.
       
  1849     for operator in ['==', '!=', '>=', '>', '<=', '<']:
       
  1850         if replaceable_check(operator, current_macro, line):
       
  1851             error(line_number, 'readability/check', 2,
       
  1852                   'Consider using %s instead of %s(a %s b)' % (
       
  1853                       _CHECK_REPLACEMENT[current_macro][operator],
       
  1854                       current_macro, operator))
       
  1855             break
       
  1856 
       
  1857 
       
  1858 def check_for_comparisons_to_zero(clean_lines, line_number, error):
       
  1859     # Get the line without comments and strings.
       
  1860     line = clean_lines.elided[line_number]
       
  1861 
       
  1862     # Include NULL here so that users don't have to convert NULL to 0 first and then get this error.
       
  1863     if search(r'[=!]=\s*(NULL|0|true|false)\W', line) or search(r'\W(NULL|0|true|false)\s*[=!]=', line):
       
  1864         error(line_number, 'readability/comparison_to_zero', 5,
       
  1865               'Tests for true/false, null/non-null, and zero/non-zero should all be done without equality comparisons.')
       
  1866 
       
  1867 
       
  1868 def check_for_null(file_extension, clean_lines, line_number, error):
       
  1869     # This check doesn't apply to C or Objective-C implementation files.
       
  1870     if is_c_or_objective_c(file_extension):
       
  1871         return
       
  1872 
       
  1873     line = clean_lines.elided[line_number]
       
  1874 
       
  1875     # Don't warn about NULL usage in g_*(). See Bug 32858 and 39372.
       
  1876     if search(r'\bg(_[a-z]+)+\b', line):
       
  1877         return
       
  1878 
       
  1879     # Don't warn about NULL usage in gst_*_many(). See Bug 39740
       
  1880     if search(r'\bgst_\w+_many\b', line):
       
  1881         return
       
  1882 
       
  1883     # Don't warn about NULL usage in g_str{join,concat}(). See Bug 34834
       
  1884     if search(r'\bg_str(join|concat)\b', line):
       
  1885         return
       
  1886 
       
  1887     if search(r'\bNULL\b', line):
       
  1888         error(line_number, 'readability/null', 5, 'Use 0 instead of NULL.')
       
  1889         return
       
  1890 
       
  1891     line = clean_lines.raw_lines[line_number]
       
  1892     # See if NULL occurs in any comments in the line. If the search for NULL using the raw line
       
  1893     # matches, then do the check with strings collapsed to avoid giving errors for
       
  1894     # NULLs occurring in strings.
       
  1895     if search(r'\bNULL\b', line) and search(r'\bNULL\b', CleansedLines.collapse_strings(line)):
       
  1896         error(line_number, 'readability/null', 4, 'Use 0 instead of NULL.')
       
  1897 
       
  1898 def get_line_width(line):
       
  1899     """Determines the width of the line in column positions.
       
  1900 
       
  1901     Args:
       
  1902       line: A string, which may be a Unicode string.
       
  1903 
       
  1904     Returns:
       
  1905       The width of the line in column positions, accounting for Unicode
       
  1906       combining characters and wide characters.
       
  1907     """
       
  1908     if isinstance(line, unicode):
       
  1909         width = 0
       
  1910         for c in unicodedata.normalize('NFC', line):
       
  1911             if unicodedata.east_asian_width(c) in ('W', 'F'):
       
  1912                 width += 2
       
  1913             elif not unicodedata.combining(c):
       
  1914                 width += 1
       
  1915         return width
       
  1916     return len(line)
       
  1917 
       
  1918 
       
  1919 def check_style(clean_lines, line_number, file_extension, file_state, error):
       
  1920     """Checks rules from the 'C++ style rules' section of cppguide.html.
       
  1921 
       
  1922     Most of these rules are hard to test (naming, comment style), but we
       
  1923     do what we can.  In particular we check for 4-space indents, line lengths,
       
  1924     tab usage, spaces inside code, etc.
       
  1925 
       
  1926     Args:
       
  1927       clean_lines: A CleansedLines instance containing the file.
       
  1928       line_number: The number of the line to check.
       
  1929       file_extension: The extension (without the dot) of the filename.
       
  1930       file_state: A _FileState instance which maintains information about
       
  1931                   the state of things in the file.
       
  1932       error: The function to call with any errors found.
       
  1933     """
       
  1934 
       
  1935     raw_lines = clean_lines.raw_lines
       
  1936     line = raw_lines[line_number]
       
  1937 
       
  1938     if line.find('\t') != -1:
       
  1939         error(line_number, 'whitespace/tab', 1,
       
  1940               'Tab found; better to use spaces')
       
  1941 
       
  1942     # One or three blank spaces at the beginning of the line is weird; it's
       
  1943     # hard to reconcile that with 4-space indents.
       
  1944     # NOTE: here are the conditions rob pike used for his tests.  Mine aren't
       
  1945     # as sophisticated, but it may be worth becoming so:  RLENGTH==initial_spaces
       
  1946     # if(RLENGTH > 20) complain = 0;
       
  1947     # if(match($0, " +(error|private|public|protected):")) complain = 0;
       
  1948     # if(match(prev, "&& *$")) complain = 0;
       
  1949     # if(match(prev, "\\|\\| *$")) complain = 0;
       
  1950     # if(match(prev, "[\",=><] *$")) complain = 0;
       
  1951     # if(match($0, " <<")) complain = 0;
       
  1952     # if(match(prev, " +for \\(")) complain = 0;
       
  1953     # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
       
  1954     initial_spaces = 0
       
  1955     cleansed_line = clean_lines.elided[line_number]
       
  1956     while initial_spaces < len(line) and line[initial_spaces] == ' ':
       
  1957         initial_spaces += 1
       
  1958     if line and line[-1].isspace():
       
  1959         error(line_number, 'whitespace/end_of_line', 4,
       
  1960               'Line ends in whitespace.  Consider deleting these extra spaces.')
       
  1961     # There are certain situations we allow one space, notably for labels
       
  1962     elif ((initial_spaces >= 1 and initial_spaces <= 3)
       
  1963           and not match(r'\s*\w+\s*:\s*$', cleansed_line)):
       
  1964         error(line_number, 'whitespace/indent', 3,
       
  1965               'Weird number of spaces at line-start.  '
       
  1966               'Are you using a 4-space indent?')
       
  1967     # Labels should always be indented at least one space.
       
  1968     elif not initial_spaces and line[:2] != '//':
       
  1969         label_match = match(r'(?P<label>[^:]+):\s*$', line)
       
  1970 
       
  1971         if label_match:
       
  1972             label = label_match.group('label')
       
  1973             # Only throw errors for stuff that is definitely not a goto label,
       
  1974             # because goto labels can in fact occur at the start of the line.
       
  1975             if label in ['public', 'private', 'protected'] or label.find(' ') != -1:
       
  1976                 error(line_number, 'whitespace/labels', 4,
       
  1977                       'Labels should always be indented at least one space.  '
       
  1978                       'If this is a member-initializer list in a constructor, '
       
  1979                       'the colon should be on the line after the definition header.')
       
  1980 
       
  1981     if (cleansed_line.count(';') > 1
       
  1982         # for loops are allowed two ;'s (and may run over two lines).
       
  1983         and cleansed_line.find('for') == -1
       
  1984         and (get_previous_non_blank_line(clean_lines, line_number)[0].find('for') == -1
       
  1985              or get_previous_non_blank_line(clean_lines, line_number)[0].find(';') != -1)
       
  1986         # It's ok to have many commands in a switch case that fits in 1 line
       
  1987         and not ((cleansed_line.find('case ') != -1
       
  1988                   or cleansed_line.find('default:') != -1)
       
  1989                  and cleansed_line.find('break;') != -1)
       
  1990         and not cleansed_line.startswith('#define ')):
       
  1991         error(line_number, 'whitespace/newline', 4,
       
  1992               'More than one command on the same line')
       
  1993 
       
  1994     if cleansed_line.strip().endswith('||') or cleansed_line.strip().endswith('&&'):
       
  1995         error(line_number, 'whitespace/operators', 4,
       
  1996               'Boolean expressions that span multiple lines should have their '
       
  1997               'operators on the left side of the line instead of the right side.')
       
  1998 
       
  1999     # Some more style checks
       
  2000     check_namespace_indentation(clean_lines, line_number, file_extension, file_state, error)
       
  2001     check_using_std(file_extension, clean_lines, line_number, error)
       
  2002     check_max_min_macros(file_extension, clean_lines, line_number, error)
       
  2003     check_switch_indentation(clean_lines, line_number, error)
       
  2004     check_braces(clean_lines, line_number, error)
       
  2005     check_exit_statement_simplifications(clean_lines, line_number, error)
       
  2006     check_spacing(file_extension, clean_lines, line_number, error)
       
  2007     check_check(clean_lines, line_number, error)
       
  2008     check_for_comparisons_to_zero(clean_lines, line_number, error)
       
  2009     check_for_null(file_extension, clean_lines, line_number, error)
       
  2010 
       
  2011 
       
  2012 _RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
       
  2013 _RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
       
  2014 # Matches the first component of a filename delimited by -s and _s. That is:
       
  2015 #  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
       
  2016 #  _RE_FIRST_COMPONENT.match('foo.cpp').group(0) == 'foo'
       
  2017 #  _RE_FIRST_COMPONENT.match('foo-bar_baz.cpp').group(0) == 'foo'
       
  2018 #  _RE_FIRST_COMPONENT.match('foo_bar-baz.cpp').group(0) == 'foo'
       
  2019 _RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')
       
  2020 
       
  2021 
       
  2022 def _drop_common_suffixes(filename):
       
  2023     """Drops common suffixes like _test.cpp or -inl.h from filename.
       
  2024 
       
  2025     For example:
       
  2026       >>> _drop_common_suffixes('foo/foo-inl.h')
       
  2027       'foo/foo'
       
  2028       >>> _drop_common_suffixes('foo/bar/foo.cpp')
       
  2029       'foo/bar/foo'
       
  2030       >>> _drop_common_suffixes('foo/foo_internal.h')
       
  2031       'foo/foo'
       
  2032       >>> _drop_common_suffixes('foo/foo_unusualinternal.h')
       
  2033       'foo/foo_unusualinternal'
       
  2034 
       
  2035     Args:
       
  2036       filename: The input filename.
       
  2037 
       
  2038     Returns:
       
  2039       The filename with the common suffix removed.
       
  2040     """
       
  2041     for suffix in ('test.cpp', 'regtest.cpp', 'unittest.cpp',
       
  2042                    'inl.h', 'impl.h', 'internal.h'):
       
  2043         if (filename.endswith(suffix) and len(filename) > len(suffix)
       
  2044             and filename[-len(suffix) - 1] in ('-', '_')):
       
  2045             return filename[:-len(suffix) - 1]
       
  2046     return os.path.splitext(filename)[0]
       
  2047 
       
  2048 
       
  2049 def _classify_include(filename, include, is_system, include_state):
       
  2050     """Figures out what kind of header 'include' is.
       
  2051 
       
  2052     Args:
       
  2053       filename: The current file cpp_style is running over.
       
  2054       include: The path to a #included file.
       
  2055       is_system: True if the #include used <> rather than "".
       
  2056       include_state: An _IncludeState instance in which the headers are inserted.
       
  2057 
       
  2058     Returns:
       
  2059       One of the _XXX_HEADER constants.
       
  2060 
       
  2061     For example:
       
  2062       >>> _classify_include('foo.cpp', 'config.h', False)
       
  2063       _CONFIG_HEADER
       
  2064       >>> _classify_include('foo.cpp', 'foo.h', False)
       
  2065       _PRIMARY_HEADER
       
  2066       >>> _classify_include('foo.cpp', 'bar.h', False)
       
  2067       _OTHER_HEADER
       
  2068     """
       
  2069 
       
  2070     # If it is a system header we know it is classified as _OTHER_HEADER.
       
  2071     if is_system:
       
  2072         return _OTHER_HEADER
       
  2073 
       
  2074     # If the include is named config.h then this is WebCore/config.h.
       
  2075     if include == "config.h":
       
  2076         return _CONFIG_HEADER
       
  2077 
       
  2078     # There cannot be primary includes in header files themselves. Only an
       
  2079     # include exactly matches the header filename will be is flagged as
       
  2080     # primary, so that it triggers the "don't include yourself" check.
       
  2081     if filename.endswith('.h') and filename != include:
       
  2082         return _OTHER_HEADER;
       
  2083 
       
  2084     # Qt's moc files do not follow the naming and ordering rules, so they should be skipped
       
  2085     if include.startswith('moc_') and include.endswith('.cpp'):
       
  2086         return _MOC_HEADER
       
  2087 
       
  2088     if include.endswith('.moc'):
       
  2089         return _MOC_HEADER
       
  2090 
       
  2091     # If the target file basename starts with the include we're checking
       
  2092     # then we consider it the primary header.
       
  2093     target_base = FileInfo(filename).base_name()
       
  2094     include_base = FileInfo(include).base_name()
       
  2095 
       
  2096     # If we haven't encountered a primary header, then be lenient in checking.
       
  2097     if not include_state.visited_primary_section() and target_base.find(include_base) != -1:
       
  2098         return _PRIMARY_HEADER
       
  2099     # If we already encountered a primary header, perform a strict comparison.
       
  2100     # In case the two filename bases are the same then the above lenient check
       
  2101     # probably was a false positive.
       
  2102     elif include_state.visited_primary_section() and target_base == include_base:
       
  2103         if include == "ResourceHandleWin.h":
       
  2104             # FIXME: Thus far, we've only seen one example of these, but if we
       
  2105             # start to see more, please consider generalizing this check
       
  2106             # somehow.
       
  2107             return _OTHER_HEADER
       
  2108         return _PRIMARY_HEADER
       
  2109 
       
  2110     return _OTHER_HEADER
       
  2111 
       
  2112 
       
  2113 def check_include_line(filename, file_extension, clean_lines, line_number, include_state, error):
       
  2114     """Check rules that are applicable to #include lines.
       
  2115 
       
  2116     Strings on #include lines are NOT removed from elided line, to make
       
  2117     certain tasks easier. However, to prevent false positives, checks
       
  2118     applicable to #include lines in CheckLanguage must be put here.
       
  2119 
       
  2120     Args:
       
  2121       filename: The name of the current file.
       
  2122       file_extension: The current file extension, without the leading dot.
       
  2123       clean_lines: A CleansedLines instance containing the file.
       
  2124       line_number: The number of the line to check.
       
  2125       include_state: An _IncludeState instance in which the headers are inserted.
       
  2126       error: The function to call with any errors found.
       
  2127     """
       
  2128     # FIXME: For readability or as a possible optimization, consider
       
  2129     #        exiting early here by checking whether the "build/include"
       
  2130     #        category should be checked for the given filename.  This
       
  2131     #        may involve having the error handler classes expose a
       
  2132     #        should_check() method, in addition to the usual __call__
       
  2133     #        method.
       
  2134     line = clean_lines.lines[line_number]
       
  2135 
       
  2136     matched = _RE_PATTERN_INCLUDE.search(line)
       
  2137     if not matched:
       
  2138         return
       
  2139 
       
  2140     include = matched.group(2)
       
  2141     is_system = (matched.group(1) == '<')
       
  2142 
       
  2143     # Look for any of the stream classes that are part of standard C++.
       
  2144     if match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
       
  2145         error(line_number, 'readability/streams', 3,
       
  2146               'Streams are highly discouraged.')
       
  2147 
       
  2148     # Look for specific includes to fix.
       
  2149     if include.startswith('wtf/') and not is_system:
       
  2150         error(line_number, 'build/include', 4,
       
  2151               'wtf includes should be <wtf/file.h> instead of "wtf/file.h".')
       
  2152 
       
  2153     duplicate_header = include in include_state
       
  2154     if duplicate_header:
       
  2155         error(line_number, 'build/include', 4,
       
  2156               '"%s" already included at %s:%s' %
       
  2157               (include, filename, include_state[include]))
       
  2158     else:
       
  2159         include_state[include] = line_number
       
  2160 
       
  2161     header_type = _classify_include(filename, include, is_system, include_state)
       
  2162     include_state.header_types[line_number] = header_type
       
  2163 
       
  2164     # Only proceed if this isn't a duplicate header.
       
  2165     if duplicate_header:
       
  2166         return
       
  2167 
       
  2168     # We want to ensure that headers appear in the right order:
       
  2169     # 1) for implementation files: config.h, primary header, blank line, alphabetically sorted
       
  2170     # 2) for header files: alphabetically sorted
       
  2171     # The include_state object keeps track of the last type seen
       
  2172     # and complains if the header types are out of order or missing.
       
  2173     error_message = include_state.check_next_include_order(header_type, file_extension == "h")
       
  2174 
       
  2175     # Check to make sure we have a blank line after primary header.
       
  2176     if not error_message and header_type == _PRIMARY_HEADER:
       
  2177          next_line = clean_lines.raw_lines[line_number + 1]
       
  2178          if not is_blank_line(next_line):
       
  2179             error(line_number, 'build/include_order', 4,
       
  2180                   'You should add a blank line after implementation file\'s own header.')
       
  2181 
       
  2182     # Check to make sure all headers besides config.h and the primary header are
       
  2183     # alphabetically sorted. Skip Qt's moc files.
       
  2184     if not error_message and header_type == _OTHER_HEADER:
       
  2185          previous_line_number = line_number - 1;
       
  2186          previous_line = clean_lines.lines[previous_line_number]
       
  2187          previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
       
  2188          while (not previous_match and previous_line_number > 0
       
  2189                 and not search(r'\A(#if|#ifdef|#ifndef|#else|#elif|#endif)', previous_line)):
       
  2190             previous_line_number -= 1;
       
  2191             previous_line = clean_lines.lines[previous_line_number]
       
  2192             previous_match = _RE_PATTERN_INCLUDE.search(previous_line)
       
  2193          if previous_match:
       
  2194             previous_header_type = include_state.header_types[previous_line_number]
       
  2195             if previous_header_type == _OTHER_HEADER and previous_line.strip() > line.strip():
       
  2196                 error(line_number, 'build/include_order', 4,
       
  2197                       'Alphabetical sorting problem.')
       
  2198 
       
  2199     if error_message:
       
  2200         if file_extension == 'h':
       
  2201             error(line_number, 'build/include_order', 4,
       
  2202                   '%s Should be: alphabetically sorted.' %
       
  2203                   error_message)
       
  2204         else:
       
  2205             error(line_number, 'build/include_order', 4,
       
  2206                   '%s Should be: config.h, primary header, blank line, and then alphabetically sorted.' %
       
  2207                   error_message)
       
  2208 
       
  2209 
       
  2210 def check_language(filename, clean_lines, line_number, file_extension, include_state,
       
  2211                    error):
       
  2212     """Checks rules from the 'C++ language rules' section of cppguide.html.
       
  2213 
       
  2214     Some of these rules are hard to test (function overloading, using
       
  2215     uint32 inappropriately), but we do the best we can.
       
  2216 
       
  2217     Args:
       
  2218       filename: The name of the current file.
       
  2219       clean_lines: A CleansedLines instance containing the file.
       
  2220       line_number: The number of the line to check.
       
  2221       file_extension: The extension (without the dot) of the filename.
       
  2222       include_state: An _IncludeState instance in which the headers are inserted.
       
  2223       error: The function to call with any errors found.
       
  2224     """
       
  2225     # If the line is empty or consists of entirely a comment, no need to
       
  2226     # check it.
       
  2227     line = clean_lines.elided[line_number]
       
  2228     if not line:
       
  2229         return
       
  2230 
       
  2231     matched = _RE_PATTERN_INCLUDE.search(line)
       
  2232     if matched:
       
  2233         check_include_line(filename, file_extension, clean_lines, line_number, include_state, error)
       
  2234         return
       
  2235 
       
  2236     # FIXME: figure out if they're using default arguments in fn proto.
       
  2237 
       
  2238     # Check to see if they're using an conversion function cast.
       
  2239     # I just try to capture the most common basic types, though there are more.
       
  2240     # Parameterless conversion functions, such as bool(), are allowed as they are
       
  2241     # probably a member operator declaration or default constructor.
       
  2242     matched = search(
       
  2243         r'\b(int|float|double|bool|char|int32|uint32|int64|uint64)\([^)]', line)
       
  2244     if matched:
       
  2245         # gMock methods are defined using some variant of MOCK_METHODx(name, type)
       
  2246         # where type may be float(), int(string), etc.  Without context they are
       
  2247         # virtually indistinguishable from int(x) casts.
       
  2248         if not match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line):
       
  2249             error(line_number, 'readability/casting', 4,
       
  2250                   'Using deprecated casting style.  '
       
  2251                   'Use static_cast<%s>(...) instead' %
       
  2252                   matched.group(1))
       
  2253 
       
  2254     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
       
  2255                        'static_cast',
       
  2256                        r'\((int|float|double|bool|char|u?int(16|32|64))\)',
       
  2257                        error)
       
  2258     # This doesn't catch all cases.  Consider (const char * const)"hello".
       
  2259     check_c_style_cast(line_number, line, clean_lines.raw_lines[line_number],
       
  2260                        'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)
       
  2261 
       
  2262     # In addition, we look for people taking the address of a cast.  This
       
  2263     # is dangerous -- casts can assign to temporaries, so the pointer doesn't
       
  2264     # point where you think.
       
  2265     if search(
       
  2266         r'(&\([^)]+\)[\w(])|(&(static|dynamic|reinterpret)_cast\b)', line):
       
  2267         error(line_number, 'runtime/casting', 4,
       
  2268               ('Are you taking an address of a cast?  '
       
  2269                'This is dangerous: could be a temp var.  '
       
  2270                'Take the address before doing the cast, rather than after'))
       
  2271 
       
  2272     # Check for people declaring static/global STL strings at the top level.
       
  2273     # This is dangerous because the C++ language does not guarantee that
       
  2274     # globals with constructors are initialized before the first access.
       
  2275     matched = match(
       
  2276         r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
       
  2277         line)
       
  2278     # Make sure it's not a function.
       
  2279     # Function template specialization looks like: "string foo<Type>(...".
       
  2280     # Class template definitions look like: "string Foo<Type>::Method(...".
       
  2281     if matched and not match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)',
       
  2282                              matched.group(3)):
       
  2283         error(line_number, 'runtime/string', 4,
       
  2284               'For a static/global string constant, use a C style string instead: '
       
  2285               '"%schar %s[]".' %
       
  2286               (matched.group(1), matched.group(2)))
       
  2287 
       
  2288     # Check that we're not using RTTI outside of testing code.
       
  2289     if search(r'\bdynamic_cast<', line):
       
  2290         error(line_number, 'runtime/rtti', 5,
       
  2291               'Do not use dynamic_cast<>.  If you need to cast within a class '
       
  2292               "hierarchy, use static_cast<> to upcast.  Google doesn't support "
       
  2293               'RTTI.')
       
  2294 
       
  2295     if search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
       
  2296         error(line_number, 'runtime/init', 4,
       
  2297               'You seem to be initializing a member variable with itself.')
       
  2298 
       
  2299     if file_extension == 'h':
       
  2300         # FIXME: check that 1-arg constructors are explicit.
       
  2301         #        How to tell it's a constructor?
       
  2302         #        (handled in check_for_non_standard_constructs for now)
       
  2303         pass
       
  2304 
       
  2305     # Check if people are using the verboten C basic types.  The only exception
       
  2306     # we regularly allow is "unsigned short port" for port.
       
  2307     if search(r'\bshort port\b', line):
       
  2308         if not search(r'\bunsigned short port\b', line):
       
  2309             error(line_number, 'runtime/int', 4,
       
  2310                   'Use "unsigned short" for ports, not "short"')
       
  2311 
       
  2312     # When snprintf is used, the second argument shouldn't be a literal.
       
  2313     matched = search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
       
  2314     if matched:
       
  2315         error(line_number, 'runtime/printf', 3,
       
  2316               'If you can, use sizeof(%s) instead of %s as the 2nd arg '
       
  2317               'to snprintf.' % (matched.group(1), matched.group(2)))
       
  2318 
       
  2319     # Check if some verboten C functions are being used.
       
  2320     if search(r'\bsprintf\b', line):
       
  2321         error(line_number, 'runtime/printf', 5,
       
  2322               'Never use sprintf.  Use snprintf instead.')
       
  2323     matched = search(r'\b(strcpy|strcat)\b', line)
       
  2324     if matched:
       
  2325         error(line_number, 'runtime/printf', 4,
       
  2326               'Almost always, snprintf is better than %s' % matched.group(1))
       
  2327 
       
  2328     if search(r'\bsscanf\b', line):
       
  2329         error(line_number, 'runtime/printf', 1,
       
  2330               'sscanf can be ok, but is slow and can overflow buffers.')
       
  2331 
       
  2332     # Check for suspicious usage of "if" like
       
  2333     # } if (a == b) {
       
  2334     if search(r'\}\s*if\s*\(', line):
       
  2335         error(line_number, 'readability/braces', 4,
       
  2336               'Did you mean "else if"? If not, start a new line for "if".')
       
  2337 
       
  2338     # Check for potential format string bugs like printf(foo).
       
  2339     # We constrain the pattern not to pick things like DocidForPrintf(foo).
       
  2340     # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
       
  2341     matched = re.search(r'\b((?:string)?printf)\s*\(([\w.\->()]+)\)', line, re.I)
       
  2342     if matched:
       
  2343         error(line_number, 'runtime/printf', 4,
       
  2344               'Potential format string bug. Do %s("%%s", %s) instead.'
       
  2345               % (matched.group(1), matched.group(2)))
       
  2346 
       
  2347     # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
       
  2348     matched = search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
       
  2349     if matched and not match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", matched.group(2)):
       
  2350         error(line_number, 'runtime/memset', 4,
       
  2351               'Did you mean "memset(%s, 0, %s)"?'
       
  2352               % (matched.group(1), matched.group(2)))
       
  2353 
       
  2354     # Detect variable-length arrays.
       
  2355     matched = match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
       
  2356     if (matched and matched.group(2) != 'return' and matched.group(2) != 'delete' and
       
  2357         matched.group(3).find(']') == -1):
       
  2358         # Split the size using space and arithmetic operators as delimiters.
       
  2359         # If any of the resulting tokens are not compile time constants then
       
  2360         # report the error.
       
  2361         tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', matched.group(3))
       
  2362         is_const = True
       
  2363         skip_next = False
       
  2364         for tok in tokens:
       
  2365             if skip_next:
       
  2366                 skip_next = False
       
  2367                 continue
       
  2368 
       
  2369             if search(r'sizeof\(.+\)', tok):
       
  2370                 continue
       
  2371             if search(r'arraysize\(\w+\)', tok):
       
  2372                 continue
       
  2373 
       
  2374             tok = tok.lstrip('(')
       
  2375             tok = tok.rstrip(')')
       
  2376             if not tok:
       
  2377                 continue
       
  2378             if match(r'\d+', tok):
       
  2379                 continue
       
  2380             if match(r'0[xX][0-9a-fA-F]+', tok):
       
  2381                 continue
       
  2382             if match(r'k[A-Z0-9]\w*', tok):
       
  2383                 continue
       
  2384             if match(r'(.+::)?k[A-Z0-9]\w*', tok):
       
  2385                 continue
       
  2386             if match(r'(.+::)?[A-Z][A-Z0-9_]*', tok):
       
  2387                 continue
       
  2388             # A catch all for tricky sizeof cases, including 'sizeof expression',
       
  2389             # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
       
  2390             # requires skipping the next token becasue we split on ' ' and '*'.
       
  2391             if tok.startswith('sizeof'):
       
  2392                 skip_next = True
       
  2393                 continue
       
  2394             is_const = False
       
  2395             break
       
  2396         if not is_const:
       
  2397             error(line_number, 'runtime/arrays', 1,
       
  2398                   'Do not use variable-length arrays.  Use an appropriately named '
       
  2399                   "('k' followed by CamelCase) compile-time constant for the size.")
       
  2400 
       
  2401     # Check for use of unnamed namespaces in header files.  Registration
       
  2402     # macros are typically OK, so we allow use of "namespace {" on lines
       
  2403     # that end with backslashes.
       
  2404     if (file_extension == 'h'
       
  2405         and search(r'\bnamespace\s*{', line)
       
  2406         and line[-1] != '\\'):
       
  2407         error(line_number, 'build/namespaces', 4,
       
  2408               'Do not use unnamed namespaces in header files.  See '
       
  2409               'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
       
  2410               ' for more information.')
       
  2411 
       
  2412     check_identifier_name_in_declaration(filename, line_number, line, error)
       
  2413 
       
  2414 
       
  2415 def check_identifier_name_in_declaration(filename, line_number, line, error):
       
  2416     """Checks if identifier names contain any underscores.
       
  2417 
       
  2418     As identifiers in libraries we are using have a bunch of
       
  2419     underscores, we only warn about the declarations of identifiers
       
  2420     and don't check use of identifiers.
       
  2421 
       
  2422     Args:
       
  2423       filename: The name of the current file.
       
  2424       line_number: The number of the line to check.
       
  2425       line: The line of code to check.
       
  2426       error: The function to call with any errors found.
       
  2427     """
       
  2428     # We don't check a return statement.
       
  2429     if match(r'\s*(return|delete)\b', line):
       
  2430         return
       
  2431 
       
  2432     # Basically, a declaration is a type name followed by whitespaces
       
  2433     # followed by an identifier. The type name can be complicated
       
  2434     # due to type adjectives and templates. We remove them first to
       
  2435     # simplify the process to find declarations of identifiers.
       
  2436 
       
  2437     # Convert "long long", "long double", and "long long int" to
       
  2438     # simple types, but don't remove simple "long".
       
  2439     line = sub(r'long (long )?(?=long|double|int)', '', line)
       
  2440     # Convert unsigned/signed types to simple types, too.
       
  2441     line = sub(r'(unsigned|signed) (?=char|short|int|long)', '', line)
       
  2442     line = sub(r'\b(inline|using|static|const|volatile|auto|register|extern|typedef|restrict|struct|class|virtual)(?=\W)', '', line)
       
  2443 
       
  2444     # Remove all template parameters by removing matching < and >.
       
  2445     # Loop until no templates are removed to remove nested templates.
       
  2446     while True:
       
  2447         line, number_of_replacements = subn(r'<([\w\s:]|::)+\s*[*&]*\s*>', '', line)
       
  2448         if not number_of_replacements:
       
  2449             break
       
  2450 
       
  2451     # Declarations of local variables can be in condition expressions
       
  2452     # of control flow statements (e.g., "if (RenderObject* p = o->parent())").
       
  2453     # We remove the keywords and the first parenthesis.
       
  2454     #
       
  2455     # Declarations in "while", "if", and "switch" are different from
       
  2456     # other declarations in two aspects:
       
  2457     #
       
  2458     # - There can be only one declaration between the parentheses.
       
  2459     #   (i.e., you cannot write "if (int i = 0, j = 1) {}")
       
  2460     # - The variable must be initialized.
       
  2461     #   (i.e., you cannot write "if (int i) {}")
       
  2462     #
       
  2463     # and we will need different treatments for them.
       
  2464     line = sub(r'^\s*for\s*\(', '', line)
       
  2465     line, control_statement = subn(r'^\s*(while|else if|if|switch)\s*\(', '', line)
       
  2466 
       
  2467     # Detect variable and functions.
       
  2468     type_regexp = r'\w([\w]|\s*[*&]\s*|::)+'
       
  2469     identifier_regexp = r'(?P<identifier>[\w:]+)'
       
  2470     maybe_bitfield_regexp = r'(:\s*\d+\s*)?'
       
  2471     character_after_identifier_regexp = r'(?P<character_after_identifier>[[;()=,])(?!=)'
       
  2472     declaration_without_type_regexp = r'\s*' + identifier_regexp + r'\s*' + maybe_bitfield_regexp + character_after_identifier_regexp
       
  2473     declaration_with_type_regexp = r'\s*' + type_regexp + r'\s' + declaration_without_type_regexp
       
  2474     is_function_arguments = False
       
  2475     number_of_identifiers = 0
       
  2476     while True:
       
  2477         # If we are seeing the first identifier or arguments of a
       
  2478         # function, there should be a type name before an identifier.
       
  2479         if not number_of_identifiers or is_function_arguments:
       
  2480             declaration_regexp = declaration_with_type_regexp
       
  2481         else:
       
  2482             declaration_regexp = declaration_without_type_regexp
       
  2483 
       
  2484         matched = match(declaration_regexp, line)
       
  2485         if not matched:
       
  2486             return
       
  2487         identifier = matched.group('identifier')
       
  2488         character_after_identifier = matched.group('character_after_identifier')
       
  2489 
       
  2490         # If we removed a non-for-control statement, the character after
       
  2491         # the identifier should be '='. With this rule, we can avoid
       
  2492         # warning for cases like "if (val & INT_MAX) {".
       
  2493         if control_statement and character_after_identifier != '=':
       
  2494             return
       
  2495 
       
  2496         is_function_arguments = is_function_arguments or character_after_identifier == '('
       
  2497 
       
  2498         # Remove "m_" and "s_" to allow them.
       
  2499         modified_identifier = sub(r'(^|(?<=::))[ms]_', '', identifier)
       
  2500         if modified_identifier.find('_') >= 0:
       
  2501             # Various exceptions to the rule: JavaScript op codes functions, const_iterator.
       
  2502             if (not (filename.find('JavaScriptCore') >= 0 and modified_identifier.find('_op_') >= 0)
       
  2503                 and not modified_identifier.startswith('tst_')
       
  2504                 and not modified_identifier.startswith('webkit_dom_object_')
       
  2505                 and not modified_identifier.startswith('qt_')
       
  2506                 and not modified_identifier.find('::qt_') >= 0
       
  2507                 and not modified_identifier == "const_iterator"):
       
  2508                 error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use underscores in your identifier names.")
       
  2509 
       
  2510         # Check for variables named 'l', these are too easy to confuse with '1' in some fonts
       
  2511         if modified_identifier == 'l':
       
  2512             error(line_number, 'readability/naming', 4, identifier + " is incorrectly named. Don't use the single letter 'l' as an identifier name.")
       
  2513 
       
  2514         # There can be only one declaration in non-for-control statements.
       
  2515         if control_statement:
       
  2516             return
       
  2517         # We should continue checking if this is a function
       
  2518         # declaration because we need to check its arguments.
       
  2519         # Also, we need to check multiple declarations.
       
  2520         if character_after_identifier != '(' and character_after_identifier != ',':
       
  2521             return
       
  2522 
       
  2523         number_of_identifiers += 1
       
  2524         line = line[matched.end():]
       
  2525 
       
  2526 def check_c_style_cast(line_number, line, raw_line, cast_type, pattern,
       
  2527                        error):
       
  2528     """Checks for a C-style cast by looking for the pattern.
       
  2529 
       
  2530     This also handles sizeof(type) warnings, due to similarity of content.
       
  2531 
       
  2532     Args:
       
  2533       line_number: The number of the line to check.
       
  2534       line: The line of code to check.
       
  2535       raw_line: The raw line of code to check, with comments.
       
  2536       cast_type: The string for the C++ cast to recommend.  This is either
       
  2537                  reinterpret_cast or static_cast, depending.
       
  2538       pattern: The regular expression used to find C-style casts.
       
  2539       error: The function to call with any errors found.
       
  2540     """
       
  2541     matched = search(pattern, line)
       
  2542     if not matched:
       
  2543         return
       
  2544 
       
  2545     # e.g., sizeof(int)
       
  2546     sizeof_match = match(r'.*sizeof\s*$', line[0:matched.start(1) - 1])
       
  2547     if sizeof_match:
       
  2548         error(line_number, 'runtime/sizeof', 1,
       
  2549               'Using sizeof(type).  Use sizeof(varname) instead if possible')
       
  2550         return
       
  2551 
       
  2552     remainder = line[matched.end(0):]
       
  2553 
       
  2554     # The close paren is for function pointers as arguments to a function.
       
  2555     # eg, void foo(void (*bar)(int));
       
  2556     # The semicolon check is a more basic function check; also possibly a
       
  2557     # function pointer typedef.
       
  2558     # eg, void foo(int); or void foo(int) const;
       
  2559     # The equals check is for function pointer assignment.
       
  2560     # eg, void *(*foo)(int) = ...
       
  2561     #
       
  2562     # Right now, this will only catch cases where there's a single argument, and
       
  2563     # it's unnamed.  It should probably be expanded to check for multiple
       
  2564     # arguments with some unnamed.
       
  2565     function_match = match(r'\s*(\)|=|(const)?\s*(;|\{|throw\(\)))', remainder)
       
  2566     if function_match:
       
  2567         if (not function_match.group(3)
       
  2568             or function_match.group(3) == ';'
       
  2569             or raw_line.find('/*') < 0):
       
  2570             error(line_number, 'readability/function', 3,
       
  2571                   'All parameters should be named in a function')
       
  2572         return
       
  2573 
       
  2574     # At this point, all that should be left is actual casts.
       
  2575     error(line_number, 'readability/casting', 4,
       
  2576           'Using C-style cast.  Use %s<%s>(...) instead' %
       
  2577           (cast_type, matched.group(1)))
       
  2578 
       
  2579 
       
  2580 _HEADERS_CONTAINING_TEMPLATES = (
       
  2581     ('<deque>', ('deque',)),
       
  2582     ('<functional>', ('unary_function', 'binary_function',
       
  2583                       'plus', 'minus', 'multiplies', 'divides', 'modulus',
       
  2584                       'negate',
       
  2585                       'equal_to', 'not_equal_to', 'greater', 'less',
       
  2586                       'greater_equal', 'less_equal',
       
  2587                       'logical_and', 'logical_or', 'logical_not',
       
  2588                       'unary_negate', 'not1', 'binary_negate', 'not2',
       
  2589                       'bind1st', 'bind2nd',
       
  2590                       'pointer_to_unary_function',
       
  2591                       'pointer_to_binary_function',
       
  2592                       'ptr_fun',
       
  2593                       'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
       
  2594                       'mem_fun_ref_t',
       
  2595                       'const_mem_fun_t', 'const_mem_fun1_t',
       
  2596                       'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
       
  2597                       'mem_fun_ref',
       
  2598                      )),
       
  2599     ('<limits>', ('numeric_limits',)),
       
  2600     ('<list>', ('list',)),
       
  2601     ('<map>', ('map', 'multimap',)),
       
  2602     ('<memory>', ('allocator',)),
       
  2603     ('<queue>', ('queue', 'priority_queue',)),
       
  2604     ('<set>', ('set', 'multiset',)),
       
  2605     ('<stack>', ('stack',)),
       
  2606     ('<string>', ('char_traits', 'basic_string',)),
       
  2607     ('<utility>', ('pair',)),
       
  2608     ('<vector>', ('vector',)),
       
  2609 
       
  2610     # gcc extensions.
       
  2611     # Note: std::hash is their hash, ::hash is our hash
       
  2612     ('<hash_map>', ('hash_map', 'hash_multimap',)),
       
  2613     ('<hash_set>', ('hash_set', 'hash_multiset',)),
       
  2614     ('<slist>', ('slist',)),
       
  2615     )
       
  2616 
       
  2617 _HEADERS_ACCEPTED_BUT_NOT_PROMOTED = {
       
  2618     # We can trust with reasonable confidence that map gives us pair<>, too.
       
  2619     'pair<>': ('map', 'multimap', 'hash_map', 'hash_multimap')
       
  2620 }
       
  2621 
       
  2622 _RE_PATTERN_STRING = re.compile(r'\bstring\b')
       
  2623 
       
  2624 _re_pattern_algorithm_header = []
       
  2625 for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
       
  2626                   'transform'):
       
  2627     # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
       
  2628     # type::max().
       
  2629     _re_pattern_algorithm_header.append(
       
  2630         (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
       
  2631          _template,
       
  2632          '<algorithm>'))
       
  2633 
       
  2634 _re_pattern_templates = []
       
  2635 for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
       
  2636     for _template in _templates:
       
  2637         _re_pattern_templates.append(
       
  2638             (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
       
  2639              _template + '<>',
       
  2640              _header))
       
  2641 
       
  2642 
       
  2643 def files_belong_to_same_module(filename_cpp, filename_h):
       
  2644     """Check if these two filenames belong to the same module.
       
  2645 
       
  2646     The concept of a 'module' here is a as follows:
       
  2647     foo.h, foo-inl.h, foo.cpp, foo_test.cpp and foo_unittest.cpp belong to the
       
  2648     same 'module' if they are in the same directory.
       
  2649     some/path/public/xyzzy and some/path/internal/xyzzy are also considered
       
  2650     to belong to the same module here.
       
  2651 
       
  2652     If the filename_cpp contains a longer path than the filename_h, for example,
       
  2653     '/absolute/path/to/base/sysinfo.cpp', and this file would include
       
  2654     'base/sysinfo.h', this function also produces the prefix needed to open the
       
  2655     header. This is used by the caller of this function to more robustly open the
       
  2656     header file. We don't have access to the real include paths in this context,
       
  2657     so we need this guesswork here.
       
  2658 
       
  2659     Known bugs: tools/base/bar.cpp and base/bar.h belong to the same module
       
  2660     according to this implementation. Because of this, this function gives
       
  2661     some false positives. This should be sufficiently rare in practice.
       
  2662 
       
  2663     Args:
       
  2664       filename_cpp: is the path for the .cpp file
       
  2665       filename_h: is the path for the header path
       
  2666 
       
  2667     Returns:
       
  2668       Tuple with a bool and a string:
       
  2669       bool: True if filename_cpp and filename_h belong to the same module.
       
  2670       string: the additional prefix needed to open the header file.
       
  2671     """
       
  2672 
       
  2673     if not filename_cpp.endswith('.cpp'):
       
  2674         return (False, '')
       
  2675     filename_cpp = filename_cpp[:-len('.cpp')]
       
  2676     if filename_cpp.endswith('_unittest'):
       
  2677         filename_cpp = filename_cpp[:-len('_unittest')]
       
  2678     elif filename_cpp.endswith('_test'):
       
  2679         filename_cpp = filename_cpp[:-len('_test')]
       
  2680     filename_cpp = filename_cpp.replace('/public/', '/')
       
  2681     filename_cpp = filename_cpp.replace('/internal/', '/')
       
  2682 
       
  2683     if not filename_h.endswith('.h'):
       
  2684         return (False, '')
       
  2685     filename_h = filename_h[:-len('.h')]
       
  2686     if filename_h.endswith('-inl'):
       
  2687         filename_h = filename_h[:-len('-inl')]
       
  2688     filename_h = filename_h.replace('/public/', '/')
       
  2689     filename_h = filename_h.replace('/internal/', '/')
       
  2690 
       
  2691     files_belong_to_same_module = filename_cpp.endswith(filename_h)
       
  2692     common_path = ''
       
  2693     if files_belong_to_same_module:
       
  2694         common_path = filename_cpp[:-len(filename_h)]
       
  2695     return files_belong_to_same_module, common_path
       
  2696 
       
  2697 
       
  2698 def update_include_state(filename, include_state, io=codecs):
       
  2699     """Fill up the include_state with new includes found from the file.
       
  2700 
       
  2701     Args:
       
  2702       filename: the name of the header to read.
       
  2703       include_state: an _IncludeState instance in which the headers are inserted.
       
  2704       io: The io factory to use to read the file. Provided for testability.
       
  2705 
       
  2706     Returns:
       
  2707       True if a header was succesfully added. False otherwise.
       
  2708     """
       
  2709     header_file = None
       
  2710     try:
       
  2711         header_file = io.open(filename, 'r', 'utf8', 'replace')
       
  2712     except IOError:
       
  2713         return False
       
  2714     line_number = 0
       
  2715     for line in header_file:
       
  2716         line_number += 1
       
  2717         clean_line = cleanse_comments(line)
       
  2718         matched = _RE_PATTERN_INCLUDE.search(clean_line)
       
  2719         if matched:
       
  2720             include = matched.group(2)
       
  2721             # The value formatting is cute, but not really used right now.
       
  2722             # What matters here is that the key is in include_state.
       
  2723             include_state.setdefault(include, '%s:%d' % (filename, line_number))
       
  2724     return True
       
  2725 
       
  2726 
       
  2727 def check_for_include_what_you_use(filename, clean_lines, include_state, error,
       
  2728                                    io=codecs):
       
  2729     """Reports for missing stl includes.
       
  2730 
       
  2731     This function will output warnings to make sure you are including the headers
       
  2732     necessary for the stl containers and functions that you use. We only give one
       
  2733     reason to include a header. For example, if you use both equal_to<> and
       
  2734     less<> in a .h file, only one (the latter in the file) of these will be
       
  2735     reported as a reason to include the <functional>.
       
  2736 
       
  2737     Args:
       
  2738       filename: The name of the current file.
       
  2739       clean_lines: A CleansedLines instance containing the file.
       
  2740       include_state: An _IncludeState instance.
       
  2741       error: The function to call with any errors found.
       
  2742       io: The IO factory to use to read the header file. Provided for unittest
       
  2743           injection.
       
  2744     """
       
  2745     required = {}  # A map of header name to line_number and the template entity.
       
  2746         # Example of required: { '<functional>': (1219, 'less<>') }
       
  2747 
       
  2748     for line_number in xrange(clean_lines.num_lines()):
       
  2749         line = clean_lines.elided[line_number]
       
  2750         if not line or line[0] == '#':
       
  2751             continue
       
  2752 
       
  2753         # String is special -- it is a non-templatized type in STL.
       
  2754         if _RE_PATTERN_STRING.search(line):
       
  2755             required['<string>'] = (line_number, 'string')
       
  2756 
       
  2757         for pattern, template, header in _re_pattern_algorithm_header:
       
  2758             if pattern.search(line):
       
  2759                 required[header] = (line_number, template)
       
  2760 
       
  2761         # The following function is just a speed up, no semantics are changed.
       
  2762         if not '<' in line:  # Reduces the cpu time usage by skipping lines.
       
  2763             continue
       
  2764 
       
  2765         for pattern, template, header in _re_pattern_templates:
       
  2766             if pattern.search(line):
       
  2767                 required[header] = (line_number, template)
       
  2768 
       
  2769     # The policy is that if you #include something in foo.h you don't need to
       
  2770     # include it again in foo.cpp. Here, we will look at possible includes.
       
  2771     # Let's copy the include_state so it is only messed up within this function.
       
  2772     include_state = include_state.copy()
       
  2773 
       
  2774     # Did we find the header for this file (if any) and succesfully load it?
       
  2775     header_found = False
       
  2776 
       
  2777     # Use the absolute path so that matching works properly.
       
  2778     abs_filename = os.path.abspath(filename)
       
  2779 
       
  2780     # For Emacs's flymake.
       
  2781     # If cpp_style is invoked from Emacs's flymake, a temporary file is generated
       
  2782     # by flymake and that file name might end with '_flymake.cpp'. In that case,
       
  2783     # restore original file name here so that the corresponding header file can be
       
  2784     # found.
       
  2785     # e.g. If the file name is 'foo_flymake.cpp', we should search for 'foo.h'
       
  2786     # instead of 'foo_flymake.h'
       
  2787     abs_filename = re.sub(r'_flymake\.cpp$', '.cpp', abs_filename)
       
  2788 
       
  2789     # include_state is modified during iteration, so we iterate over a copy of
       
  2790     # the keys.
       
  2791     for header in include_state.keys():  #NOLINT
       
  2792         (same_module, common_path) = files_belong_to_same_module(abs_filename, header)
       
  2793         fullpath = common_path + header
       
  2794         if same_module and update_include_state(fullpath, include_state, io):
       
  2795             header_found = True
       
  2796 
       
  2797     # If we can't find the header file for a .cpp, assume it's because we don't
       
  2798     # know where to look. In that case we'll give up as we're not sure they
       
  2799     # didn't include it in the .h file.
       
  2800     # FIXME: Do a better job of finding .h files so we are confident that
       
  2801     #        not having the .h file means there isn't one.
       
  2802     if filename.endswith('.cpp') and not header_found:
       
  2803         return
       
  2804 
       
  2805     # All the lines have been processed, report the errors found.
       
  2806     for required_header_unstripped in required:
       
  2807         template = required[required_header_unstripped][1]
       
  2808         if template in _HEADERS_ACCEPTED_BUT_NOT_PROMOTED:
       
  2809             headers = _HEADERS_ACCEPTED_BUT_NOT_PROMOTED[template]
       
  2810             if [True for header in headers if header in include_state]:
       
  2811                 continue
       
  2812         if required_header_unstripped.strip('<>"') not in include_state:
       
  2813             error(required[required_header_unstripped][0],
       
  2814                   'build/include_what_you_use', 4,
       
  2815                   'Add #include ' + required_header_unstripped + ' for ' + template)
       
  2816 
       
  2817 
       
  2818 def process_line(filename, file_extension,
       
  2819                  clean_lines, line, include_state, function_state,
       
  2820                  class_state, file_state, error):
       
  2821     """Processes a single line in the file.
       
  2822 
       
  2823     Args:
       
  2824       filename: Filename of the file that is being processed.
       
  2825       file_extension: The extension (dot not included) of the file.
       
  2826       clean_lines: An array of strings, each representing a line of the file,
       
  2827                    with comments stripped.
       
  2828       line: Number of line being processed.
       
  2829       include_state: An _IncludeState instance in which the headers are inserted.
       
  2830       function_state: A _FunctionState instance which counts function lines, etc.
       
  2831       class_state: A _ClassState instance which maintains information about
       
  2832                    the current stack of nested class declarations being parsed.
       
  2833       file_state: A _FileState instance which maintains information about
       
  2834                   the state of things in the file.
       
  2835       error: A callable to which errors are reported, which takes arguments:
       
  2836              line number, error level, and message
       
  2837 
       
  2838     """
       
  2839     raw_lines = clean_lines.raw_lines
       
  2840     check_for_function_lengths(clean_lines, line, function_state, error)
       
  2841     if search(r'\bNOLINT\b', raw_lines[line]):  # ignore nolint lines
       
  2842         return
       
  2843     check_for_multiline_comments_and_strings(clean_lines, line, error)
       
  2844     check_style(clean_lines, line, file_extension, file_state, error)
       
  2845     check_language(filename, clean_lines, line, file_extension, include_state,
       
  2846                    error)
       
  2847     check_for_non_standard_constructs(clean_lines, line, class_state, error)
       
  2848     check_posix_threading(clean_lines, line, error)
       
  2849     check_invalid_increment(clean_lines, line, error)
       
  2850 
       
  2851 
       
  2852 def _process_lines(filename, file_extension, lines, error, min_confidence):
       
  2853     """Performs lint checks and reports any errors to the given error function.
       
  2854 
       
  2855     Args:
       
  2856       filename: Filename of the file that is being processed.
       
  2857       file_extension: The extension (dot not included) of the file.
       
  2858       lines: An array of strings, each representing a line of the file, with the
       
  2859              last element being empty if the file is termined with a newline.
       
  2860       error: A callable to which errors are reported, which takes 4 arguments:
       
  2861     """
       
  2862     lines = (['// marker so line numbers and indices both start at 1'] + lines +
       
  2863              ['// marker so line numbers end in a known way'])
       
  2864 
       
  2865     include_state = _IncludeState()
       
  2866     function_state = _FunctionState(min_confidence)
       
  2867     class_state = _ClassState()
       
  2868     file_state = _FileState()
       
  2869 
       
  2870     check_for_copyright(lines, error)
       
  2871 
       
  2872     if file_extension == 'h':
       
  2873         check_for_header_guard(filename, lines, error)
       
  2874 
       
  2875     remove_multi_line_comments(lines, error)
       
  2876     clean_lines = CleansedLines(lines)
       
  2877     for line in xrange(clean_lines.num_lines()):
       
  2878         process_line(filename, file_extension, clean_lines, line,
       
  2879                      include_state, function_state, class_state, file_state, error)
       
  2880     class_state.check_finished(error)
       
  2881 
       
  2882     check_for_include_what_you_use(filename, clean_lines, include_state, error)
       
  2883 
       
  2884     # We check here rather than inside process_line so that we see raw
       
  2885     # lines rather than "cleaned" lines.
       
  2886     check_for_unicode_replacement_characters(lines, error)
       
  2887 
       
  2888     check_for_new_line_at_eof(lines, error)
       
  2889 
       
  2890 
       
  2891 class CppChecker(object):
       
  2892 
       
  2893     """Processes C++ lines for checking style."""
       
  2894 
       
  2895     # This list is used to--
       
  2896     #
       
  2897     # (1) generate an explicit list of all possible categories,
       
  2898     # (2) unit test that all checked categories have valid names, and
       
  2899     # (3) unit test that all categories are getting unit tested.
       
  2900     #
       
  2901     categories = set([
       
  2902         'build/class',
       
  2903         'build/deprecated',
       
  2904         'build/endif_comment',
       
  2905         'build/forward_decl',
       
  2906         'build/header_guard',
       
  2907         'build/include',
       
  2908         'build/include_order',
       
  2909         'build/include_what_you_use',
       
  2910         'build/namespaces',
       
  2911         'build/printf_format',
       
  2912         'build/storage_class',
       
  2913         'build/using_std',
       
  2914         'legal/copyright',
       
  2915         'readability/braces',
       
  2916         'readability/casting',
       
  2917         'readability/check',
       
  2918         'readability/comparison_to_zero',
       
  2919         'readability/constructors',
       
  2920         'readability/control_flow',
       
  2921         'readability/fn_size',
       
  2922         'readability/function',
       
  2923         'readability/multiline_comment',
       
  2924         'readability/multiline_string',
       
  2925         'readability/naming',
       
  2926         'readability/null',
       
  2927         'readability/streams',
       
  2928         'readability/todo',
       
  2929         'readability/utf8',
       
  2930         'runtime/arrays',
       
  2931         'runtime/casting',
       
  2932         'runtime/explicit',
       
  2933         'runtime/init',
       
  2934         'runtime/int',
       
  2935         'runtime/invalid_increment',
       
  2936         'runtime/max_min_macros',
       
  2937         'runtime/memset',
       
  2938         'runtime/printf',
       
  2939         'runtime/printf_format',
       
  2940         'runtime/references',
       
  2941         'runtime/rtti',
       
  2942         'runtime/sizeof',
       
  2943         'runtime/string',
       
  2944         'runtime/threadsafe_fn',
       
  2945         'runtime/virtual',
       
  2946         'whitespace/blank_line',
       
  2947         'whitespace/braces',
       
  2948         'whitespace/comma',
       
  2949         'whitespace/comments',
       
  2950         'whitespace/declaration',
       
  2951         'whitespace/end_of_line',
       
  2952         'whitespace/ending_newline',
       
  2953         'whitespace/indent',
       
  2954         'whitespace/labels',
       
  2955         'whitespace/line_length',
       
  2956         'whitespace/newline',
       
  2957         'whitespace/operators',
       
  2958         'whitespace/parens',
       
  2959         'whitespace/semicolon',
       
  2960         'whitespace/tab',
       
  2961         'whitespace/todo',
       
  2962         ])
       
  2963 
       
  2964     def __init__(self, file_path, file_extension, handle_style_error,
       
  2965                  min_confidence):
       
  2966         """Create a CppChecker instance.
       
  2967 
       
  2968         Args:
       
  2969           file_extension: A string that is the file extension, without
       
  2970                           the leading dot.
       
  2971 
       
  2972         """
       
  2973         self.file_extension = file_extension
       
  2974         self.file_path = file_path
       
  2975         self.handle_style_error = handle_style_error
       
  2976         self.min_confidence = min_confidence
       
  2977 
       
  2978     # Useful for unit testing.
       
  2979     def __eq__(self, other):
       
  2980         """Return whether this CppChecker instance is equal to another."""
       
  2981         if self.file_extension != other.file_extension:
       
  2982             return False
       
  2983         if self.file_path != other.file_path:
       
  2984             return False
       
  2985         if self.handle_style_error != other.handle_style_error:
       
  2986             return False
       
  2987         if self.min_confidence != other.min_confidence:
       
  2988             return False
       
  2989 
       
  2990         return True
       
  2991 
       
  2992     # Useful for unit testing.
       
  2993     def __ne__(self, other):
       
  2994         # Python does not automatically deduce __ne__() from __eq__().
       
  2995         return not self.__eq__(other)
       
  2996 
       
  2997     def check(self, lines):
       
  2998         _process_lines(self.file_path, self.file_extension, lines,
       
  2999                        self.handle_style_error, self.min_confidence)
       
  3000 
       
  3001 
       
  3002 # FIXME: Remove this function (requires refactoring unit tests).
       
  3003 def process_file_data(filename, file_extension, lines, error, min_confidence):
       
  3004     checker = CppChecker(filename, file_extension, error, min_confidence)
       
  3005     checker.check(lines)