platform35/org.eclipse.core.resources/src/org/eclipse/core/internal/propertytester/StringMatcher.java
changeset 40 eb3c938c7fef
equal deleted inserted replaced
39:2a03ec4dbf31 40:eb3c938c7fef
       
     1 /*******************************************************************************
       
     2  * Copyright (c) 2000, 2006 IBM Corporation and others.
       
     3  * All rights reserved. This program and the accompanying materials
       
     4  * are made available under the terms of the Eclipse Public License v1.0
       
     5  * which accompanies this distribution, and is available at
       
     6  * http://www.eclipse.org/legal/epl-v10.html
       
     7  *
       
     8  * Contributors:
       
     9  *     IBM Corporation - initial API and implementation
       
    10  *******************************************************************************/
       
    11 package org.eclipse.core.internal.propertytester;
       
    12 
       
    13 import java.util.ArrayList;
       
    14 
       
    15 /**
       
    16  * A string pattern matcher, supporting "*" and "?" wild cards.
       
    17  * 
       
    18  * @since 3.2
       
    19  */
       
    20 public class StringMatcher {
       
    21 	private static final char SINGLE_WILD_CARD = '\u0000';
       
    22 
       
    23 	/**
       
    24 	 * Boundary value beyond which we don't need to search in the text 
       
    25 	 */
       
    26 	private int bound = 0;
       
    27 
       
    28 	private boolean hasLeadingStar;
       
    29 
       
    30 	private boolean hasTrailingStar;
       
    31 
       
    32 	private final String pattern;
       
    33 
       
    34 	private final int patternLength;
       
    35 
       
    36 	/**
       
    37 	 * The pattern split into segments separated by *
       
    38 	 */
       
    39 	private String segments[];
       
    40 
       
    41 	/**
       
    42 	 * StringMatcher constructor takes in a String object that is a simple 
       
    43 	 * pattern which may contain '*' for 0 and many characters and
       
    44 	 * '?' for exactly one character.  
       
    45 	 *
       
    46 	 * Literal '*' and '?' characters must be escaped in the pattern 
       
    47 	 * e.g., "\*" means literal "*", etc.
       
    48 	 *
       
    49 	 * Escaping any other character (including the escape character itself), 
       
    50 	 * just results in that character in the pattern.
       
    51 	 * e.g., "\a" means "a" and "\\" means "\"
       
    52 	 *
       
    53 	 * If invoking the StringMatcher with string literals in Java, don't forget
       
    54 	 * escape characters are represented by "\\".
       
    55 	 *
       
    56 	 * @param pattern the pattern to match text against
       
    57 	 */
       
    58 	public StringMatcher(String pattern) {
       
    59 		if (pattern == null)
       
    60 			throw new IllegalArgumentException();
       
    61 		this.pattern = pattern;
       
    62 		patternLength = pattern.length();
       
    63 		parseWildCards();
       
    64 	}
       
    65 
       
    66 	/** 
       
    67 	 * @param text a simple regular expression that may only contain '?'(s)
       
    68 	 * @param start the starting index in the text for search, inclusive
       
    69 	 * @param end the stopping point of search, exclusive
       
    70 	 * @param p a simple regular expression that may contain '?'
       
    71 	 * @return the starting index in the text of the pattern , or -1 if not found 
       
    72 	 */
       
    73 	private int findPosition(String text, int start, int end, String p) {
       
    74 		boolean hasWildCard = p.indexOf(SINGLE_WILD_CARD) >= 0;
       
    75 		int plen = p.length();
       
    76 		for (int i = start, max = end - plen; i <= max; ++i) {
       
    77 			if (hasWildCard) {
       
    78 				if (regExpRegionMatches(text, i, p, 0, plen))
       
    79 					return i;
       
    80 			} else {
       
    81 				if (text.regionMatches(true, i, p, 0, plen))
       
    82 					return i;
       
    83 			}
       
    84 		}
       
    85 		return -1;
       
    86 	}
       
    87 
       
    88 	/**
       
    89 	 * Given the starting (inclusive) and the ending (exclusive) positions in the   
       
    90 	 * <code>text</code>, determine if the given substring matches with aPattern  
       
    91 	 * @return true if the specified portion of the text matches the pattern
       
    92 	 * @param text a String object that contains the substring to match 
       
    93 	 */
       
    94 	public boolean match(String text) {
       
    95 		if (text == null)
       
    96 			return false;
       
    97 		final int end = text.length();
       
    98 		final int segmentCount = segments.length;
       
    99 		if (segmentCount == 0 && (hasLeadingStar || hasTrailingStar)) // pattern contains only '*'(s)
       
   100 			return true;
       
   101 		if (end == 0)
       
   102 			return patternLength == 0;
       
   103 		if (patternLength == 0)
       
   104 			return false;
       
   105 		int currentTextPosition = 0;
       
   106 		if ((end - bound) < 0)
       
   107 			return false;
       
   108 		int segmentIndex = 0;
       
   109 		String current = segments[segmentIndex];
       
   110 
       
   111 		/* process first segment */
       
   112 		if (!hasLeadingStar) {
       
   113 			int currentLength = current.length();
       
   114 			if (!regExpRegionMatches(text, 0, current, 0, currentLength))
       
   115 				return false;
       
   116 			segmentIndex++;
       
   117 			currentTextPosition = currentTextPosition + currentLength;
       
   118 		}
       
   119 		if ((segmentCount == 1) && (!hasLeadingStar) && (!hasTrailingStar)) {
       
   120 			// only one segment to match, no wild cards specified
       
   121 			return currentTextPosition == end;
       
   122 		}
       
   123 		/* process middle segments */
       
   124 		while (segmentIndex < segmentCount) {
       
   125 			current = segments[segmentIndex];
       
   126 			int currentMatch = findPosition(text, currentTextPosition, end, current);
       
   127 			if (currentMatch < 0)
       
   128 				return false;
       
   129 			currentTextPosition = currentMatch + current.length();
       
   130 			segmentIndex++;
       
   131 		}
       
   132 
       
   133 		/* process final segment */
       
   134 		if (!hasTrailingStar && currentTextPosition != end) {
       
   135 			int currentLength = current.length();
       
   136 			return regExpRegionMatches(text, end - currentLength, current, 0, currentLength);
       
   137 		}
       
   138 		return segmentIndex == segmentCount;
       
   139 	}
       
   140 
       
   141 	/**
       
   142 	 * Parses the pattern into segments separated by wildcard '*' characters.
       
   143 	 */
       
   144 	private void parseWildCards() {
       
   145 		if (pattern.startsWith("*"))//$NON-NLS-1$
       
   146 			hasLeadingStar = true;
       
   147 		if (pattern.endsWith("*")) {//$NON-NLS-1$
       
   148 			/* make sure it's not an escaped wildcard */
       
   149 			if (patternLength > 1 && pattern.charAt(patternLength - 2) != '\\') {
       
   150 				hasTrailingStar = true;
       
   151 			}
       
   152 		}
       
   153 
       
   154 		ArrayList temp = new ArrayList();
       
   155 
       
   156 		int pos = 0;
       
   157 		StringBuffer buf = new StringBuffer();
       
   158 		while (pos < patternLength) {
       
   159 			char c = pattern.charAt(pos++);
       
   160 			switch (c) {
       
   161 				case '\\' :
       
   162 					if (pos >= patternLength) {
       
   163 						buf.append(c);
       
   164 					} else {
       
   165 						char next = pattern.charAt(pos++);
       
   166 						/* if it's an escape sequence */
       
   167 						if (next == '*' || next == '?' || next == '\\') {
       
   168 							buf.append(next);
       
   169 						} else {
       
   170 							/* not an escape sequence, just insert literally */
       
   171 							buf.append(c);
       
   172 							buf.append(next);
       
   173 						}
       
   174 					}
       
   175 					break;
       
   176 				case '*' :
       
   177 					if (buf.length() > 0) {
       
   178 						/* new segment */
       
   179 						temp.add(buf.toString());
       
   180 						bound += buf.length();
       
   181 						buf.setLength(0);
       
   182 					}
       
   183 					break;
       
   184 				case '?' :
       
   185 					/* append special character representing single match wildcard */
       
   186 					buf.append(SINGLE_WILD_CARD);
       
   187 					break;
       
   188 				default :
       
   189 					buf.append(c);
       
   190 			}
       
   191 		}
       
   192 
       
   193 		/* add last buffer to segment list */
       
   194 		if (buf.length() > 0) {
       
   195 			temp.add(buf.toString());
       
   196 			bound += buf.length();
       
   197 		}
       
   198 		segments = (String[]) temp.toArray(new String[temp.size()]);
       
   199 	}
       
   200 
       
   201 	/**
       
   202 	 * 
       
   203 	 * @return boolean
       
   204 	 * @param text a String to match
       
   205 	 * @param tStart the starting index of match, inclusive
       
   206 	 * @param p a simple regular expression that may contain '?'
       
   207 	 * @param pStart The start position in the pattern
       
   208 	 * @param plen The length of the pattern
       
   209 	 */
       
   210 	private boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
       
   211 		while (plen-- > 0) {
       
   212 			char tchar = text.charAt(tStart++);
       
   213 			char pchar = p.charAt(pStart++);
       
   214 
       
   215 			// process wild cards, skipping single wild cards
       
   216 			if (pchar == SINGLE_WILD_CARD)
       
   217 				continue;
       
   218 			if (pchar == tchar)
       
   219 				continue;
       
   220 			if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar))
       
   221 				continue;
       
   222 			// comparing after converting to upper case doesn't handle all cases;
       
   223 			// also compare after converting to lower case
       
   224 			if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar))
       
   225 				continue;
       
   226 			return false;
       
   227 		}
       
   228 		return true;
       
   229 	}
       
   230 }