1 #!/usr/bin/perl |
|
2 |
|
3 |
|
4 |
|
5 use IO::Socket; |
|
6 use Getopt::Long; |
|
7 |
|
8 |
|
9 my $target_url; #target url for the roadmap |
|
10 my $tdomain; #tag for the domain to be use in csv file |
|
11 my $csvfile; #output csv file name |
|
12 my $authon= ''; #does it require authorisation? default is false |
|
13 |
|
14 |
|
15 my $count_target; #this value is used to pass a string to match and count on each package backlog |
|
16 my $ispackage; |
|
17 my $splitbklogs; |
|
18 my $summaryheader="ID\tPackage\tFeatures\tFormat\tHttp\n" ; |
|
19 my $newtdformat = 0; |
|
20 my @blist = ("Tracking_Package_features","Kernel_EPLization_Backlog", |
|
21 "Package_Backlog_Q","Wishlist","Test_Package","Backlog_Details"); |
|
22 |
|
23 |
|
24 sub blacklist |
|
25 { |
|
26 ($name)=@_; |
|
27 |
|
28 foreach(@blist) { |
|
29 |
|
30 if ( $name =~ m/$_/sg) { print "WARNING - $name is blacklisted\n"; return 1;} |
|
31 } |
|
32 |
|
33 return 0; |
|
34 |
|
35 } |
|
36 |
|
37 |
|
38 sub getpage |
|
39 { |
|
40 #arguments |
|
41 ($page,$host,$auth,$myfile)=@_; |
|
42 |
|
43 |
|
44 #output file |
|
45 open ( outputfile, ">".$myfile); |
|
46 |
|
47 |
|
48 $port = "http(80)"; |
|
49 $getmess = "GET " . $page ." HTTP/1.1\n" . $auth; |
|
50 |
|
51 print "INFO - sending message - $getmess\n"; |
|
52 print outputfile "$getmess\n\n"; |
|
53 |
|
54 $sock = IO::Socket::INET->new |
|
55 ( |
|
56 PeerAddr => $host, PeerPort => $port, Proto => 'tcp', |
|
57 ) ; |
|
58 |
|
59 |
|
60 print $sock "$getmess\n\n"; |
|
61 |
|
62 |
|
63 while(<$sock>) { |
|
64 |
|
65 print outputfile $_; |
|
66 |
|
67 } |
|
68 |
|
69 close ($sock); |
|
70 close (outputfile); |
|
71 } |
|
72 |
|
73 sub prntfeatures |
|
74 { |
|
75 |
|
76 ($release,$package,$features,$myfile,$domain)=@_; |
|
77 |
|
78 $release =~ s/\\//sg; |
|
79 $csvdel ="\",\""; |
|
80 $invcoma ="\""; |
|
81 if ($newtdformat) { |
|
82 $package =~ s/backlog//sgi; |
|
83 print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $invcoma\n"; |
|
84 |
|
85 } else { |
|
86 |
|
87 $features = $features."<dt"; |
|
88 |
|
89 |
|
90 |
|
91 while ( $features =~ /dt\>(.*?)\<\/dt(.*?)\<dt/sg ){ |
|
92 $myfeat = $1; |
|
93 $subfeat =$2; |
|
94 |
|
95 $myfeat =~ s/\n/ /sg; |
|
96 |
|
97 pos($features) = pos($features) -2; |
|
98 |
|
99 $mystr=""; |
|
100 while ( $subfeat =~ /\<dd\>(.*?)\<\/dd\>/sg) { |
|
101 $mysubfeat = $mysubfeat.$mystr.$1; |
|
102 $mystr = " & "; |
|
103 } |
|
104 undef $mystr; |
|
105 $mysubfeat =~ s/,/ /sg; |
|
106 $mysubfeat =~ s/\n//sg; |
|
107 $mysubfeat =~ s/\<.*?\>//sg; |
|
108 |
|
109 |
|
110 print $myfile "$invcoma $release $csvdel $domain $csvdel $package $csvdel $myfeat $csvdel $csvdel $csvdel $mysubfeat $invcoma\n"; |
|
111 |
|
112 $mysubfeat = ""; |
|
113 } |
|
114 |
|
115 } |
|
116 } |
|
117 |
|
118 sub loadfile |
|
119 { |
|
120 |
|
121 $/ = " "; |
|
122 #arguments |
|
123 ($myfile)=@_; |
|
124 open ( inputfile, "<".$myfile); |
|
125 my $contents = do { local $/; <inputfile> }; |
|
126 close(inputfile); |
|
127 return $contents; |
|
128 |
|
129 } |
|
130 |
|
131 sub td_roadmap |
|
132 { |
|
133 |
|
134 |
|
135 #arguments |
|
136 ($infile,$outfile,$domain,@releases)=@_; |
|
137 |
|
138 |
|
139 $roadmap=loadfile $infile; |
|
140 open ( outputfile, ">>".$outfile); |
|
141 |
|
142 |
|
143 if ($newtdformat) { |
|
144 print "Processing new TD roadmap format\n"; |
|
145 if ($roadmap =~ m /Contents\<\/h2\>.*?\<\/table/sg) { $roadmap =$';} |
|
146 foreach (@releases) { |
|
147 $exp=$_." Roadmap"; |
|
148 |
|
149 if ($roadmap =~ m /($exp)/sg) { |
|
150 print "PASS - Found entry for $_ \n"; |
|
151 $relroad =$'; |
|
152 |
|
153 if ($roadmap =~ m /table\>(.*?)\<\/table/sg) { $relroad =$1;} |
|
154 $relroad =~ s/\n//sg; |
|
155 |
|
156 $skipfirst =1; #skipping the header of the table |
|
157 while ($relroad =~ m/\<tr\>(.*?)\<\/tr(.*)/sg){ |
|
158 $myfeat=$1; |
|
159 $relroad =$2; |
|
160 if ($skipfirst) {$skipfirst=0;next;} |
|
161 $package=""; |
|
162 if ($myfeat =~ m/title\=\"(.*?)\"/sg) { $package=$1; } #looking for package name |
|
163 |
|
164 $myfeat=~ s/\<\/td\>\<td\>/\",\"/sg; |
|
165 $myfeat=~ s/\<.*?\>//sg; |
|
166 |
|
167 if ($myfeat =~m/[A-z]/sg ){prntfeatures($_,$package,$myfeat,outputfile,$domain);} |
|
168 |
|
169 } |
|
170 } |
|
171 } |
|
172 } else { |
|
173 |
|
174 foreach (@releases) { |
|
175 |
|
176 $exp="\\<h2\\>.*?\\>".$_; |
|
177 |
|
178 if ($roadmap =~ m /($exp)/sg) { |
|
179 print "PASS - Found entry for $_ \n"; |
|
180 $relroad =$'; |
|
181 |
|
182 if ($relroad =~ m /(.*?)\<h2/sg) { $relroad =$1;} |
|
183 $i=0; |
|
184 while ($relroad=~ m/\<h3\>.*\>(.*?)\<.*<\/h3/g) { |
|
185 $package = $1; |
|
186 $ppos[$i]= pos($relroad); |
|
187 $pname[$i]= $package; |
|
188 $i++; |
|
189 } |
|
190 for ( $i=0;$i<($#ppos); $i++){ |
|
191 $features= substr ($relroad, $ppos[$i],$ppos[$i+1]-$ppos[$i]); |
|
192 prntfeatures($_,$pname[$i],$features,outputfile,$domain); |
|
193 } |
|
194 $features= substr ($relroad, $ppos[$i]); |
|
195 |
|
196 prntfeatures($_,$pname[$i],$features,outputfile,$domain); |
|
197 @ppos =""; |
|
198 @pname =""; |
|
199 undef ($features); |
|
200 } |
|
201 } |
|
202 |
|
203 } |
|
204 |
|
205 |
|
206 |
|
207 close (outputfile); |
|
208 |
|
209 |
|
210 } |
|
211 |
|
212 |
|
213 sub parse_category { |
|
214 |
|
215 #arguments |
|
216 ($infile)=@_; |
|
217 |
|
218 my @mylink; |
|
219 |
|
220 $mypage=loadfile $infile; |
|
221 $i=0; |
|
222 if ( $mypage =~ m/Pages in category(.*)\<\/table/sg) { |
|
223 print "INFO - Category page found\n"; |
|
224 $mypage = $1; |
|
225 |
|
226 while ($mypage =~ m /\<a href\=\"(\/wiki\/index\.php\/.*?)\"/g) { |
|
227 |
|
228 $mylink[$i] = $1; |
|
229 $i++; |
|
230 |
|
231 } |
|
232 print "INFO - Found $i items in the category page\n" |
|
233 } |
|
234 return @mylink; |
|
235 } |
|
236 |
|
237 sub bklog_domain { |
|
238 #argument |
|
239 ($mytechdomian)=@_; |
|
240 |
|
241 $mytechdomian =~s/\)//sg; |
|
242 $mytechdomian =~s/\(//sg; |
|
243 |
|
244 $domaininfo=loadfile ("package_domains.csv"); |
|
245 if ($domaininfo =~ m/$mytechdomian.*?\{(.*?)\}/sgi ) { |
|
246 return $1; |
|
247 } |
|
248 print "ERROR - domain not found for $mytechdomian\n"; |
|
249 return "orphan"; |
|
250 |
|
251 } |
|
252 |
|
253 sub parse_bklog { |
|
254 |
|
255 #arguments |
|
256 ($infile,$outfile,$id)=@_; |
|
257 $mypkg=loadfile $infile; |
|
258 #list if the bklog has been ported to the new bugzilla based format |
|
259 $headerformat= "wiki_format"; |
|
260 |
|
261 |
|
262 open ( outputfile, ">>".$outfile); |
|
263 open ( soutputfile, ">>"."summary_".$outfile); |
|
264 |
|
265 if ($mypkg =~ m/index\.php\/(.*?) HTTP/sg) { |
|
266 |
|
267 $pagename = $1; |
|
268 |
|
269 if ($splitbklogs) { |
|
270 $whichtd = bklog_domain($pagename); |
|
271 open ( tdoutputfile, ">>".$whichtd. "_".$outfile); |
|
272 } |
|
273 |
|
274 |
|
275 |
|
276 if (blacklist($pagename)) { |
|
277 |
|
278 close (outputfile); |
|
279 close (soutputfile); |
|
280 |
|
281 if ($splitbklogs) { close (tdoutputfile);} |
|
282 return 0; |
|
283 } |
|
284 print "INFO -Processing Package $pagename \n"; |
|
285 $i=0; |
|
286 $found_counter =0; |
|
287 |
|
288 if ($mypkg =~m/class\=\"bugzilla sortable\"/sg ) { $headerformat="autobug_format"; } |
|
289 |
|
290 while ($mypkg =~ m/\<tr.*?\>(.*?)\<\/tr/sg) { |
|
291 $myheader= $&; |
|
292 if ($myheader =~ m/style=\"background-color\:/sg) { |
|
293 next; |
|
294 } |
|
295 $myfeat= $1; |
|
296 $myfeat =~ s/\<\/td\>/\t/sg; |
|
297 $myfeat =~ s/\<.*?\>//sg; |
|
298 $myfeat =~ s/\n//sg; |
|
299 |
|
300 if ($myfeat =~ m/IDPStatus/sg) { #header for bugzilla mediawiki plugin |
|
301 next; |
|
302 } |
|
303 |
|
304 if ($myfeat =~ m/[A-z]/sg and not $myfeat =~ m/\<\;etc/sg and |
|
305 not $myfeat =~ m/\<\;Feature/sg and not $myfeat =~ m/Item not available/sg) { |
|
306 print outputfile "$pagename\t$myfeat\n"; |
|
307 if ($splitbklogs) { print tdoutputfile "$pagename\t$myfeat\n";} |
|
308 |
|
309 # print "matching $myfeat with $count_target\n" ; |
|
310 if ($myfeat =~ m/$count_target/sg) {$found_counter++;} |
|
311 $i++; |
|
312 } |
|
313 |
|
314 } |
|
315 |
|
316 if ($count_target){ |
|
317 $mycount=$i."\t".$found_counter; |
|
318 } else { |
|
319 $mycount=$i; |
|
320 } |
|
321 |
|
322 if ($splitbklogs) { |
|
323 print soutputfile "$id\t$pagename\t$mycount\t$headerformat\t$whichtd\thttp://developer.symbian.org/wiki/index.php/$pagename\n"; |
|
324 } else { |
|
325 print soutputfile "$id\t$pagename\t$mycount\t$headerformat\thttp://developer.symbian.org/wiki/index.php/$pagename\n"; |
|
326 } |
|
327 |
|
328 } |
|
329 |
|
330 close (outputfile); |
|
331 close (soutputfile); |
|
332 if ($splitbklogs) { close (tdoutputfile);} |
|
333 |
|
334 } |
|
335 |
|
336 |
|
337 |
|
338 |
|
339 #help print |
|
340 sub printhelp |
|
341 { |
|
342 |
|
343 print "\n\n version 1.1 |
|
344 \ngettd.pl -t=url -d=domain \n\nRequired parameters for Technology Roadmaps:\n\t -t url containing the technology domain roadmap\n\t -d the technology domain name |
|
345 \n\nOptional Parmeters for Technology Roadmaps\n\t-new if the roadmap has the new wiki format |
|
346 \n\nRequired Parameters for Package backlogs\n\t-p for package backlog analysis. just run gettd.pl -p |
|
347 \n\nOptional Pararmeters for Package backlogs\n\t -compare [f1] [f2] compares two package summary files for changes ignores order |
|
348 \n\t -split splits the content of the backlog output into technology domains. requires package_domains.csv file with mapping details |
|
349 \n\t -count=regexp counts the times that a package backlog line matches the regexp, the results are output to the summary file |
|
350 \n\nCommonOptional parameters\n\t-o filename ,the output is logged into the output.csv file by default\n\t-h for help |
|
351 \n\t recommend to run under cygwin environment and perl version v5.10.0 \n |
|
352 \n\t pages blacklisted for package backlogs are @blist\n"; |
|
353 exit; |
|
354 } |
|
355 |
|
356 |
|
357 |
|
358 #compare bklogs |
|
359 sub compare_bklogs { |
|
360 #arguments |
|
361 (@bklogs)=@_; |
|
362 |
|
363 if (not $#bklogs == 1) { printhelp;} |
|
364 |
|
365 |
|
366 $cmd ="cut -f 2,3 ". $bklogs[0] . " | sort -u > tmp1.txt"; |
|
367 |
|
368 system($cmd); |
|
369 |
|
370 $cmd ="cut -f 2,3 ". $bklogs[1] . " | sort -u > tmp2.txt"; |
|
371 system($cmd); |
|
372 |
|
373 exec ("diff tmp1.txt tmp2.txt | grep '[<|>]'"); |
|
374 system("rm temp*.txt"); |
|
375 |
|
376 exit; |
|
377 |
|
378 } |
|
379 |
|
380 |
|
381 |
|
382 |
|
383 #process command line options |
|
384 sub cmd_options |
|
385 { |
|
386 |
|
387 my $help; |
|
388 my @compare; |
|
389 |
|
390 |
|
391 GetOptions('h' => \$help,'t=s'=> \$target_url, 'd=s' => \$tdomain , 'o=s' => \$csvfile, |
|
392 'a' => \$authon , 'p' => \$ispackage, 'compare=s{2}' =>\@compare, 'new' => \$isnewformat, |
|
393 'split' => \$splitbklogs, 'count=s' => \$count_target); |
|
394 |
|
395 if (@compare) { |
|
396 compare_bklogs @compare; |
|
397 |
|
398 } |
|
399 if ($count_target) { |
|
400 print "INFO - Seaching for $count_target\n"; |
|
401 } |
|
402 |
|
403 if ($help) { |
|
404 printhelp; |
|
405 } |
|
406 |
|
407 |
|
408 if ($ispackage) { |
|
409 |
|
410 $tdomain =" "; |
|
411 $target_url = "http://developer.symbian.org/wiki/index.php/Category:Package_Backlog"; |
|
412 |
|
413 } |
|
414 if ($isnewformat){ |
|
415 $newtdformat = 1; |
|
416 |
|
417 } |
|
418 |
|
419 if ( not $target_url) { |
|
420 |
|
421 print "ERROR-missing arguments target url\n"; |
|
422 printhelp; |
|
423 } |
|
424 |
|
425 |
|
426 if (not $tdomain){ |
|
427 print "ERROR-missing arguments domain level\n"; |
|
428 printhelp; |
|
429 } |
|
430 |
|
431 print "\nINFO-downloading $target_url with label $tdomain\n"; |
|
432 |
|
433 |
|
434 if (not $csvfile) { |
|
435 if (not $ispackage) { |
|
436 $csvfile="output.csv"; |
|
437 |
|
438 } else { |
|
439 $csvfile="output.txt"; |
|
440 system ("rm *output.txt"); |
|
441 |
|
442 } |
|
443 } |
|
444 print "\nINFO-output recorded in $csvfile \n"; |
|
445 |
|
446 |
|
447 |
|
448 } |
|
449 #main |
|
450 $/ = " "; |
|
451 $host1 = "developer.symbian.org"; |
|
452 |
|
453 cmd_options(); |
|
454 |
|
455 if ($authon) { |
|
456 #file containing login details from http cookie |
|
457 $mycookie = loadfile("mycookie.txt"); |
|
458 |
|
459 $auth = "Cookie: " . $mycookie ; |
|
460 } |
|
461 |
|
462 |
|
463 if ($ispackage) { |
|
464 getpage($target_url, $host1, $auth, "debug.txt"); |
|
465 @bklog = parse_category("debug.txt"); |
|
466 $j=0; |
|
467 |
|
468 foreach (@bklog) { |
|
469 getpage("http://".$host1.$_, $host1, $auth, "pkg".$j.".txt"); |
|
470 parse_bklog ("pkg".$j.".txt",$csvfile, $j); |
|
471 $j++; |
|
472 |
|
473 |
|
474 |
|
475 } |
|
476 |
|
477 } else { |
|
478 |
|
479 #foundation releases - add as required |
|
480 @releases=("Symbian\\^2","Symbian\\^3","Symbian\\^4"); |
|
481 |
|
482 getpage($target_url, $host1, $auth, "debug.txt"); |
|
483 td_roadmap("debug.txt" , $csvfile, $tdomain ,@releases); |
|
484 } |
|