254 } |
255 } |
255 close(FILE); |
256 close(FILE); |
256 } |
257 } |
257 } |
258 } |
258 } |
259 } |
|
260 |
|
# count_distinct
#
# Counts, for every package under $::releaseablesdir, how many distinct
# releasable names its info.tsv lists, and stores the result in
# $::releaseables_by_package->{<package>}.
#
# Inputs (package globals):
#   $::releaseablesdir          - root directory searched recursively.
#   $::releaseables_by_package  - hash ref that receives one entry per package;
#                                 autovivified if not yet set.
#
# The package name is the path between the root directory and the
# "info.tsv" file name, with backslashes normalised to forward slashes.
# Only lines made of exactly three tab-separated fields are considered;
# the first field is taken as the releasable name.
#
# Takes no arguments and returns nothing useful.
sub count_distinct
{
    # Collect every regular file whose name ends in ".tsv".
    my @files;
    my $finder = sub {
        return if ! -f;
        return if ! /\.tsv$/;
        push @files, $File::Find::name;
    };
    find($finder, $::releaseablesdir);

    for my $file (@files)
    {
        # \Q...\E makes the root directory match literally, so regex
        # metacharacters and Windows backslashes in the path cannot corrupt
        # the pattern. Files that are not <root>/<package>/info.tsv are
        # skipped rather than being counted under a stale or empty $1.
        next if $file !~ /\Q$::releaseablesdir\E[\\\/](.*)[\\\/]info\.tsv/;
        my $package = $1;
        $package =~ s,\\,/,g;

        my %seen;
        if (open(my $fh, '<', $file))
        {
            # A lexical loop variable keeps the caller's $_ intact.
            while (my $line = <$fh>)
            {
                next if ($line !~ /^([^\t]*)\t[^\t]*\t[^\t]*$/);
                my $name = $1;

                # Empty names have never been counted as a releasable.
                next if $name eq '';
                $seen{$name} = 1;
            }
            close($fh);
        }
        else
        {
            # Best effort: report the problem and record the package with a
            # count of zero, as an unreadable file always did.
            warn "count_distinct: cannot open $file: $!\n";
        }

        $::releaseables_by_package->{$package} = scalar(keys %seen);
    }

    return;
}
|
296 |
259 sub remove_missing_duplicates |
297 sub remove_missing_duplicates |
260 { |
298 { |
261 opendir(DIR, $::raptorbitsdir); |
299 opendir(DIR, $::raptorbitsdir); |
262 my @files = grep((-f "$::raptorbitsdir/$_" && $_ !~ /^\.\.?$/ && $_ =~ /_missing\.txt$/), readdir(DIR)); |
300 my @files = grep((-f "$::raptorbitsdir/$_" && $_ !~ /^\.\.?$/ && $_ =~ /_missing\.txt$/), readdir(DIR)); |
263 close(DIR); |
301 close(DIR); |
264 |
302 |
265 for my $file (@files) |
303 for my $file (@files) |
266 { |
304 { |
267 open(FILE, "+<$::raptorbitsdir/$file"); |
305 open(FILE, "+<$::raptorbitsdir/$file"); |
268 print "working on $file\n"; |
306 #print "working on $file\n"; |
269 |
307 |
270 # Read it |
308 # Read it |
271 my @content = <FILE>; |
309 my @content = <FILE>; |
272 |
310 |
273 # Sort it, and grep to remove duplicates |
311 # Sort it, and grep to remove duplicates |