maphew

count_types.cmd

@echo off
:: Keep every variable set below local to this run so nothing leaks into
:: the caller's environment.
setlocal
:: 2007-February-07, matt.wilkie@gov.yk.ca, this script is public domain.
::
:: Requires: grep
::
:: A special purpose script to count the number of occurrences of specified file extensions in
:: specially prepared text files. The files are prepared with "dir /s/b" followed by a search &
:: replace that strips the file and folder names. They are expected to look like:
::	.xls
::
::	.doc
::
::
::	.doc
::	.xls
::	.doc
::	.xls
:: ...etc

:: %~1 strips any surrounding quotes first, so a quoted path (e.g. one
:: containing spaces) is tested correctly instead of as ""path"".
if not exist "%~1" goto :Usage

:: List of filetypes that the Google Mini search appliance knows about and can index as of 2007-Feb-07
:: http://www.google.com/enterprise/mini/fileformats.html
:: Each list is a space-separated set of extensions without the leading dot.
:: An extension may appear in more than one list, but only once within a
:: list, otherwise it would be reported twice for that document type.
:: Word processing
set "word=ans dif doc dox dtf dx fft htm html iwa iwp jaw jbw jw leg ltr lwp m11 mcw mw mwii nb ow4 pdf pfb pfc pw pwp qa qw rft rtf sam sm smt spr tw txt vw wks wmc wp wp4 wp5 wp6 wpf wpl wps wri ws ws1 ws2 ws3 wsd wst xy3 xyp xyw"
:: Spreadsheets
set "spread=300 cal cod col dbf def fol fw3 mod smt ss ssf tid wb1 wb2 wcm wdb wk1 wk2 wk3 wk4 wk5 wki wkq wks wku wpf wps wq1 wr1 xlc xls xlw np"
:: Databases
set "database=300 db dba dbf dbm dbt def dql dtf fmt fox fsl fw fw2 fw3 fwk inx pfc px qa qw r2d rbf ssf wdb wks wpf smt"
:: Graphics
set "graphic=ai bmp cdr cgm cur dcx dib draw drw dsf dwg dxf emf eps fmv fpx gdm gem gif gp4 hpgl ico iges img jfif jpeg pbm pcd pcx pgm pic pict pif pmf png pntg ppm ps psd psp rle sdw srs targa tiff wmf wpg wpg2 xbm xdm xpm"
:: Presentation
set "present=ch3 cht cpr drw flw hgs ppt pre prs shw"
:: Compressed Archives
set "archive=gzip gz lza lzh tar tgz zip"


:Main
:: Process every search file named on the command line. For each file,
:: every document-type list (word, spread, ...) is handed to :Loop, which
:: prints the non-zero extension counts for that list.
:: %* is the full argument list; "for" also expands wildcard arguments
:: such as *.txt into the matching file names.
:: :Loop receives the list name twice: once as the label to print and once
:: as the name of the variable that holds the extensions.
for %%d in (%*) do (
	if exist "%%~d" (
		echo ==== Processing %%d
		for %%t in (word spread database graphic present archive) do (
			echo Processing doctype %%t
			call :Loop %%t %%t "%%~d"
			echo ---- Finished %%t
			)
		echo ==== Finished %%d.
	) else (
		rem a missing file is reported on stderr and skipped, not passed to grep
		>&2 echo ==== Skipping %%d: file not found
	)
	)
	goto :EOF

:Loop
	:: Count the number of occurrences of each extension of one document
	:: type in the search file and print every count greater than zero.
	::   %1 - document type label, used only in the progress message
	::   %2 - NAME of the variable that holds the space-separated extensions
	::   %3 - text file to search (may be quoted)
	:: %~N strips surrounding quotes so the values can be re-quoted safely.
	set "doctype=%~1"
	set "extensions=%~2"
	set "searchfile=%~3"
	
	:: thank you ss64, again, for helping with assigning variables to the content of another var
	:: http://www.ss64.com/nt/call.html
	:: The line is parsed twice: the first pass yields  set "extensions=%name%"
	:: and "call" then expands %name% to the actual extension list.
	call set "extensions=%%%extensions%%%"
	
	echo Extensions for "%doctype%" are %extensions%
	for %%e in (%extensions%) do (
		rem The for /f wrapper captures the count so only results greater than zero are reported.
		rem grep -w makes the extension match as a whole word, so .htm does not also count .html
		rem and .ps does not count .psd. -c counts matching lines and -i ignores case.
		for /f %%a in ('grep -ciw \.%%e "%searchfile%"') do (
			if not "%%a"=="0" echo .%%e = %%a
			)
		)
	:: If you don't have grep, "find" works on any windows machine but is verbose
	:: and matches substrings (so .htm would also count .html):
	::   find /c /i ".<ext>" "<searchfile>"
	goto :EOF

:Usage
	:: Print a short help text. Reached when the first argument is missing
	:: or does not name an existing file. %0 expands to the script name as
	:: it was typed on the command line.
	echo.
	echo This is a special purpose script to count the number of
	echo occurences for known file extensions in a specially 
	echo prepared text file. Read the comments in %0 for details.
	echo.
	echo	Usage: %0 [list of text files to search]
	echo.
	:: Return a non-zero errorlevel so callers can detect the incorrect invocation.
	exit /b 1