3 # Setup global variables
# Function: Check if an attachment file exists (i.e. 'a.<number>' is readable
#           in the current directory).
# Input:    $1 = The number of the attachment
# Returns:  0 (success) if the file exists, 255 (what 'return -1' yields)
#           if it does not.
#
# Example:  attachmentFileExists 10 && echo Echoed when 10 exists
#           attachmentFileExists 11 || echo Echoed when 11 does not exist
function attachmentFileExists () {
  # The expansion is quoted so that an odd argument (empty, containing
  # spaces) cannot make '[' fail with a syntax error, which would
  # otherwise be mistaken for "file exists".
  if [ ! -r "a.$1" ]; then
    return 255
  fi
  return 0
}
# Function: Check that a downloaded attachment is a real attachment.
#           A download of a non-existing attachment number yields a
#           HTML page that says "Attachment #<number> does not exist";
#           such a file counts as invalid, and so does a missing file.
#           This function only reports the result, it does not delete
#           anything; removing the invalid file is left to the caller.
# Input:    $1 = The number of the attachment
# Returns:  0 if a.$1 exists and is not such an error page,
#           255 (what 'return -1' yields) otherwise.
#
# Examples: attachmentIsNotInvalid 2000 && echo Attachment 2000 is not invalid
#           attachmentIsNotInvalid 10 || echo Attachment 10 does not exist or is invalid
function attachmentIsNotInvalid () {
  # 'return 255' is the status 'return -1' produces, but it is also
  # accepted by bash versions that reject a negative argument.
  attachmentFileExists "$1" || return 255
  # -F: the message is a literal string, not a regular expression.
  grep -q -F "Attachment #$1 does not exist" "a.$1" && return 255
  return 0
}
# Function: Download a specific attachment from bugzilla into the file
#           a.<number>. Nothing is downloaded when that file already
#           exists.
# Input:    $1 = The number of the attachment
# Returns:  0 if the file already existed, otherwise the exit status
#           of wget.
# Example:  downloadAttachment 57
function downloadAttachment () {
  attachmentFileExists "$1" || \
    wget -q -O "a.$1" "http://bugzilla.lyx.org/attachment.cgi?id=$1&action=view"
}
# Function: Download a sequence of attachments
# Input:    $1 = Start number
#           $2 = End number (inclusive)
# Example:  downloadAttachments 10 20
function downloadAttachments () {
  # Keep the loop counter out of the global namespace.
  local number
  for number in $(seq "$1" "$2"); do
    downloadAttachment "$number"
  done
}
57 # Function: Download attachments until an invalid one is found.
58 # Then delete that attachment and return.
# The attachment number currently being handled is kept in the variable n.
# NOTE(review): the statements that initialise n, advance it and perform
# the actual download are not visible in this excerpt - presumably n
# starts at $1 and downloadAttachment is called inside the loop; confirm.
60 function downloadRemainingAttachments () {
# Keep looping for as long as attachment n is present and valid.
63 while attachmentIsNotInvalid $n; do
65 echo Acquiring attachment $n
# The loop has stopped at attachment n; if it is not valid, remove the
# file a.$n so that it is not kept around as if it were an attachment.
68 attachmentIsNotInvalid $n || rm a.$n
# Function: One test to see if the file is spam. An attachment is
#           considered spam when both of the following hold:
#             1. the output of 'file' for it contains 'ASCII' or 'HTML'
#                (e.g. 'HTML document text')
#             2. it contains '<script' (case-insensitive, spaces
#                allowed between '<' and 'script')
# In:       $1 = The number of the attachment
# Returns:  0 if detected as spam,
#           255 (what 'return -1' yields) if not detected as spam.
function attachmentIsSpam () {
  # These flags are local and reset on every call. Previously a
  # differently spelled variable was reset, so the flag that was
  # actually tested kept its value from earlier attachments.
  local file_type_may_be_spam=0
  local file_contains_script=0
  local fileResult

  fileResult=$(file "a.$1")
  case "$fileResult" in
    *ASCII*|*HTML*) file_type_may_be_spam=1 ;;
  esac

  if [[ $file_type_may_be_spam -ne 0 ]]; then
    # Check if the file contains '<script'
    grep -iq '< *script' "a.$1" && file_contains_script=1
    if [[ $file_contains_script -ne 0 ]]; then
      return 0    # Detected as spam
    fi
  fi
  return 255      # Not detected as spam
}
102 # Download all attachments
# nMin and nMax bound the attachment numbers handled below.
105 let nMin=1830 # Min number to test, 1616 is the 1st spam
106 let nMax=2000 # Max number to test
108 # Download any new attachments, starting from number $nMin
109 echo Downloading attachments...
110 downloadRemainingAttachments $nMin
112 echo Analyzing downloaded attachments
113 echo 'Note: "Tagged" attachments are not re-analyzed'
# Walk the attachment numbers up to and including $nMax.
# NOTE(review): where n is initialised and incremented is not visible in
# this excerpt - confirm that it is reset to $nMin before this loop.
115 while [ $n -le $nMax ]; do
116 # Only process downloaded attachments
118 # Don't process attachments already tagged as spam
119 if [ ! -r spam.$n ]; then
120 # Don't process attachments already tagged as spam candidates
121 if [ ! -r spam.$n.candidate ]; then
122 # Check for tag indicating it is _not_ spam
123 if [ ! -r notspam.$n ]; then
# Untagged so far: run the spam test on attachment n.
124 if attachmentIsSpam $n; then
125 echo $n is possible spam
# Tag it as a candidate: spam.$n.candidate becomes a symlink to a.$n,
# which also keeps it from being analyzed again on the next run.
126 ln -s a.$n spam.$n.candidate