spurge - getting the source

Spurge is written in Python; the whole source code is included in its package. You may view up-to-the-minute sources using Spurge's CVS tree, but be warned that they're not certain to work.

The current version of spurge.py, the main program, is listed below:

  1 "Spurge RGTP server (simple python-based user-friendly reverse gossip engine)"
  2 
  3 #
  4 # Copyright (c) 2002 Thomas Thurman
  5 # thomas@thurman.org.uk
  6 # 
  7 # This program is free software; you can redistribute it and/or modify
  8 # it under the terms of the GNU General Public License as published by
  9 # the Free Software Foundation; either version 2 of the License, or
 10 # (at your option) any later version.
 11 # 
 12 # This program is distributed in the hope that it will be useful,
 13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 # GNU General Public License for more details.
 16 # 
  17 # You should be able to view the GNU General Public License at 
 18 # http://www.gnu.org/copyleft/gpl.html ; if not, write to the Free Software
 19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
 20 
 21 ################################################################
 22 #
 23 # Still to implement:
 24 
 25 # Primary importance (needed for r/o operation)
 26 # Secondary importance (needed for ordinary append access):
 27 # -- all done
 28 
 29 # Tertiary importance (needed for editing):
 30 # EDLK EDUL EDIT EDIX EDCF EDAB MOTS
 31 
 32 # Not very important (random little commands):
 33 # ALVL ELOG DIFF UDBM
 34 #
 35 # Note that there's also no contention control,
 36 # and there should be.
 37 # 
 38 ################################################################
 39 
 40 import sys
 41 import ConfigParser
 42 import os.path
 43 import md5
 44 import binascii
 45 import re
 46 import socket
 47 import traceback
 48 import time
 49 import random
 50 import smtplib
 51 import getopt
 52 
# Path of the server configuration file.
# NOTE(review): the code that reads this path is not in this part of the
# listing -- presumably it is the default configuration loaded at startup.
basic_config_filename = '/etc/spurge.conf'
 54 
 55 ################################################################
 56 
 57 def auth_level(code):
 58 	try:
 59 		return ['none', 'read-only', 'append', 'editor'][code]
 60 	except IndexError:
 61 		raise "Unknown access level (%d)!" % (code)
 62 
 63 ################################################################
 64 
 65 class blob:
 66 	"A file on disk."
 67 
 68 	def __init__(self, name):
 69 		self.name = name
 70 
 71 	def spew_into(self, target, filter=None):
 72 		for line in open(self.name).xreadlines():
 73 			if not filter or filter(line):
 74 				target.output_line(line[:-1])
 75 
 76 ################################################################
 77 
 78 def readln():
 79 	"Returns the next line from stdin, with trailing control characters removed."
 80 	str = sys.stdin.readline()
 81 	while str and ord(str[-1])<32: str=str[:-1]
 82 	return str
 83 
 84 ################################################################
 85 
 86 # stuff we lifted from yarrow
 87 # (hmm, should have some sort of mutual dependency thing)
 88 
 89 def inverted_bitstring(x):
 90 	result = ""
 91 	for i in range(len(x)):
 92 		result = result + chr(255-ord(x[i]))
 93 	return result
 94 
 95 def random_hex_string(length = 32):
 96 	"Generates a string of random hex digits. Useful for nonces."
 97 
 98 	result = ''
 99 
100 	# The easiest way to generate a hex digit is using the built-in
101 	# hex() function, which returns strings of the form "0x7"-- so
102 	# we take the third character.
103 	
104 	for n in range(0, length):
105 		result = result + hex(int(random.random()*16))[2]
106 
107 	return result
108 
109 ################################################################
110 
111 class connection:
112 
113 	def __init__(self, config, vaultname):
114 
115 		self.remote_host = calling_host()
116 		self.user = None
117 		self.grogname = None
118 		self.data = None
119 		self.expecting_secret = None
120 		self.creating_account = 0
121 		self.server_nonce = None
122 		self.itemid_regexp = re.compile('^[A-Z][0-9]{7}$')
123 		self.partial_log_line = None
124 		self.continuing = None
125 		self.config = config
126      		self.language = self.conf('default-language')
127 		self.access_level = self.confbool('allow-anonymous')
128 		self.potential_access_level = self.access_level
129 		self.directory = os.path.join(self.conf('vault-dir'),
130 					      vaultname)
131 
132 		if self.confbool('logging'):
133 			self.logfile = open(os.path.join(self.conf('log-dir'),
134 							 vaultname+'.log'),
135 					    'a')
136 		else:
137 			self.logfile = None
138 
139 		self.log('(connect)')
140 
141 	def flush_log(self):
142 		"If there's data waiting in partial_log_line, writes it out."
143 		if self.logfile and self.partial_log_line:
144 			self.logfile.write('%s\n' % (self.partial_log_line))
145 			self.logfile.flush()
146 			self.partial_log_line = None
147 
148 	def log(self, message):
149 		if self.logfile:
150 			self.flush_log()
151 
152 			if self.user:
153 				if self.potential_access_level != self.access_level:
154 					# We're not sure they are who they claim to be.
155 					username = '(%s)' % (self.user)
156 				else:
157 					username = self.user
158 			else:
159 				username = '-'
160 
161 			self.partial_log_line = '%s %s [%s] %s' % (
162 				self.remote_host,
163 				username,
164 				time.ctime(),
165 				message,
166 				)
167 
168 	def discuss(self):
169 		self.boilerplate('hello-' + auth_level(self.access_level))
170 
171 		while 1:
172 			sys.stdout.flush()
173 			self.flush_log()
174 			command = readln()
175 
176 			if not command:
177 				self.log('(disconnect)')
178 				self.handle_quit()
179 
180 			# fixme: need sanity check here; command can't
181 			# be over 300 bytes according to the protocol
182 
183 			self.log('"' + command + '"')
184 			command = command.split(' ', 1)
185 			if len(command)==1:
186 				params = ''
187 			else:
188 				params = command[1]
189 				
190 			command = 'handle_'+command[0].lower()[:4]
191 			methods = self.__class__.__dict__
192 			if methods.has_key(command):
193 				flags = methods[command].__doc__
194 				flags = flags[:flags.index(':')]
195 
196 				if int(flags[0]) > self.access_level:
197 					self.boilerplate('permission')
198 				elif ('d' in flags) and not self.data:
199 					self.boilerplate('need-data')
200 				elif ('e' in flags) and not self.edit_lock:
201 					self.boilerplate('need-lock')
202 				else:
203 					methods[command](self, params)
204 			else:
205 				self.boilerplate('parse-fail')
206 
207 	################################################################
208 
209 	def boilerplate(self, name):
210 		"Outputs a piece of boilerplate text."
211 		self.output(self.config.getint('codes', name),
212 			    self.config.get('lang-'+self.language, name))
213 
214 	def output(self, code, message=''):
215 		sys.stdout.write('%03d %s\r\n' % (code, message))
216 		if self.logfile:
217 			self.partial_log_line = '%s %03d' % (
218 				self.partial_log_line,
219 				code)
220 
	def output_line(self, line):
		"Writes `line` to stdout followed by CRLF, with no status code in front."
		sys.stdout.write(line + '\r\n')
223 
	def end_output(self):
		"Writes a line holding a single '.'; handle_regu and handle_help call it after their output_line() calls."
		sys.stdout.write('.\r\n')
226 
227 	################################################################
228 
229 	def handle_user(self, params):
230                 "0:say who you are"
231 
232 		# The USER command has two distinct uses.
233 		# Usually it introduces a user who already has
234 		# an account, but sometimes it's the second
235 		# half of a REGU command.
236 
237 		if self.creating_account:
238 			self.registration_handle_user(params)
239 		else:
240 			self.logging_in_handle_user(params)
241 
242 	def logging_in_handle_user(self, params):
243 		"The USER command, when it's for identification."
244 		if self.user:
245 			self.boilerplate('double-user')
246 		else:
247 			requested_access = 3
248 			params = params.split(' ')
249 			if len(params)>1 and params[1]:
250 				request_char = params[1][0]
251 				if request_char in ('1', '2', '3'):
252 					requested_access = int(request_char)
253 			username = params[0]
254 			users = ConfigParser.ConfigParser()
255 			users.read(self.filename('users'))
256 			if not users.has_section(username):
257 				self.boilerplate('unknown-user')
258 				self.exit_program()
259 
260 			allowed_access = users.getint(username,
261 						      'access')
262 			self.user = username
263 
264 			if not users.has_option(username, 'secret'):
265 				# Oh, okay; better just let them in, then.
266 				self.access_level = self.potential_access_level = allowed_access
267 				self.boilerplate('guest-'+auth_level(allowed_access))
268 				return
269 
270 			self.expecting_secret = users.get(username,
271 							  'secret')
272 			self.potential_access_level = max(requested_access,
273 							  allowed_access)
274 
275 			self.boilerplate('prove-who-you-are')
276 			self.server_nonce = random_hex_string()
277 			self.output(333, self.server_nonce)
278 
279 	def registration_handle_user(self, params):
280 		"The USER command for creating a new account."
281 
282 		if not re.match('^[^@\s<>]*@[^.\s<>]*\.[^\s<>]*$',
283 			    params):
284 			# That's not an email address.
285 			# (ObPython:
286 			#   "That's not a part of the body!"
287 			#   "No, it's a link, though.")
288 
289 			self.boilerplate('unearthly-email-address')
290 			return
291 
292 		users = ConfigParser.ConfigParser()
293 		users.read(self.filename('users'))
294 
295 		if users.has_section(params):
296 			# Getting a case of deja vu here: that person already
297 			# has an account!
298 
299 			self.boilerplate('already-have-an-account')
300 			return
301 
302 		# FIXME: and we should also check the userid against
303 		# the REs in the config file. We need three kinds
304 		# of address:
305 		#
306 		#  * People who we give accounts to when they ask
307 		#  * People who don't get accounts at all when they ask
308 		#  * People who we have to ask the editors about
309 		# and for this category, we also need something in UDBM
310 		# to let the editors say yes or no.
311 		#
312 		# Anyway...
313 
314 		# Okay, looks like they're real. Create them a password...
315 		new_password = random_hex_string(8)
316 
317 		# ...and add them to the users file.
318 		users.add_section(params)
319 		users.set(params, 'secret', new_password)
320 		users.set(params, 'access', self.conf('newbie-privs'))
321 
322 		users.write(open(self.filename('users'), 'w'))
323 		
324 		# Now we just have to send them some mail.
325 
326 		mail_from = None
327 		if self.config.has_option('main', 'mail-from'):
328 			mail_from = self.conf('mail-from')
329 		else:
330 			mail_from = 'root@' + socket.gethostname()
331 
332 		mail = smtplib.SMTP(self.conf('smtp-server'))
333 		mail.sendmail(mail_from,
334 			      params,
335 			      ('From: %s\r\n'+
336 			      'To: %s\r\n'+
337 			      'Delivered-By-The-Graces-Of: Spurge\r\n'+
338 			      'Subject: %s\r\n'+
339 			      '\r\n%s') % (
340 			mail_from,
341 			params,
342 			self.conf('newbie-email-subject'),
343 			self.conf('newbie-email-body').replace('[SECRET]', new_password)))
344 		mail.quit()
345 		
346 		self.boilerplate('created-user')
347 		self.exit_program()
348 
349         def handle_auth(self, params):
350                 "0:prove who you are"
351 		if not self.expecting_secret:
352 			self.boilerplate('unexpected-auth')
353 			return
354 
355 		params = params.split()
356 
357 		if len(params)!=2:
358 			self.boilerplate('no-nonce')
359 			return
360 		
361 		if len(params[1])!=32:
362 			self.boilerplate('short-nonce')
363 			return
364 
365 		squished_userid = self.user.lower()[:16]
366 		while len(squished_userid)<16:
367 			squished_userid += '\0'
368 
369 		fingerprint = md5.new()
370 		fingerprint.update(binascii.unhexlify(params[1]))
371 		fingerprint.update(binascii.unhexlify(self.server_nonce))
372 		fingerprint.update(squished_userid)
373 		fingerprint.update(inverted_bitstring(binascii.unhexlify(self.expecting_secret)))
374 
375 		if params[0].lower()==fingerprint.hexdigest():
376 			# yay, they're who we think they are
377 			response = md5.new()
378 			response.update(binascii.unhexlify(self.server_nonce))
379 			response.update(binascii.unhexlify(params[1]))
380 			response.update(squished_userid)
381 			response.update(binascii.unhexlify(self.expecting_secret))
382 
383 			# Grrr. WrenGROGGS only allows the server to respond
384 			# in uppercase here, in violation of the protocol. :(
385 			self.output(133, response.hexdigest().upper())
386 
387 			self.access_level = self.potential_access_level
388 			self.boilerplate('authorised-' + auth_level(self.access_level))
389 		else:
390 			self.boilerplate('auth-failed')
391 			self.exit_program()
392 
        def handle_alvl(self, params):
                "1:request different privs"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
396 
        def handle_motd(self, params):
                "0:request the message of the day"
		# Hands the vault's 'motd' file to dump_file(), which is
		# defined outside this part of the listing.
		self.dump_file('motd')
400 
401         def handle_indx(self, params):
402                 "1:request a list of all available items"
403 
404 		class IndexFilter:
405 			def __init__(self, left_margin, value):
406 				self.target = value
407 				self.start = left_margin
408 
409 			def __call__(self, candidate):
410 				return int(candidate[self.start:self.start+8],16)>=self.target
411 
412 		filter = None
413 		if params!='':
414 			# okay, we need some filter or other.
415 			try:
416 				if params[0]=='#':
417 					filter = IndexFilter(0, int(params[1:], 16))
418 				else:
419 					filter = IndexFilter(9, int(params, 16))
420 			except ValueError:
421 				self.boilerplate('indx-bad-arguments')
422 				return
423 				
424 		self.dump_file('index', filter, 1)
425 
426         def handle_item(self, params):
427                 "1:request one particular item"
428 		params = params.upper()
429 		if self.itemid_regexp.match(params):
430 			# OK, so it looks like an itemid...
431 			self.dump_file(params)
432 		else:
433 			self.boilerplate('unearthly-itemid')
434 
435         def handle_stat(self, params):
436                 "1:request the status of an item"
437 		params = params.upper()
438 		if self.itemid_regexp.match(params):
439 			filename = self.filename(params)
440 			if os.path.isfile(filename):
441 				self.output(211, open(filename).readline()[:-1])
442 			else:
443 				self.boilerplate('file-not-found')
444 		else:
445 			self.boilerplate('unearthly-itemid')
446 
447         def handle_data(self, params):
448                 "0:provide your name and some data, for a future command"
449 
450 		self.boilerplate('data-please')
451 		sys.stdout.flush()
452 		self.grogname = readln()
453 
454 		self.data = ''
455 		while 1:
456 			line = readln()
457 			if line=='.':
458 				break
459 			self.data += line + '\n'
460 			
461 		self.boilerplate('data-thank-you')
462 
463         def handle_newi(self, params):
464                 "2d:create a new item"
465 
466 		sequence = self.new_sequence()
467 		itemid = self.new_itemid()
468 		filename = self.filename(itemid)
469 		subject = params
470 		timestamp = int(time.time())
471 		
472 		itemfile = open(filename, 'w')
473 
474 		itemfile.write('%27s%08x %s\n%sSubject: %s\n\n%s\n' % (
475 			'',
476 			sequence,
477 			subject,
478 			self.item_header(sequence,
479 					 timestamp,
480 					 itemid),
481 			subject,
482 			self.data
483 			))
484 
485 		self.add_index_record(sequence,
486 				      timestamp,
487 				      itemid,
488 				      'I',
489 				      params)
490 
491 		self.data = None
492 		self.continuing = None
493 
494 		self.output(120, itemid)
495 		# FIXME: should be from boilerplate
496 		self.output(220, '%08x  %s' % (sequence,
497 					       'OK, posted'))
498 
499         def handle_repl(self, params):
500                 "2d:reply to an item"
501 		itemid = params.upper()
502 		sequence = self.new_sequence()
503 		filename = self.filename(itemid)
504 		timestamp = int(time.time())
505 
506 		if not os.path.isfile(filename):
507 			self.boilerplate('file-not-found')
508 		elif os.path.getsize(filename) > int(self.conf('max-item-size')):
509 			self.boilerplate('item-full')
510 			self.continuing = itemid
511 		else:
512 			old = open(filename, 'r')
513 			baby = open(filename+'.new', 'w')
514 
515 			temp = old.readline()
516 			subject = temp[36:-1]
517 			baby.write('%s%08x %s\n' % (
518 				temp[:27],
519 				sequence,
520 				subject))
521 
522 			for line in old.xreadlines():
523 				baby.write(line)
524 
525 			baby.write('%s\n%s\n' % (
526 				self.item_header(sequence, timestamp),
527 				self.data))
528 
529 			self.graft(filename)
530 
531 			self.add_index_record(sequence,
532 					      timestamp,
533 					      itemid,
534 					      'R',
535 					      subject)
536 			self.data = None
537 			self.continuing = None
538 			self.output(220, '%08x  OK, posted' % (sequence))
539 
540         def handle_cont(self, params):
541                 "2d:continue a full item"
542 		# This is pretty similar to newi; refactor.
543 		# (is it still? FIXME)
544 
545 		if not self.continuing:
546 			self.boilerplate('unexpected-cont')
547 			return
548 
549 		subject = params
550 		sequence = self.new_sequence()
551 		new_itemid = self.new_itemid()
552 		old_itemid = self.continuing
553 		old_filename = self.filename(old_itemid)
554 		new_filename = self.filename(new_itemid)
555 		timestamp = int(time.time())
556 		
557 		# Consider returning 423 if the title contains the old itemid.
558 
559 		# First, create our new item.
560 
561 		open(new_filename, 'w').write('%8s%19s%08x %s\n%sSubject: %s\n\n%s' % (
562 			old_itemid,
563 			'',
564 			sequence,
565 			subject,
566 			self.item_header(sequence,
567 					 timestamp,
568 					 new_itemid),
569 			subject,
570 			self.data))
571 
572 		# Now we modify the old item to show it's been continued.
573 
574 		old = open(old_filename, 'r')
575 		baby = open(old_filename+'.new', 'w')
576 
577 		statline = old.readline()
578 		baby.write('%s%s%s' % (statline[:9], new_itemid, statline[17:]))
579 
580 		# Everything else is the same for a bit...
581 		for line in old.xreadlines():
582 			baby.write(line)
583 
584 		# Add the magic cookie for the continuation, and the human-readable
585 		# portion.
586 
587 		baby.write('^%08x %08x\n[Item continued in %s by %s.]\n' % (
588 			sequence, timestamp, new_itemid, self.user))
589 
590 		old.close()
591 		baby.close()
592 
593 		# Swap it in.
594 		self.graft(old_filename)
595 
596 		# Lastly, update the index.
597 		self.add_index_record(sequence, timestamp, new_itemid, 'C', subject)
598 		self.add_index_record(sequence, timestamp, old_itemid, 'F', subject)
599 
600 		# Okay, we're all done!
601 		self.output(120, new_itemid)
602 		self.output(220, '%08x  OK, posted' % (sequence))
603 
604 		self.data = None
605 		self.continuing = None
606 
        def handle_edlk(self, params):
                "3:get a lock before editing"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
610 
        def handle_edul(self, params):
                "3e:relinquish the edit lock"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
614 
        def handle_edit(self, params):
                "3e:begin editing an item"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
618 
        def handle_edix(self, params):
                "3e:begin editing the index"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
622 
        def handle_edcf(self, params):
                "3e:confirm (finish) an edit"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
626 
        def handle_edab(self, params):
                "3e:abort an edit"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
630 
        def handle_mots(self, params):
                "3d:set the message of the day"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
634 
        def handle_elog(self, params):
                "1:list all administrative edits"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
638 
        def handle_diff(self, params):
                "3x:?"
                # Stub: replies with the 'not-implemented' boilerplate.
                # The 'x' flag keeps this command out of the HELP listing.
                self.not_implemented()
642 
        def handle_quit(self, params=None):
                "0:log out"
		# Says goodbye, then ends the process.  discuss() also calls
		# this with no argument when it reads an empty command.
		self.boilerplate('goodbye')
		self.exit_program()
647 
	def exit_program(self):
		"Flushes stdout and any pending log line, then ends the process with sys.exit()."
		sys.stdout.flush()
		self.flush_log()
		sys.exit()
652 
653         def handle_regu(self, params):
654                 "0:get yourself a new account"
655 		if self.user:
656 			self.boilerplate('unexpected-regu')
657 		elif self.creating_account:
658 			self.boilerplate('double-regu')
659 		else:
660 			self.boilerplate('regu-spiel-begin')
661 			for line in self.conf('registration-message').split('\n'):
662 				self.output_line(' '+line)
663 			self.output_line('Use the command USER <new id> to continue, or QUIT to quit.')
664 			self.end_output()
665 
666 			self.creating_account = 1
667 
        def handle_udbm(self, params):
                "3:database maintenance"
                # Stub: replies with the 'not-implemented' boilerplate.
                self.not_implemented()
671 
        def handle_noop(self, params):
                "0:does nothing"
		# Just answers with the 'noop' boilerplate.
		self.boilerplate('noop')
675 
        def handle_xyzz(self, params):
		"0x:magic word"
		# Answers with the 'xyzzy' boilerplate.  The 'x' flag keeps
		# this command out of the HELP listing.
		self.boilerplate('xyzzy')
679 
680         def handle_help(self, params):
681                 "0:list all the commands"
682 
683 		self.boilerplate('help-spiel-begin')
684 		methods = self.__class__.__dict__.keys()
685 		methods.sort()
686 		for method in methods:
687 			if method.startswith('handle_'):
688 				docstring = self.__class__.__dict__[method].__doc__
689 
690 				if int(docstring[0]) > self.access_level:
691 					continue
692 				
693 				colon = docstring.index(':')
694 				flags = docstring[1:colon]
695 
696 				if 'x' in flags:
697 					continue
698 				if 'd' in flags:
699 					docstring += ' (needs some DATA)'
700 				if 'e' in flags:
701 					docstring += ' (needs the edit lock)'
702 				
703 				self.output_line('%s - %s' % (
704 					method[7:].upper(),
705 					docstring[colon+1:]))
706 
707 		self.end_output()
708 
	def not_implemented(self):
		"Answers with the 'not-implemented' boilerplate; shared by all the stub handlers."
		self.boilerplate('not-implemented')
711 
712 	def new_sequence(self):
713 		"Returns a new sequence number."
714 
715 		# FIXME: need contention control
716 
717 		result = 0
718 		filename = self.filename('sequence')
719 		if (os.path.isfile(filename)):
720 			# good, we already have a number
721 			result = int(open(filename).readline(),16)
722 
723 		# Now, the slightly trickier part:
724 		# write the number back for the next time.
725 
726 		open(filename, 'w').write('%08x' % (result+1))
727 
728 		return result
729 		
730 	def new_itemid(self):
731 		"Returns a GROGGS-style itemid."
732 
733 		# We may as well use the GROGGS year-lettering system,
734 		# rather than starting again at A. GROGGS's "A" year was 1985.
735 		#
736 		# Note that the spec says that the form of an itemid is:
737 		#         one letter indicating the year followed by
738 		#         3 digits for the day and 4 for the time
739 		#
740 		# but it's not explained how those digits map to the day or time,
741 		# so they're necessarily opaque! Given this, we just use the last
742 		# seven digits of the Unix timestamp to end itemids with. (The spec
743 		# also says that the letter indicates the year, without saying how
744 		# it maps to real years; but since the mapping of GROGGS year letters
745 		# to years is generally recognised by users, we keep to the same
746 		# system.)
747 		#
748 		# UPDATE: Inspection of itemids produced by IWJ's system indicates
749 		# that the format is Ydddhhmm, where Y is a year letter as above,
750 		# ddd is the number of days through the year, and hhmm is the time
751 		# in the 24h clock. This seems a lot of extra work to implement,
752 		# particularly since it's not obvious until you've read a lot of
753 		# itemids how it works. I think that, until I find a reason to
754 		# do otherwise, the numeric part of our itemids will be random.
755 		# I'm reserving itemids with a 9 in the second place for magic uses,
756 		# should such a need ever arise.
757 
758 		while 1:
759 			result = '%c%07d' % (
760 				65+time.gmtime()[0]-1985,
761 				random.randrange(0,9000000))
762 
763 			if not os.path.isfile(self.filename(result)):
764 				return result
765 
766 	def item_header(self, sequence, timestamp, itemid=None):
767 		result = '^%08x %08x\n' % (sequence, timestamp)
768 		date = self.neat_date(timestamp)
769 
770 		if itemid:
771 			result += 'Item ' + itemid
772 		else:
773 			result += 'Reply'
774 
775 		result += ' from '
776 
777 		# There are two equivalent forms we can use here.
778 		# The choice between them depends on the potential
779 		# length of the first line.
780 		
781 		if len(self.user+self.grogname)>52:
782 			result += '%s %s\nFrom %s\n'