#!/usr/bin/python2.4
# -*- coding: utf-8 -*-
class AtomobotLanguageError(Exception):
    """Atomobot language error; adds nothing to the built-in Exception."""
class LanguageText(object):
    """Text paired with a language object, hashed and ordered by its decomposition.

    ``lang`` must provide ``decompose(text)`` (its result is stored as a
    tuple in ``decomposed``) and a ``code`` attribute, which is only used
    by ``repr()``.
    """

    def __init__(self, lang, text):
        self.lang = lang
        self.text = text
        # Stored as a tuple so that it can be hashed and compared cheaply.
        self.decomposed = tuple(lang.decompose(text))

    def __str__(self):
        """Return the original, undecomposed text."""
        return self.text

    def __repr__(self):
        lang_code = self.lang.code
        quoted_text = repr(self.text)
        return "LanguageText('%s', %s)" % (lang_code, quoted_text)

    def __hash__(self):
        """Hash on the decomposition, the same value __cmp__ compares."""
        return hash(self.decomposed)

    def __cmp__(self, other):
        """Three-way comparison by decomposition.

        A plain string is first wrapped in a LanguageText that uses this
        object's language.  Any other type raises TypeError.
        """
        if not isinstance(other, LanguageText):
            if not isinstance(other, basestring):
                raise TypeError()
            other = LanguageText(self.lang, other)
        return cmp(self.decomposed, other.decomposed)
class AtomobotLanguage(object):
    """Base class for language-specific plural forms and collation.

    Every "letter" registered with add_letter() (a single character or a
    multi-character group) receives the next sequence number, so the
    registration order defines the sort order.  decompose() turns text
    into a list of those numbers; number 0 stands for anything that was
    not registered and is rendered as u'?' by compose().

    decompose() reads ``letters_by_first``, which is only filled by
    tidy_letters(), so tidy_letters() has to be called after the letters
    have been registered.
    """

    # word -> sequence of forms indexed by count; index 5 is used for every
    # count of five or more (see plural()).
    plurals = {}

    def __init__(self):
        # Last number handed out by new_number(); 0 is the "unknown" slot.
        self.number = 0
        # letters[n] is the letter registered under number n.
        self.letters = [u'?']
        # letter -> number (reverse of ``letters``).
        self.number_by_letter = {}
        # first character -> {letter length -> [letters]}; filled by add_letter().
        self.cgroups_by_first = {}
        # first character -> [letters], longest first; filled by tidy_letters().
        self.letters_by_first = {}
        # character type name -> set of letters registered with that type.
        self.ch_types = {}

    def new_number(self):
        """Advance the letter counter and return its new value."""
        self.number += 1
        return self.number

    def plural(self, number, name):
        """Return the form of ``name`` that goes with the count ``number``.

        The sign of ``number`` is ignored.  Counts of five or more all use
        the form at index 5.  ``name`` itself is returned when it has no
        entry in ``plurals``.
        """
        forms = self.plurals.get(name)
        if not forms:
            return name
        number = abs(number)
        if number >= 5:
            return forms[5]
        return forms[number]

    def add_letter(self, letter, ch_type='letter'):
        """Register ``letter`` under the next free number.

        The letter is also indexed by its first character and its length
        (for decompose()) and recorded in ``ch_types[ch_type]``.
        """
        number = self.new_number()
        self.letters.append(letter)
        self.number_by_letter[letter] = number
        by_length = self.cgroups_by_first.setdefault(letter[0], {})
        by_length.setdefault(len(letter), []).append(letter)
        self.ch_types.setdefault(ch_type, set()).add(letter)

    def tidy_letters(self):
        """Build ``letters_by_first`` from the registered letters.

        For every first character the letters are listed longest first, so
        that decompose() prefers a multi-character group over its prefix.
        Each list is rebuilt from scratch, which makes repeated calls safe.
        """
        for first, by_length in self.cgroups_by_first.items():
            ordered = []
            for size in sorted(by_length.keys(), reverse=True):
                ordered.extend(by_length[size])
            self.letters_by_first[first] = ordered

    def decompose(self, text):
        """Return the list of letter numbers that spell ``text``.

        The text is upper-cased first.  At each position the first
        candidate letter that matches (candidates are ordered longest
        first) is consumed.  A character at which no registered letter
        matches contributes 0 and is skipped.
        """
        text = text.upper()
        length = len(text)
        decomposed = []
        pos = 0
        while pos < length:
            candidates = self.letters_by_first.get(text[pos], ())
            for candidate in candidates:
                # Comparing the whole slice checks every character of the
                # candidate; a slice cut short by the end of the text
                # simply fails to match.
                if text[pos:pos + len(candidate)] == candidate:
                    decomposed.append(self.number_by_letter[candidate])
                    pos += len(candidate)
                    break
            else:
                # Nothing matched here (no candidates at all, or only longer
                # groups that do not fit): emit "unknown" and move on, so
                # the loop always makes progress.
                decomposed.append(0)
                pos += 1
        return decomposed

    def compose(self, decotext):
        """Join the letters for a sequence of numbers back into a string."""
        return u''.join([self.letters[number] for number in decotext])

    def compare_ci(self, text1, text2):
        """Compare two texts by their decompositions; return -1, 0 or 1."""
        dec1 = self.decompose(text1)
        dec2 = self.decompose(text2)
        # Same result as cmp(dec1, dec2), without needing the cmp() builtin.
        return (dec1 > dec2) - (dec1 < dec2)
class AtomobotLanguageSlovak(AtomobotLanguage):
    """Slovak language data: plural forms, month names, number formatting
    and the letter order used for sorting."""

    code = 'sk'

    # Each tuple is indexed by count: 0, 1, 2, 3, 4, then "5 or more".
    plurals = {
        'článok': (u'článkov', u'článok', u'články', u'články', u'články', u'článkov'),
        'kategória': (u'kategórií', u'kategória', u'kategórie', u'kategórie', u'kategórie', u'kategórií'),
    }

    TEXT_UPDATE = u'Atomobot :: aktualizácia'

    # Month number -> month name.
    MONTH_NAME = {
        1: u'január', 2: u'február', 3: u'marec', 4: u'apríl', 5: u'máj', 6: u'jún', 7: u'júl',
        8: u'august', 9: u'september', 10: u'október', 11: u'november', 12: u'december'}

    # Month number -> genitive form of the month name.
    MONTH_NAME_GEN = {
        1: u'januára', 2: u'februára', 3: u'marca', 4: u'apríla', 5: u'mája', 6: u'júna', 7: u'júla',
        8: u'augusta', 9: u'septembra', 10: u'októbra', 11: u'novembra', 12: u'decembra'}

    def __init__(self):
        super(AtomobotLanguageSlovak, self).__init__()
        self.init_collated_letters()
        self.tidy_letters()

    def format_number(self, number, places=0):
        """Format ``number`` with ``places`` decimals.

        The integer part is split into groups of three digits separated by
        a space, and a comma is used as the decimal separator.  ``number``
        is converted with float() first.  A negative value that rounds to
        zero is returned without a minus sign.
        """
        numberstr = ('%%.%sf' % places) % float(number)
        parts = numberstr.split('.', 1)
        pre = parts[0]
        post = ''
        if len(parts) > 1:
            post = parts[1]

        minus = pre.startswith('-')
        if minus:
            pre = pre[1:]
            # '%f' keeps the sign of a value that rounds to zero
            # (e.g. '-0.00'); do not show "-0".
            if not (pre + post).strip('0'):
                minus = False

        # Cut three characters at a time off the right-hand end.
        groups = []
        while len(pre) > 3:
            groups.append(pre[-3:])
            pre = pre[:-3]
        groups.append(pre)
        groups.reverse()

        final_str = ' '.join(groups)
        if minus:
            final_str = '-' + final_str
        if post:
            final_str += ',' + post
        return final_str

    def init_collated_letters(self):
        """Register symbols, digits and letters in sorting order.

        The order of the add_letter() calls is the collation order, so the
        sequences below must not be reordered.
        """
        for symbol in u' !"#$.,-()':
            self.add_letter(symbol, 'symbol')
        for digit in u'0123456789':
            self.add_letter(digit, 'number')
        letters = (
            u'A', u'Á', u'Â', u'Ä', u'Å', u'Æ',
            u'B',
            u'C', u'Ć', u'Č', u'Ç',
            u'D', u'Ď', u'DZ', u'DŽ', u'Ð',
            u'E', u'É', u'Ě', u'Ę',
            u'F',
            u'G',
            u'H', u'CH',
            u'I', u'Ì', u'Í', u'Î', u'Ï', u'Į',
            u'J',
            u'K',
            u'L', u'Ĺ', u'Ľ', u'Ł',
            u'M',
            u'N', u'Ń', u'Ň', u'Ñ',
            u'O', u'Ó', u'Ô', u'Ö', u'Ő', u'Œ', u'Ø',
            u'P',
            u'Q',
            u'R', u'Ŕ', u'Ř',
            u'S', u'Ś', u'Š', u'Ş',
            u'T', u'Ť',
            u'U', u'Ú', u'Ů', u'Ü', u'Ű',
            u'V',
            u'W',
            u'X',
            u'Y', u'Ý',
            u'Z', u'Ź', u'Ž',
        )
        for letter in letters:
            self.add_letter(letter)