import urllib2, re, sys, json, traceback
from BeautifulSoup import BeautifulSoup
full_report = int(sys.argv[1])

if full_report:
    print "NOTARY'S CONTRACT TEXT REPORT"
    print '--------------------------------------'
else:
    data = []

for con in sorted(set(re.findall('href="/([^"]+)"', urllib2.urlopen('http://agora-notary.wikidot.com/system:page-tags/tag/contract').read()))):
    if con in ('what-is-a-wiki-site', 'start', 'how-to-edit-pages', 'text-of-each-contract') or ':' in con or '.gif' in con: continue
    try:
        rq = urllib2.urlopen('http://agora-notary.wikidot.com/' + con)
        token = re.search('token7=([^;]+)', rq.headers['Set-Cookie']).group(1)
        txt = rq.read()
        if 'Agora Nomic' not in txt: continue
        title = str(BeautifulSoup(re.search('<title>(.*)</title>', txt).group(1), convertEntities=BeautifulSoup.HTML_ENTITIES)).split(':', 1)[1].strip()
        page_id = re.search('pageId = ([0-9]+)', txt).group(1)
        tags = [i for i in re.findall('page-tags/tag/([a-z]+)#pages">', txt) if i != 'contract']
        #source = re.sub('\s*(?<!\n)\n(?![\*\+\n])\s*', ' ', source).strip()
        if full_report:
            source = json.loads(urllib2.urlopen(urllib2.Request('http://agora-notary.wikidot.com/ajax-module-connector.php', 'page_id=%s&moduleName=viewsource%%2FViewSourceModule&callbackIndex=1&wikidot_token7=%s' % (page_id, token), {'Cookie':'wikidot_token7='+token} )).read())['body'].replace('\n', '')
            source = source.replace('<div class="page-source">', '').replace('<br />', '\n').replace('</div />', '').replace('</div>', '').replace('<h1>Page source</h1>', '')
            source = re.sub('\[\[\[(.*\|)?(.*?)\]\]\]', '\\2', source)
            source = str(BeautifulSoup(source, convertEntities=BeautifulSoup.HTML_ENTITIES)).replace('\xc2\xa0\xc2\xa0', '')
            source = source.strip()
            print 
            print '--------------------------------------'
            print title
            print
            print source
            print 'Tags:', ', '.join(tags)
        else:
            data.append((title, ', '.join(tags)))
            print >> sys.stderr, data[-1]
        sys.stdout.flush()
        sys.stderr.flush()
    except:
        print >> sys.stderr, 'Problem with', con
        traceback.print_exc()

if not full_report:
    print "NOTARY'S CONTRACT SWITCH REPORT"
    dlen = 2 + max(len(i[0]) for i in data)
    elen = 2 + max(len(i[1]) for i in data)
    print '-'*(dlen+elen)
    sys.stdout.write('Contract'.ljust(dlen))
    print 'Tags'
    for a, b in data:
        sys.stdout.write(a.ljust(dlen))
        print b

