FML 读取
读取时遇到一点问题:一条内容在页面中被拆成了多个片段;后来发现每条内容最后都是以 "FML" 结尾,于是用它作为一条内容结束的标志。
#! /usr/bin/python
# coding:cp936
# CopyRight 2011 Adou XD , All Rights Reserved
import os
import sys
import urllib
import urllib2
import re
class fml(object):
    '''Interactive reader for stories published on fmylife.com.

    An instance remembers which listing to read (``site``), the sort key
    substituted into the listing URL (``sortt``) and the number of the next
    page to download (``page``).  ``chose`` lets the user pick the listing
    from the console and ``read`` downloads one page of stories.
    '''

    # URL templates; each takes (sort key, page number).
    sites = ("http://www.fmylife.com/%s?page=%d",             # home
             "http://www.fmylife.com/tops/%s/day?page=%d",    # day
             "http://www.fmylife.com/tops/%s/week?page=%d",   # week
             "http://www.fmylife.com/tops/%s/month?page=%d",  # month
             "http://www.fmylife.com/tops/%s?page=%d",        # all time
             )

    # Sort keys, in the same order as the menu printed by chosesort().
    sorts = ('top',
             'flop',
             'comment',
             'favorite',
             )

    # Text between a class="fmllink" opening tag and the next </a>.
    _LINK_RE = re.compile(r'(?<=class="fmllink">).*?(?=</a>)')

    def __init__(self):
        self.page = 0
        self.site = fml.sites[0]
        self.sortt = ''

    def _ask(self, count):
        '''Prompt with ">>" until the user enters a number in 1..count.

        Returns the zero-based index of the chosen menu entry.
        '''
        while True:
            try:
                # raw_input() + int() rather than input(): Python 2's
                # input() evaluates the typed text as an expression.
                num = int(raw_input(">>")) - 1
            except ValueError:
                continue  # not a whole number: ask again
            if 0 <= num < count:
                return num

    def chose(self):
        '''Ask for the time period and, for every period but Home, the sort.

        Updates ``self.site`` and ``self.sortt``.
        '''
        print('Time Period:')
        print('---1-- Home')
        print('---2-- Top of the day')
        print('---3-- Top of the week')
        print('---4-- Top of the month')
        print('---5-- All time')
        num = self._ask(len(fml.sites))
        self.site = fml.sites[num]
        if num != 0:
            self.chosesort()
        else:
            # Home is offered without a sort menu, so drop any sort key
            # left over from an earlier choice.
            self.sortt = ''

    def chosesort(self):
        '''Ask for the sort order and store its key in ``self.sortt``.'''
        print('Sorted by :')
        print('---1-- Agreed')
        print('---2-- Deserved')
        print('---3-- Comment')
        print('---4-- Favorite')
        self.sortt = fml.sorts[self._ask(len(fml.sorts))]

    def _fetch(self, url):
        '''Download ``url`` and return the response body.'''
        response = urllib2.urlopen(url)
        try:
            return response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

    def read(self, fetch=None):
        '''Read one page of stories and advance to the next page.

        fetch -- optional callable taking the page URL and returning its
                 HTML; defaults to downloading the URL with urllib2.

        Returns a list of stories.  A story is the concatenation of
        consecutive "fmllink" link texts up to and including the first one
        that contains 'FML'; trailing fragments with no such terminator are
        discarded.  ``self.page`` is incremented only after the page has
        been fetched, so a failed download can be retried on the same page.
        '''
        if fetch is None:
            fetch = self._fetch
        url = self.site % (self.sortt, self.page)
        sdata = fetch(url)
        self.page += 1

        msg = []
        parts = []
        for line in self._LINK_RE.findall(sdata):
            parts.append(line)
            if 'FML' in line:
                msg.append(''.join(parts))
                parts = []
        return msg
if __name__ == "__main__":
    # Script entry point: let the user pick a listing, fetch one page and
    # print every story preceded by a blank line.
    reader = fml()
    reader.chose()
    for story in reader.read():
        print('')
        print(story)