FML 读取
读取时遇到了一点问题：一条内容在页面上会被拆成多个片段。后来发现每条内容最后都是以 "FML" 结尾，于是用它来判断一条内容是否结束。
#! /usr/bin/python
# coding:cp936
# CopyRight 2011 Adou XD , All Rights Reserved
"""Small console reader for fmylife.com stories.

The user picks a time period (and, for anything but the home feed, a sort
key); one page is then downloaded and every story found on it is printed.
The code is valid on Python 2 and also runs on Python 3.
"""
import os
import sys
import urllib
import re

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request provides the same urlopen() entry point.
    import urllib.request as urllib2

try:
    # Python 2: the builtin input() eval()s whatever the user types, so the
    # raw line reader is used and converted with int() instead.
    _read_line = raw_input
except NameError:
    _read_line = input  # Python 3: input() already returns the raw text


def _ask_choice(count):
    """Prompt with ">>" until an integer in 1..count is typed.

    Returns the zero-based index of the choice. Non-numeric input is
    rejected and the prompt is shown again; EOFError from the line reader
    is not caught.
    """
    while True:
        try:
            num = int(_read_line(">>")) - 1
        except ValueError:
            continue
        if 0 <= num < count:
            return num


class fml():
    """Reader for one fmylife.com listing.

    Attributes:
        page:  number of the next page that read() will request (starts at 0).
        site:  URL template taking (sort key, page number).
        sortt: sort key substituted into the template ('' until chosen).
    """

    # URL templates; each one takes (sort key, page number).
    sites = (
        "http://www.fmylife.com/%s?page=%d",             # home
        "http://www.fmylife.com/tops/%s/day?page=%d",    # day
        "http://www.fmylife.com/tops/%s/week?page=%d",   # week
        "http://www.fmylife.com/tops/%s/month?page=%d",  # month
        "http://www.fmylife.com/tops/%s?page=%d",        # all time
    )
    # Sort keys, in the same order as the menu printed by chosesort().
    sorts = ('top', 'flop', 'comment', 'favorite')

    # Text between a class="fmllink" opening tag and the next </a>.
    _STORY_FRAGMENT = re.compile(r'(?<=class="fmllink">).*?(?=</a>)')

    def __init__(self):
        self.page = 0
        self.site = fml.sites[0]
        self.sortt = ''

    def chose(self):
        """Ask for the time period; for anything but Home also ask the sort."""
        print('Time Period:')
        print('---1-- Home')
        print('---2-- Top of the day')
        print('---3-- Top of the week')
        print('---4-- Top of the month')
        print('---5-- All time')
        num = _ask_choice(len(fml.sites))
        self.site = fml.sites[num]
        if num != 0:
            self.chosesort()

    def chosesort(self):
        """Ask which sort key to use and store it in self.sortt."""
        print('Sorted by :')
        print('---1-- Agreed')
        print('---2-- Deserved')
        print('---3-- Comment')
        print('---4-- Favorite')
        self.sortt = fml.sorts[_ask_choice(len(fml.sorts))]

    @staticmethod
    def _extract_stories(sdata):
        """Return the stories contained in the page text *sdata*.

        A story may be spread over several consecutive "fmllink" anchors.
        Fragments are collected in order and a story is complete once a
        fragment containing 'FML' is seen. Trailing fragments that are never
        followed by such a fragment are dropped.
        """
        stories = []
        parts = []
        for fragment in fml._STORY_FRAGMENT.findall(sdata):
            parts.append(fragment)
            if 'FML' in fragment:
                stories.append(''.join(parts))
                parts = []
        return stories

    def read(self):
        """Download the current page and return its stories as a list.

        self.page is advanced only after the download succeeded, so a failed
        request can be retried without skipping a page. Errors raised by
        urlopen()/read() propagate to the caller.
        """
        url = self.site % (self.sortt, self.page)
        response = urllib2.urlopen(url)
        try:
            sdata = response.read()
        finally:
            response.close()
        self.page += 1
        if not isinstance(sdata, str):
            # Python 3 delivers bytes. NOTE(review): the page is assumed to
            # be UTF-8 -- confirm; undecodable bytes are replaced.
            sdata = sdata.decode('utf-8', 'replace')
        return self._extract_stories(sdata)


def main():
    """Run the interactive reader once: choose a listing, print one page."""
    fm = fml()
    fm.chose()
    for ms in fm.read():
        print('')
        print(ms)


if __name__ == "__main__":
    main()