The UNIX and Linux Forums  

Go Back   The UNIX and Linux Forums > Top Forums > Shell Programming and Scripting
.
google unix.com




View Single Post in the UNIX and Linux Forums - Click on the Thread or Permalink to View Entire Thread -->
  #4 (permalink)  
Old 05-26-2009
ghostdog74 ghostdog74 is offline Forum Advisor  
Registered User
  
 

Join Date: Sep 2006
Posts: 2,546
if you have Python, an almost full alternative solution
Code:
 
#!/usr/bin/env python
import urllib2,re
pat=re.compile(""".*<span class="tdBlancBold">(.*)<div align="center">.*""",re.M|re.DOTALL)
days=['lundi','mardi','mercredi','jeudi','vendredi','samedi','dimanche']
url="http://www.natureetdecouvertes.com/pages/gener/view_FO_STORE_corgen.asp?mag_cod=%s"
for num in range(101,174):
    page=urllib2.urlopen(url % str(num))    
    data=page.read()
    if not "Impossible" in data:
        result = pat.findall(data)       
        store={}
        for i in result:
            for j in i.split("<br>"):
                j=j.strip()
                if j.startswith("le"):
                    j=j.split()
                    if j[1] in days:
                        t1,t2=j[-3],j[-1]
                        store.setdefault(j[1],[])
                        store[j[1]].extend([t1,t2])
        for DAY in days:
            try:
                print "%s |" %( ' '.join(store[DAY])),
            except: 
                print "\t\t|",
        print ""    
    else:
        print "Page not found ",url % str(num)
extract of output :
Code:
# python test.py
10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 19.00 |
10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 |             |
10.00 20.00 | 10.00 20.00 | 10.00 20.00 |               | 10.00 21.00 | 10.00 20.00 |           |
10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 |             |
10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 |             |
10.00 21.00 | 10.00 21.00 | 10.00 21.00 | 10.00 21.00 | 10.00 21.00 | 10.00 20.00 |             |
10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 |             |
10.00 20.00 | 10.00 21.00 | 10.00 21.00 | 10.00 21.00 | 10.00 21.00 | 10.00 20.00 |             |
9.30 19.30 | 9.30 19.30 | 9.30 19.30 | 9.30 19.30 | 9.30 19.30 | 9.30 19.30 |           |
Page not found  http://www.natureetdecouvertes.com/pages/gener/view_FO_STORE_corgen.asp?mag_cod=110
10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 |             |
10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 | 10.00 19.30 |             |
10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 |             |
10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 | 10.00 20.00 |             |
9.30 19.30 | 9.30 19.30 | 9.30 19.30 | 9.30 19.30 | 9.30 19.30 | 9.30 19.30 |           |