-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmiddle_html.py
More file actions
67 lines (58 loc) · 2.23 KB
/
middle_html.py
File metadata and controls
67 lines (58 loc) · 2.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import re
from bs4 import BeautifulSoup
# Fixture: HTML for a single product list item (thumbnail link, star
# rating, title link, price, availability and an add-to-basket form).
# The find_* functions in this file query this markup.
ITEM_HTML = '''<html><head></head><body>
<li class="col-xs-6 col-sm-4 col-md-3 col-lg-3">
<article class="product_pod">
<div class="image_container">
<a href="catalogue/a-light-in-the-attic_1000/index.html"><img src="media/cache/2c/da/2cdad67c44b002e7ead0cc35693c0e8b.jpg" alt="A Light in the Attic" class="thumbnail"></a>
</div>
<p class="star-rating Three">
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
<i class="icon-star"></i>
</p>
<h3><a href="catalogue/a-light-in-the-attic_1000/index.html" title="A Light in the Attic">A Light in the ...</a></h3>
<div class="product_price">
<p class="price_color">£51.77</p>
<p class="instock availability">
<i class="icon-ok"></i>
In stock
</p>
<form>
<button type="submit" class="btn btn-primary btn-block" data-loading-text="Adding...">Add to basket</button>
</form>
</div>
</article>
</li>
</body></html>
'''
# Parse the fixture once at import time; every find_* function reads this
# shared module-level tree.
soup = BeautifulSoup(ITEM_HTML, 'html.parser')
def find_item_name():
    """Print the product's full name.

    The name is read from the ``title`` attribute of the heading link
    inside the product pod, queried on the module-level ``soup``.
    """
    title = soup.select_one('article.product_pod h3 a').attrs['title']
    print(title)
def find_item_nlink():
    """Print the relative URL of the product's detail page.

    The URL is the ``href`` attribute of the heading link inside the
    product pod, queried on the module-level ``soup``.
    """
    heading_anchor = soup.select_one('article.product_pod h3 a')
    href = heading_anchor.attrs['href']
    print(href)
def find_item_price():
    """Print the product's listed price and its discounted amount.

    Reads the text of the ``p.price_color`` element inside the product pod
    from the module-level ``soup`` and prints two lines:

    1. the matched price text including the currency sign, e.g. ``£51.77``;
    2. the numeric amount multiplied by 0.8 (a 20% discount), as a float.

    Raises:
        AttributeError: if the price element is not found (``select_one``
            returns ``None``) or its text does not contain a price in the
            expected ``£<digits>.<digits>`` form (``re.search`` returns
            ``None``).
    """
    locator = 'article.product_pod p.price_color'
    item_price = soup.select_one(locator).string  # e.g. '£51.77'

    # Raw string so the escaped dot reaches the regex engine unchanged.
    # ``[0-9]+`` after the dot captures every decimal digit; a single
    # ``[0-9]`` would truncate '£51.77' to '£51.7'.
    pattern = r'£([0-9]+\.[0-9]+)'
    matcher = re.search(pattern, item_price)

    print(matcher.group(0))  # full match, e.g. '£51.77'
    print(float(matcher.group(1)) * 0.8)  # numeric amount with the discount applied
def find_item_rating():
    """Print the product's star rating word (e.g. ``Three``).

    The rating is encoded as an extra CSS class on the ``p.star-rating``
    element; the first class other than ``star-rating`` itself is printed.
    """
    rating_tag = soup.select_one('article.product_pod p.star-rating')

    # Keep every class except the generic marker, preserving their order.
    remaining = []
    for css_class in rating_tag.attrs['class']:
        if css_class != 'star-rating':
            remaining.append(css_class)

    print(remaining[0])
# Run each extractor in turn; each one prints its result to stdout
# (name, link, price lines, then rating).
find_item_name()
find_item_nlink()
find_item_price()
find_item_rating()