from atropine import go, check, special
from atropine.atropine import Atropine
import re
# Example: pull the currency and amount out of a "Quarterly Earnings" table
# row and convert the amount to an integer number of minor units
# (hundredths of the currency unit).

# Sample markup to scrape. The two <span>s carry the same, unhelpful class
# name, so they cannot be told apart by class alone.
atropine = Atropine('''
<!-- snip -->
<table id="earningsTable">
<tbody>
<tr>
<td class="headerTableCell">
Quarterly Earnings
</td>
<td class="dataTableCell">
<span class="unhelpfulClassName">GBP</span>
<span class="unhelpfulClassName">123.45</span>
</td>
</tr>
</tbody>
</table>''', ignorewhitespace=True)

# Case-insensitive pattern used to verify the header cell's text.
qearningsregex = re.compile(r'quarterly earnings', re.IGNORECASE)

# Walk table -> tbody -> tr -> header td -> following data td, with a check
# after each step, then collect the data cell's text under 'earnings-info'.
# NOTE(review): the exact semantics of the go/check/special steps are defined
# by the atropine library and are not visible here -- confirm against its docs.
atropine = atropine.resolve(
    go.only(tag='table', attrs=dict(id='earningsTable')),
    go.child(0), check.has(tag='tbody'),
    go.child(0), check.has(tag='tr'),
    go.child(0), check.has(tag='td',
                           cls='headerTableCell',
                           onlytext=qearningsregex),
    go.nextsib, check.has(tag='td', cls='dataTableCell'),
    special.collect('earnings-info', alltext=True))

# The collected entry is unpacked as exactly two items: currency, then amount.
(currency, amount) = atropine.collection['earnings-info']

# Round to the nearest integer before converting: plain int() truncates, and
# a binary-float product can land just below the intended value
# (e.g. 4.35 * 100 == 434.99999999999994, which int() would turn into 434).
# int(round(...)) is used so the result is an int on Python 2 as well as 3.
amount = int(round(float(amount) * 100))
|