Schema “Flats”

End-to-end example:

import requests
import json
import re
from pprint import pprint
from dphelper import DPHelper
# Scraping helper from the dphelper package, created with is_verbose=True.
helper = DPHelper(is_verbose=True)

# The same listing URL serves as the header "authority" and as the page to fetch.
listing_url = "https://berzunamai.lt/butai/"
headers = helper.create_headers(authority=listing_url)
content = helper.from_url(listing_url, headers=headers)

# One match per table row: nine <td> cell texts plus the href of the link
# cell, i.e. ten capture groups in total.
row_pattern = re.compile('<tr class=".*?"><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?">(.*?)</td><td class=".*?"><a href="(.*?)".*?></a>.*?</td><td class=".*?">(.*?)</td></tr>')
matches = row_pattern.findall(content)

# Column names, listed in the same order as the capture groups above.
columns = [
    'id',
    'floor_romanian',
    'area',
    'rooms',
    'orientation',
    'price',
    'status',
    'empty_column',
    'www',
    'floor',
]
apts = helper.parse_rows(columns, matches, verbose=True)

# Replace the captured href in 'www' with a link built from each row's
# 'floor' and 'id' values.
for row in apts:
    row['www'] = f"https://berzunamai.lt/butai/{row['floor']}-aukstas/{row['id']}-butas/"

# Export the rows to stdout as JSON.
print(json.dumps(apts))

Leave a Reply

Your email address will not be published. Required fields are marked *