PyCon India 2014
September 27-28, 2014
I volunteered to provide technical assistance to an election campaign in Bangalore (and also in Andhra Pradesh).
from bs4 import BeautifulSoup
import urllib2


def parse(html):
    """Yield the cell texts of every data row in the part-list table.

    Rows are selected from the element with id
    ``ctl00_ContentPlaceHolder1_GridView1``; the first row is treated as
    the header and skipped.  Each yielded item is a list holding the text
    of every ``td`` in that row.
    """
    # NOTE(review): no parser is named here, so the choice is left to bs4.
    soup = BeautifulSoup(html)

    # select every row of the table
    rows = soup.select("#ctl00_ContentPlaceHolder1_GridView1 tr")

    # extract text for all rows except the header row
    for tr in rows[1:]:
        # Look the cells up once and reuse them.
        tds = tr.find_all("td")
        yield [td.get_text() for td in tds]


URL = ("http://ceokarnataka.kar.nic.in/ElectionFinalroll2014/" +
       "Part_List.aspx?ACNO=158")
html = urllib2.urlopen(URL).read()
# parse() is a generator: rows are only extracted as ``data`` is iterated.
data = parse(html)
@cache.disk_memoize("cache/wp.html")
def get_wp_page():
    """Fetch WP_URL and return the raw response body."""
    response = urllib2.urlopen(WP_URL)
    return response.read()


@cache.disk_memoize("cache/table_{0}.json")
def get_table_for_state(state):
    """Body elided in this excerpt."""
    ...


@cache.disk_memoize("cache/{state_name}_pc.tsv")
def get_pc_list(state_name):
    """Return ``[code, name]`` pairs built from the table rows of a state.

    For each row from ``get_table_for_state(state_name)`` the first column
    is converted to an int and rendered as ``PC`` plus a zero-padded
    two-digit number; the second column is stripped of surrounding
    whitespace.
    """
    pc_rows = []
    for row in get_table_for_state(state_name):
        pc_code = 'PC{0:02d}'.format(int(row[0]))
        pc_rows.append([pc_code, row[1].strip()])
    return pc_rows
# NOTE(review): cache.disk_memoize is defined elsewhere; presumably it stores
# each result at the given path, filling the placeholders from the call
# arguments -- confirm against its implementation.
@cache.disk_memoize("cache/MP/districts.json")
def get_districts(self):
    # Body elided in this excerpt.
    ...


# The "{ac:03d}" placeholder renders ``ac`` as a zero-padded three-digit
# integer, assuming the template is expanded with str.format -- TODO confirm.
@cache.disk_memoize("cache/MP/AC{ac:03d}_booths.tsv")
def get_booths_of_ac(self, dist, ac):
    # Body elided in this excerpt.
    ...


# "{1[state]}" / "{1[district]}" index into positional argument 1, so
# ``district`` presumably is a mapping with "state" and "district" keys --
# verify against callers.
@cache.disk_memoize("cache/map/{1[state]}/district_{1[district]}_acs.json")
def get_district_acs(self, district):
    # Body elided in this excerpt.
    ...
# NOTE(review): cache.disk_memoize is defined elsewhere; presumably it stores
# the result at the given path -- confirm against its implementation.
@cache.disk_memoize("cache/MP/districts.json")
def get_districts(self):
    """Return the options of the "ddlDistrict" select, as reported by self.browser."""
    return self.browser.get_select_options("ddlDistrict")


@cache.disk_memoize("cache/MP/AC{ac:03d}_booths.tsv")
def get_booths_of_ac(self, dist, ac):
    """Select a district and an assembly in the browser, then read the page.

    ``dist`` is chosen in the "ddlDistrict" select and ``ac`` in the
    "ddlAssembly" select before the page soup is fetched.  The rest of the
    body is elided in this excerpt.
    """
    # The district is selected before the assembly, in that order.
    self.browser.select_option('ddlDistrict', dist)
    self.browser.select_option('ddlAssembly', ac)
    soup = self.browser.get_soup()
    # Remainder of the method is not shown in this excerpt.
    ...
def parse_ward(self):
    """Locate the ward information inside the part-details section.

    Reads a section of ``self.text`` delimited by the two heading strings,
    finds a column index from the ward/taluka/police-station/district
    labels, takes a window of that section from the index, and extracts
    the text bounded by "Ward No." and "Police Station".  The visible
    excerpt stops after the extraction and returns nothing.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    part_details = self.read_section(self.text, section_start, section_end)

    column_labels = ("Ward No.", "Taluka", "Police Station", "District")
    column_start = self.get_column_index(part_details, *column_labels)

    # 1000 is passed through to select_window unchanged.
    window = self.select_window(part_details, column_start, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")
def parse_ward(self):
    """Locate the ward information inside the part-details section.

    Reads a section of ``self.text`` delimited by the two heading strings,
    finds a column index from the ward/taluka/police-station/district
    labels, takes a window of that section from the index, and extracts
    the text bounded by "Ward No." and "Police Station".  The visible
    excerpt stops after the extraction and returns nothing.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    part_details = self.read_section(self.text, section_start, section_end)

    column_labels = ("Ward No.", "Taluka", "Police Station", "District")
    column_start = self.get_column_index(part_details, *column_labels)

    # 1000 is passed through to select_window unchanged.
    window = self.select_window(part_details, column_start, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")
def parse_ward(self):
    """Locate the ward information inside the part-details section.

    Reads a section of ``self.text`` delimited by the two heading strings,
    finds a column index from the ward/taluka/police-station/district
    labels, takes a window of that section from the index, and extracts
    the text bounded by "Ward No." and "Police Station".  The visible
    excerpt stops after the extraction and returns nothing.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    part_details = self.read_section(self.text, section_start, section_end)

    column_labels = ("Ward No.", "Taluka", "Police Station", "District")
    column_start = self.get_column_index(part_details, *column_labels)

    # 1000 is passed through to select_window unchanged.
    window = self.select_window(part_details, column_start, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")
def parse_ward(self):
    """Locate the ward information inside the part-details section.

    Reads a section of ``self.text`` delimited by the two heading strings,
    finds a column index from the ward/taluka/police-station/district
    labels, takes a window of that section from the index, and extracts
    the text bounded by "Ward No." and "Police Station".  The visible
    excerpt stops after the extraction and returns nothing.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    part_details = self.read_section(self.text, section_start, section_end)

    column_labels = ("Ward No.", "Taluka", "Police Station", "District")
    column_start = self.get_column_index(part_details, *column_labels)

    # 1000 is passed through to select_window unchanged.
    window = self.select_window(part_details, column_start, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")
Thanks to Open Bangalore and DataMeet group for map boundaries.
I continued to mess with more government data and improve the system.
Interested in collaborating?
Table of Contents | t |
---|---|
Exposé | ESC |
Full screen slides | e |
Presenter View | p |
Source Files | s |
Slide Numbers | n |
Toggle screen blanking | b |
Show/hide slide context | c |
Notes | 2 |
Help | h |