PyCon India 2014
September 27-28, 2014
I volunteered to provide technical assistance to an election campaign in Bangalore (and also in Andhra Pradesh).













from bs4 import BeautifulSoup
import urllib2


def parse(html):
    """Yield one list of cell texts per data row of the part-list table.

    The table is located by its element id; the first row is treated as
    the header and skipped.
    """
    soup = BeautifulSoup(html)
    # Select every row of the grid-view table.
    rows = soup.select("#ctl00_ContentPlaceHolder1_GridView1 tr")
    # Extract text for all rows except the header row.
    for tr in rows[1:]:
        # Look the cells up once and reuse them (the lookup used to be
        # performed twice per row, with the first result discarded).
        tds = tr.find_all("td")
        yield [td.get_text() for td in tds]


URL = ("http://ceokarnataka.kar.nic.in/ElectionFinalroll2014/" +
       "Part_List.aspx?ACNO=158")
html = urllib2.urlopen(URL).read()
# parse() is a generator: rows are produced lazily as `data` is iterated.
data = parse(html)
@cache.disk_memoize("cache/wp.html")
def get_wp_page():
    """Fetch WP_URL and return the raw response body."""
    response = urllib2.urlopen(WP_URL)
    return response.read()


@cache.disk_memoize("cache/table_{0}.json")
def get_table_for_state(state):
    # Body elided in the slides.
    ...


@cache.disk_memoize("cache/{state_name}_pc.tsv")
def get_pc_list(state_name):
    """Return [code, name] pairs built from the table rows of a state.

    The code is "PC" followed by the integer value of the row's first
    column, zero-padded to two digits; the name is the row's second column
    with surrounding whitespace stripped.
    """
    pc_list = []
    for row in get_table_for_state(state_name):
        code = 'PC{0:02d}'.format(int(row[0]))
        pc_list.append([code, row[1].strip()])
    return pc_list
# Slide stub: the method body is elided ("...").
# NOTE(review): presumably disk_memoize stores the result at the given path
# and reuses it on later calls — confirm against the cache module.
@cache.disk_memoize("cache/MP/districts.json")
def get_districts(self):
    ...
# Slide stub: the method body is elided ("...").
# The path template contains the field {ac:03d} (zero-padded to three
# digits); presumably it is filled from the `ac` argument — confirm against
# the cache module.
@cache.disk_memoize("cache/MP/AC{ac:03d}_booths.tsv")
def get_booths_of_ac(self, dist, ac):
    ...
# Slide stub: the method body is elided ("...").
# The path template indexes positional field 1 by the keys "state" and
# "district"; presumably field 1 is the `district` argument (field 0 being
# self), which would then have to be a mapping with those keys — confirm
# against the cache module.
@cache.disk_memoize("cache/map/{1[state]}/district_{1[district]}_acs.json")
def get_district_acs(self, district):
    ...

@cache.disk_memoize("cache/MP/districts.json")
def get_districts(self):
    """Return what the browser helper reports as the options of "ddlDistrict"."""
    district_options = self.browser.get_select_options("ddlDistrict")
    return district_options
# Selects a district and then an assembly constituency in the browser helper
# and grabs the resulting page as a soup object. The remainder of the method
# is elided in the slides ("...").
@cache.disk_memoize("cache/MP/AC{ac:03d}_booths.tsv")
def get_booths_of_ac(self, dist, ac):
    # District is selected before the assembly.
    # NOTE(review): presumably the assembly options depend on the chosen
    # district, so the order matters — confirm.
    self.browser.select_option('ddlDistrict', dist)
    self.browser.select_option('ddlAssembly', ac)
    soup = self.browser.get_soup()
    ...


def parse_ward(self):
    """Locate the ward details inside the "part & polling area" section.

    Reads the section of self.text between the two numbered headings, asks
    for a column index based on the four column labels, takes a window of
    the section from that index, and extracts the text between "Ward No."
    and "Police Station". The snippet ends there: ward_info is computed
    but nothing is returned.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    column_labels = ("Ward No.", "Taluka", "Police Station", "District")

    details = self.read_section(self.text, section_start, section_end)
    column = self.get_column_index(details, *column_labels)
    window = self.select_window(details, column, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")

def parse_ward(self):
    """Locate the ward details inside the "part & polling area" section.

    Reads the section of self.text between the two numbered headings, asks
    for a column index based on the four column labels, takes a window of
    the section from that index, and extracts the text between "Ward No."
    and "Police Station". The snippet ends there: ward_info is computed
    but nothing is returned.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    column_labels = ("Ward No.", "Taluka", "Police Station", "District")

    details = self.read_section(self.text, section_start, section_end)
    column = self.get_column_index(details, *column_labels)
    window = self.select_window(details, column, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")

def parse_ward(self):
    """Locate the ward details inside the "part & polling area" section.

    Reads the section of self.text between the two numbered headings, asks
    for a column index based on the four column labels, takes a window of
    the section from that index, and extracts the text between "Ward No."
    and "Police Station". The snippet ends there: ward_info is computed
    but nothing is returned.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    column_labels = ("Ward No.", "Taluka", "Police Station", "District")

    details = self.read_section(self.text, section_start, section_end)
    column = self.get_column_index(details, *column_labels)
    window = self.select_window(details, column, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")

def parse_ward(self):
    """Locate the ward details inside the "part & polling area" section.

    Reads the section of self.text between the two numbered headings, asks
    for a column index based on the four column labels, takes a window of
    the section from that index, and extracts the text between "Ward No."
    and "Police Station". The snippet ends there: ward_info is computed
    but nothing is returned.
    """
    section_start = "2. DETAILS OF PART & POLLING AREA"
    section_end = "3. POLLING STATION DETAILS"
    column_labels = ("Ward No.", "Taluka", "Police Station", "District")

    details = self.read_section(self.text, section_start, section_end)
    column = self.get_column_index(details, *column_labels)
    window = self.select_window(details, column, 1000)
    ward_info = self.extract_text(window, "Ward No.", "Police Station")

Thanks to Open Bangalore and the DataMeet group for map boundaries.
I continued to mess with more government data and improve the system.

Interested in collaborating?
| Table of Contents | t |
|---|---|
| Exposé | ESC |
| Full screen slides | e |
| Presenter View | p |
| Source Files | s |
| Slide Numbers | n |
| Toggle screen blanking | b |
| Show/hide slide context | c |
| Notes | 2 |
| Help | h |