import urllib2 import networkx as nx import unicodedata as ud import pydot def remove_accents(input_str): "Removes accents" return ud.normalize('NFKD', input_str.replace('\xc3\x9f','ss').decode('utf8')).encode('ASCII', 'ignore') def get_source(url): usock = urllib2.urlopen(url) data = usock.read() usock.close() return data def get_name(source_lines, name_prefix = 'The Mathematics Genealogy Project - ', name_suffix = ''): for line in source_lines: if name_prefix in line: return remove_accents( line.split(name_prefix)[1].split(name_suffix)[0] ) def get_decendants(url, max_depth = 3, url_prefix = 'http://genealogy.math.ndsu.nodak.edu/', dec_prefix = 'id.php'): source = get_source(url) source_lines = source.split('\n') name = get_name(source_lines) print name, ': ', url if not max_depth: return dec_lines = source.split('Descendants') if len(dec_lines) > 1: dec_lines = [x for x in dec_lines[1].split('table')[0].split('\n') if dec_prefix in x] dec_urls = [x.split('href="')[-1].split('"')[0] for x in dec_lines] for u in dec_urls: get_decendants(url_prefix + u, max_depth-1) return def get_advisors(url, max_depth = 3, url_prefix = 'http://genealogy.math.ndsu.nodak.edu/', advisor_prefix = 'id.php'): source = get_source(url) source_lines = source.split('\n') name = get_name(source_lines) print max_depth, name, ': ', url if not max_depth: return advisor_lines = [x for x in source_lines if 'Advisor' in x] if len(advisor_lines) == 0: return [] else: advisor_urls = [] for line in advisor_lines: advisor_urls += [advisor for advisor in line.split('"') if advisor_prefix in advisor] if advisor_urls: for u in advisor_urls: get_advisors(url_prefix + u, max_depth-1) return def get_decendant_tree(url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=91041', max_depth = 3, advisor = None, graph = None, url_prefix = 'http://genealogy.math.ndsu.nodak.edu/', dec_prefix = 'id.php', ): ## Add current node to graph source = get_source(url) source_lines = source.split('\n') name = get_name(source_lines) ## Check whether user-called and create graph if so if advisor == None: graph = nx.DiGraph() graph.add_node(name) ## If recursive-call add edge to descendent else: graph.add_edge(advisor, name) if not max_depth: return name, graph dec_lines = source.split('Descendants') if len(dec_lines) > 1: dec_lines = [x for x in dec_lines[1].split('table')[0].split('\n') if dec_prefix in x] dec_urls = [x.split('href="')[-1].split('"')[0] for x in dec_lines] for u in dec_urls: get_decendant_tree(url_prefix + u, max_depth-1, name, graph) return name, graph def get_advisor_tree(url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=91041', max_depth = 3, descendent = None, graph = None, url_prefix = 'http://genealogy.math.ndsu.nodak.edu/', advisor_prefix = 'id.php', ): ## Add current node to graph source = get_source(url) source_lines = source.split('\n') name = get_name(source_lines) ## Check whether user-called and create graph if so if descendent == None: graph = nx.DiGraph() graph.add_node(name) ## If recursive-call add edge to descendent else: graph.add_edge(name, descendent) if not max_depth: return name, graph advisor_lines = [x for x in source_lines if 'Advisor' in x] if len(advisor_lines) == 0: return [] else: advisor_urls = [] for line in advisor_lines: advisor_urls += [advisor for advisor in line.split('"') if advisor_prefix in advisor] if advisor_urls: for u in advisor_urls: get_advisor_tree(url_prefix + u, max_depth-1, name, graph) return name, graph def store_decendant_tree(url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=91041', max_depth = 3): """ Gets a URL of a Mathematician from the "Mathematics Genealogy Project Webpage" and generates a *.txt file with the list of desendant and a *.dot file with the family tree (can be easily translated to pdf/ps/png/gif/etc.) [Inputs] @URL: Copy here the URL of the mathematician from the webpage http://genealogy.math.ndsu.nodak.edu @max_depth: the depth of the tree of ancestors you want to search to [Outputs] *.dot file with the tree of desendants *.txt file with a list of ancestors """ name, G = get_decendant_tree(url, max_depth, None, None) f = open('%s_desendants.txt' % name.replace(' ','').replace('(','').replace(')',''), 'w') f.write("\n".join(G.nodes())) f.close() D = nx.to_agraph(G) D.layout(prog = 'dot') D.write('%s_desendant_tree.dot' % name.replace(' ','').replace('(','').replace(')','')) def store_advisor_tree(url = 'http://genealogy.math.ndsu.nodak.edu/id.php?id=91041', max_depth = 3): """ Gets a URL of a Mathematician from the "Mathematics Genealogy Project Webpage" and generates a *.txt file with the list of advisors and a *.dot file with the family tree (can be easily translated to pdf/ps/png/gif/etc.) [Inputs] @URL: Copy here the URL of the mathematician from the webpage http://genealogy.math.ndsu.nodak.edu @max_depth: the depth of the tree of ancestors you want to search to [Outputs] *.dot file with the tree of desendants *.png file with the tree of desendants *.txt file with a list of ancestors """ name, G = get_advisor_tree(url, max_depth, None, None) f = open('%s_ancestors.txt' % name.replace(' ','').replace('(','').replace(')',''), 'w') f.write("\n".join(G.nodes())) f.close() D = nx.to_agraph(G) D.layout(prog = 'dot') filename = '%s_ancestor_tree' % name.replace(' ','').replace('(','').replace(')','') # Make a dot file D.write(filename + '.dot') # Translate dot to png pydot_graph = pydot.graph_from_dot_file(filename + '.dot') pydot_graph.write_png(filename + '.png') def main(): print "aloha" if __name__ == "__main__": main()