I had a program that read in a text file and took out the necessary variables for serialization into turtle format and storing in an RDF graph. The code I had was crude and I was advised to separate it into functions. As I am new to Python, I had no idea how to do this. Below is some of the functions of the program.
I am getting confused as to when parameters should be passed into the functions and when they should be initialized with self. Here are some of my functions. If I could get an explanation as to what I am doing wrong that would be great.
#!/usr/bin/env python
from rdflib import URIRef, Graph
from StringIO import StringIO
import subprocess as sub
class Wordnet():
def __init__(self, graph):
self.graph = Graph()
def process_file(self, file):
file = open("new_2.txt", "r")
return file
def line_for_loop(self, file):
for line in file:
self.split_pointer_part()
self.split_word_part()
self.split_gloss_part()
self.process_lex_filenum()
self.process_synset_offset()
+more functions............
self.print_graph()
def split_pointer_part(self, before_at, after_at, line):
before_at, after_at = line.split('@', 1)
return before_at, after_at
def get_num_words(self, word_part, num_words):
""" 1 as default, may want 0 as an invalid case """
""" do if else statements on l3 variable """
if word_part[3] == '0a':
num_words = 10
else:
num_words = int(word_part[3])
return num_words
def get_pointers_list(self, pointers, after_at, num_pointers, pointerList):
pointers = after_at.split()[0:0 +4 * num_pointers:4]
pointerList = iter(pointers)
return pointerList
............code to create triples for graph...............
def print_graph(self):
print graph.serialize(format='nt')
def main():
wordnet = Wordnet()
my_file = wordnet.process_file()
wordnet.line_for_loop(my_file)
if __name__ == "__main__":
main()