Source code for sparkkgml.semantification

from rdflib import Graph, Literal, Namespace, URIRef,BNode
from datetime import datetime


class Semantification:
    """Turn tabular prediction results into RDF triples and serialize them.

    Attributes:
        _graph (rdflib.Graph): The RDF graph that accumulates the triples.

    Methods:
        get_graph(): Return the RDF graph held by this instance.
        set_graph(graph): Replace the RDF graph held by this instance.
        semantify(df, namespace, exp_uri='uri', exp_label='label',
            exp_prediction='prediction', dest='experiment.ttl'):
            Add a label triple and a prediction triple for every row of
            ``df`` to the graph, then serialize the graph to ``dest``.
    """

    def __init__(self):
        # Start with an empty graph; semantify() only ever adds to it.
        self._graph = Graph()

    def get_graph(self):
        """Return the RDF graph held by this instance."""
        return self._graph

    def set_graph(self, graph):
        """Replace the RDF graph held by this instance with ``graph``."""
        self._graph = graph

    def semantify(self, df, namespace, exp_uri='uri', exp_label='label',
                  exp_prediction='prediction', dest='experiment.ttl'):
        """Add label/prediction triples for each row of ``df`` and serialize.

        For every row, two triples are added to the instance graph:
        ``(<uri>, <namespace>label, "label value")`` and
        ``(<uri>, <namespace>prediction, "prediction value")``.
        The graph is not cleared first, so triples from earlier calls (or
        from a graph installed via ``set_graph``) are kept and serialized
        as well.

        Parameters:
            df (pyspark.sql.DataFrame): The DataFrame holding the data to
                semantify. All of its rows are collected locally.
            namespace (str): Namespace string used to build the ``label``
                and ``prediction`` predicates; it is bound to the prefix
                ``experiment`` in the graph.
            exp_uri (str): Name of the column holding the URI of each
                instance. Default is 'uri'.
            exp_label (str): Name of the column holding the label of each
                instance. Default is 'label'.
            exp_prediction (str): Name of the column holding the prediction
                of each instance. Default is 'prediction'.
            dest (str): Destination file path the graph is serialized to.
                Default is 'experiment.ttl'. No explicit format is passed,
                so the serialization format is rdflib's default.

        Returns:
            None
        """
        # Define the namespace and expose it under the "experiment" prefix.
        ex_ns = Namespace(namespace)
        self._graph.bind("experiment", ex_ns)

        # The two predicates are the same for every row; build them once.
        label_predicate = ex_ns.label
        prediction_predicate = ex_ns.prediction

        # DataFrame.collect() already yields Row objects, so there is no
        # need to go through the RDD API first.
        for row in df.collect():
            # Read all three values before touching the graph so that a
            # missing column fails before any triple of this row is added.
            uri = row[exp_uri]
            label = row[exp_label]
            prediction = row[exp_prediction]

            instance_node = URIRef(uri)
            self._graph.add((instance_node, label_predicate, Literal(label)))
            self._graph.add(
                (instance_node, prediction_predicate, Literal(prediction))
            )

        self._graph.serialize(destination=dest)