/*%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% * $Id: convertipsv.pl,v 1.6 2005/12/07 15:02:35 mark Exp $ * * convertipsv.pl * Description: converts IPSV XML version into SKOS RDF/OWL. * * Author: Mark F.J. van Assem (mark@cs.vu.nl) * * %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Unpacking & Instructions * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * - place in directory with sub-dirs 'src' and 'rdf' * - place ipsv.xml in 'src' dir * - output is stored in rdf directory * (directories and files are configurable, see below) * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Software Requirements * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Programmed for SWI-Prolog 5537 (develop release) * * http://www.swi-prolog.org/ * http://www.swi-prolog.org/packages/sgml2pl.html * * Programmed against IPSV XML version 1.0. * http://www.esd.org.uk/standards/ipsv/ * http://www.esd.org.uk/standards/ipsv/ipsv.xml * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * To Do * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * - Improve error checking * - Improve comments * - Process metadata completely * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Main commands * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * - go. * * Recommended usage: * - go. * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Debugging * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * */ % load the SWI-Prolog SGML library :- use_module(library('sgml')), use_module(library('semweb/rdf_db')). 
%%%%%%%%%%%%%%%%%%%%% set the namespaces %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Namespace prefixes used in the rdf_assert/3 calls below.
rdf_db:ns(ipsv, 'http://www.esd.org.uk/standards/ipsv/1.00/ipsv#').
rdf_db:ns(skos, 'http://www.w3.org/2004/02/skos/core#').

%%%%%%%%% set the directories/filenames of input and output files %%%%%%%%%%%%%%

src_dir(ipsv, 'src').           % don't include slash at end!
src_file(ipsv, 'ipsv.xml').
out_dir(ipsv, 'rdf').
out_file(ipsv, 'ipsv.rdf').

% concept_scheme_uri(?URI)
% Holds the URI of the skos:ConceptScheme instance; it is set while the
% 'Metadata' element is processed and read again for every preferred item.
% Declared dynamic so that a lookup made before 'Metadata' has been seen
% simply fails instead of raising an existence error.
:- dynamic concept_scheme_uri/1.

%%%%%%%%%%%%%% construct the complete path to output file %%%%%%%%%%%%%%%%%%

% out_file_path(+F, -OutFilePath)
% F is a code used to match to actual output file, see out_file/2.
% The directory is always the one registered for 'ipsv' in out_dir/2.
out_file_path(F, OutFilePath) :-
    out_dir(ipsv, Dir),
    out_file(F, File),
    concat_atom([Dir, File], '/', OutFilePath).

%%%%%%%%%%%%%% load IPSV XML file %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% load_ipsv_XML_file(-Content)
% Parses <src_dir>/<src_file> with load_xml_file/2 and returns the parsed
% document in Content.
load_ipsv_XML_file(Content) :-
    src_file(ipsv, File),
    src_dir(ipsv, Dir),
    concat_atom([Dir, File], '/', Path),
    load_xml_file(Path, Content).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Processing XML document contents - main loop
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% The parsed document is expected to be a list with one element:
%
%   [element('ControlledList', AttributeList, ContainedElements)]
%
% ContainedElements holds further element(Name, Attrs, Content) terms (the
% code below handles 'Metadata' and 'Item') mixed with other tokens such as
% '\n'.  Only the element/3 terms are processed; anything else is skipped.

% process_content(+Content)
% Entry point: handles the 'ControlledList' wrapper and then walks its
% children, which asserts all rdf(_,_,_) statements.
process_content([element('ControlledList', AttrList, ContainedElements)]) :-
    process_element('ControlledList', AttrList),
    process_list(ContainedElements).

% process_list(+Tokens)
% Processes every element/3 term in Tokens and ignores all other tokens.
% Fails if process_element/3 fails for some element.
process_list([]) :-
    write('Finished processing all elements\n').
process_list([Head|Tail]) :-
    (   Head = element(Name, Attrs, Content)
    ->  % Commit to the first solution: process_item/2 ends in a catch-all
        % clause, and re-entering it on backtracking would log bogus errors
        % and repeat work for items that were already converted.
        once(process_element(Name, Attrs, Content))
    ;   true                    % it's not an element, process rest of list
    ),
    process_list(Tail).

% process_element(+Name, +Attrs, +Content)
% Dispatches on the element name.
process_element('Item', A, B) :-
    debug(processing, 'going to process an ', []),
    process_item(A, B).
process_element('Metadata', _A, _B) :-
    debug(processing, 'Read  (data itself not converted into RDF, does create a skos:ConceptScheme instance )\n', []),
    rdf_db:ns(ipsv, NS),
    concat_atom([NS, 'IPSV'], URI),
    rdf_assert(URI, rdf:type, skos:'ConceptScheme'),
    % Make this URI available for the whole program.  Drop any fact left
    % over from an earlier run first, so that exactly one scheme URI exists.
    retractall(concept_scheme_uri(_)),
    assert(concept_scheme_uri(URI)).

% process_element(+Name, +Attrs)
% Two-argument variant used for the document wrapper; only logs.
process_element('ControlledList', _Attr) :-
    debug(processing, 'Read  (not converted into RDF)\n', []).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Processing 'Item' elements
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% process_item(+Attr, +Cont)
% Attr is the attribute list of an 'Item' element, Cont its content list.
% The clauses are tried in order: preferred item, non-preferred synonym,
% non-preferred misspelling, and finally a catch-all that only logs.

% preferred item
process_item(Attr, Cont) :-
    memberchk('Preferred'=Preferred, Attr),
    Preferred == 'true',
    memberchk('ConceptId'=ConceptId, Attr),
    memberchk('AToZ'=AToZ, Attr),
    get_name(Cont, Name),
    create_concept_uri(ConceptId, ConceptURI),
    rdf_assert(ConceptURI, rdf:type, skos:'Concept'),
    (   % use sub-property of prefLabel if this label should be displayed
        % on websites
        AToZ == true
    ->  rdf_assert(ConceptURI, skos:displayablePrefLabel, literal(Name))
    ;   AToZ == false
    ->  rdf_assert(ConceptURI, skos:prefLabel, literal(Name))
    ;   % AToZ has neither value true nor false: ERROR
        member('Id'=Id, Attr),
        % The format string needs one ~w per argument; the second one was
        % missing, which makes the message fail to format once the 'error'
        % debug topic is switched on.
        debug(error, 'Item with Id ~w doesnt have value true or false for AToZ: ~w', [Id, AToZ])
    ),
    % scopenote, broader items, related items, shortcut
    process_scopenote(ConceptURI, Cont),
    get_broader_items(Cont, Broaders),
    process_broader_items(ConceptURI, Broaders),
    get_related_items(Cont, Related),
    process_related_items(ConceptURI, Related),
    process_shortcut(ConceptURI, Cont),
    % finally, add that this concept is part of the IPSV concept scheme
    concept_scheme_uri(CSURI),
    rdf_assert(ConceptURI, skos:inScheme, CSURI).

% non-preferred item (synonym): only contributes an alternative label
process_item(Attr, Cont) :-
    memberchk('Preferred'=Preferred, Attr),
    Preferred == 'false',
    memberchk('Type'=Synonym, Attr),
    Synonym == 'synonym',
    memberchk('ConceptId'=ConceptId, Attr),
    memberchk('AToZ'=AToZ, Attr),
    get_name(Cont, Name),
    create_concept_uri(ConceptId, ConceptURI),
    (   % use sub-property of altLabel if this label should be displayed
        % on websites
        AToZ == true
    ->  rdf_assert(ConceptURI, skos:displayableAltLabel, literal(Name))
    ;   AToZ == false
    ->  rdf_assert(ConceptURI, skos:altLabel, literal(Name))
    ;   % AToZ has neither value true nor false: ERROR
        member('Id'=Id, Attr),
        debug(error, 'Item with Id ~w doesnt have value true or false for AToZ: ~w', [Id, AToZ])
    ),
    rdf_assert(ConceptURI, rdf:type, skos:'Concept').

% non-preferred item that is a misspelling
process_item(Attr, Cont) :-
    memberchk('Preferred'=Preferred, Attr),
    Preferred == 'false',
    memberchk('Type'=Type, Attr),
    Type == 'misspelling',
    memberchk('ConceptId'=ConceptId, Attr),
    get_name(Cont, Name),
    create_concept_uri(ConceptId, ConceptURI),
    rdf_assert(ConceptURI, skos:obsoleteTerm, literal(Name)),
    rdf_assert(ConceptURI, rdf:type, skos:'Concept').

% catch-all: none of the clauses above succeeded, so only report the item
process_item(Attr, Cont) :-
    member('Id'=Id, Attr),
    get_name(Cont, Name),
    debug(error-item, 'Item with Id ~w and name ~w could not be processed\n, attributes are ~w\n contents is: ~w', [Id, Name, Attr, Cont]).

% create_concept_uri(+Id, -URI)
% URI is the ipsv namespace with Id appended.
create_concept_uri(Id, URI) :-
    rdf_db:ns(ipsv, NS),
    concat_atom([NS, Id], URI).

% get_name(+List, ?Name)
% Name is the single content token of the first attribute-less 'Name'
% element in List that matches; fails if there is none.
get_name(List, Name) :-
    memberchk(element('Name', [], [Name]), List).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% process_scopenote(+URI, +List)
% List must contain a 'ScopeNotes' element that is either empty or holds a
% single token; the predicate fails otherwise.
process_scopenote(URI, List) :-
    (   % content is empty: there is no scopenote, so assert nothing
        memberchk(element('ScopeNotes', [], []), List)
    ->  true
    ;   memberchk(element('ScopeNotes', [], [Note]), List),
        rdf_assert(URI, skos:scopeNote, literal(Note))
    ).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% process_shortcut(+URI, +List)
% Asserts a skos:shortcut literal for a 'Shortcut' element found in List.
% The second clause makes the predicate succeed as well when there is no
% such element, since a shortcut is optional.
process_shortcut(URI, List) :-
    member(element('Shortcut', [], [SC]), List),
    rdf_assert(URI, skos:shortcut, literal(SC)).
process_shortcut(_URI, _List).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% get_broader_items(+Cont, -BroaderItems)
% BroaderItems lists, in document order, every 'BroaderItem' element that
% occurs in Cont.
get_broader_items(Cont, BroaderItems) :-
    Pattern = element('BroaderItem', _Attrs, _Body),
    findall(Pattern, get_broader(Cont, Pattern), BroaderItems).

% get_broader(+Cont, ?Item)
% Enumerates the 'BroaderItem' elements of Cont on backtracking.
get_broader(Cont, Item) :-
    Item = element('BroaderItem', _Attrs, _Body),
    member(Item, Cont).

% process_broader_items(+URI, +Items)
% Handles each collected 'BroaderItem' element in turn.
process_broader_items(_URI, []).
process_broader_items(URI, [First|Rest]) :-
    process_broader_item(URI, First),
    process_broader_items(URI, Rest).

% process_broader_item(+SrcURI, +Element)
% The element's content only repeats the label of the broader item pointed
% at, so it is not looked at.  The 'Default' attribute selects the property:
% true gives ipsv:defaultBroader, false gives skos:broader.
process_broader_item(SrcURI, element('BroaderItem', Attr, _Cont)) :-
    member('ConceptId'=ConceptId, Attr),
    member('Default'=Default, Attr),
    create_concept_uri(ConceptId, TargetURI),
    (   Default == true
    ->  % it is a default BroaderItem
        rdf_assert(SrcURI, ipsv:defaultBroader, TargetURI)
    ;   % it isn't
        Default == false,
        rdf_assert(SrcURI, skos:broader, TargetURI)
    ).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% get_related_items(+Cont, -RelatedItems)
% RelatedItems lists, in document order, every 'RelatedItem' element that
% occurs in Cont.
get_related_items(Cont, RelatedItems) :-
    Pattern = element('RelatedItem', _Attrs, _Body),
    findall(Pattern, get_related(Cont, Pattern), RelatedItems).

% get_related(+Cont, ?Item)
% Enumerates the 'RelatedItem' elements of Cont on backtracking.
get_related(Cont, Item) :-
    Item = element('RelatedItem', _Attrs, _Body),
    member(Item, Cont).

% process_related_items(+URI, +Items)
% Handles each collected 'RelatedItem' element in turn.
process_related_items(_URI, []).
process_related_items(URI, [First|Rest]) :-
    process_related_item(URI, First),
    process_related_items(URI, Rest).
% process_related_item(+SrcURI, +Element)
% The element's content only repeats the label of the related item pointed
% at, so it is not looked at; the target is identified by 'ConceptId'.
process_related_item(SrcURI, element('RelatedItem', Attr, _Cont)) :-
    member('ConceptId'=ConceptId, Attr),
    create_concept_uri(ConceptId, TargetURI),
    rdf_assert(SrcURI, skos:related, TargetURI).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% go
% Main command: loads the XML file, converts it to RDF triples, saves them
% to the output file and clears the triples again.
go :-
    % Start from a clean slate: a concept scheme URI remembered by an
    % earlier run must not leak into this one.  retractall/1 also makes the
    % predicate known as dynamic if it has not been defined yet.
    retractall(concept_scheme_uri(_)),
    write('Going to load XML file...\n'),
    load_ipsv_XML_file(Content),
    write('Going to process file...\n'),
    % Assert all rdf(..,..,..) statements.  once/1 drops the choice points
    % left by the item processing; without it, asking the toplevel for
    % another solution would re-run parts of the conversion after the
    % triples have been retracted and then save the file again.
    once(process_content(Content)),
    write('Saving RDF to file...\n'),
    out_file_path(ipsv, OutFile),   % get name of file to save RDF in
    debug(create-ipsv-file, 'Now going to save triples to file ~w', [OutFile]),
    rdf_save(OutFile, [document_language(en)]),   % add lang attribute to whole doc
    debug(create-ipsv-file, 'Saved triples to file, now going to retract all RDF statements', []),
    rdf_retractall(_, _, _),
    % The remembered scheme URI refers to a triple that no longer exists.
    retractall(concept_scheme_uri(_)).
% fin!