# Source code for tests.testOpenResearch

'''
Created on 27.07.2021

@author: wf
'''
import os
import unittest
from functools import partial

from lodstorage.lod import LOD

from corpus.datasources.openresearch import OREventSeries, OREventSeriesManager, OREventManager, OREvent, OR
from tests.testSMW import TestSMW
from tests.datasourcetoolbox import DataSourceTest
from corpus.lookup import CorpusLookup, CorpusLookupConfigure


class TestOpenResearch(DataSourceTest):
    '''
    test the access to OpenResearch
    '''

    def setUp(self, debug=False, profile=True, **kwargs):
        '''
        prepare the corpus lookup for the OpenResearch datasources

        Args:
            debug(bool): passed on to the parent setUp
            profile(bool): passed on to the parent setUp
            **kwargs: further keyword arguments passed on to the parent setUp

        Raises:
            Exception: if the wikiText path for "or" or "orclone" does not exist
        '''
        super().setUp(debug, profile, **kwargs)
        # by convention the lookupId "or" is for the OpenResearch via API / WikiUser access
        # the lookupId "orclone" is for the access via API on the OpenResearch clone
        lookupIds = []
        self.testWikiId = "orclone"
        # the returned wiki user is not used here
        # NOTE(review): presumably called to make sure the wiki user is available - confirm
        TestSMW.getWikiUser(self.testWikiId)
        self.testLimit = 100
        OR.limitFiles = self.testLimit
        for wikiId in "or", "orclone":
            wikiTextPath = CorpusLookupConfigure.getWikiTextPath(wikiId)
            if not os.path.exists(wikiTextPath):
                msg = f"wikibackup for {wikiId} missing you might want to run scripts/getbackup"
                raise Exception(msg)
            # each wiki is looked up twice: via wiki user and via its backup files
            lookupIds.append(wikiId)
            lookupIds.append(f"{wikiId}-backup")
        self.lookup = CorpusLookup(lookupIds=lookupIds, configure=self.configureCorpusLookup)

    def _setManagerOptions(self, manager, debug, profile):
        '''
        set the debug and profile options shared by all managers

        Args:
            manager: the manager to configure
            debug(bool): value for manager.debug
            profile(bool): value for manager.profile and manager.config.withShowProgress
        '''
        manager.debug = debug
        manager.config.withShowProgress = profile
        manager.profile = profile

    def setWikiUserAndOptions(self, manager, wikiUser, debug, profile=True):
        '''
        set the wiki user and the debug/profile options of the given manager

        Args:
            manager: the manager to configure
            wikiUser: the wiki user to assign to manager.wikiUser
            debug(bool): value for manager.debug
            profile(bool): value for manager.profile and manager.config.withShowProgress
        '''
        manager.wikiUser = wikiUser
        self._setManagerOptions(manager, debug, profile)

    def setWikiFileManagerAndOptions(self, manager, fileManager, debug, profile=True):
        '''
        set the wiki file manager and the debug/profile options of the given manager

        Args:
            manager: the manager to configure
            fileManager: the wiki file manager to assign to manager.wikiFileManager
            debug(bool): value for manager.debug
            profile(bool): value for manager.profile and manager.config.withShowProgress
        '''
        manager.wikiFileManager = fileManager
        self._setManagerOptions(manager, debug, profile)

    def configureCorpusLookup(self, lookup:CorpusLookup):
        '''
        callback to configure the corpus lookup

        Args:
            lookup(CorpusLookup): the lookup whose "or"/"orclone" datasources
                and their "-backup" counterparts are to be configured
        '''
        for lookupId in ["or", "orclone"]:
            # wiki user based access
            apiDataSource = lookup.getDataSource(lookupId)
            if apiDataSource:
                apiDataSource.profile = True
                apiDataSource.debug = self.debug
                wikiUser = TestSMW.getSMW_WikiUser(lookupId)
                self.setWikiUserAndOptions(apiDataSource.eventManager, wikiUser, self.debug)
                self.setWikiUserAndOptions(apiDataSource.eventSeriesManager, wikiUser, self.debug)
            # wiki markup file (backup) based access
            backupDataSource = lookup.getDataSource(f'{lookupId}-backup')
            if backupDataSource:
                backupDataSource.profile = True
                backupDataSource.debug = self.debug
                wikiFileManager = TestSMW.getWikiFileManager(wikiId=lookupId)
                self.setWikiFileManagerAndOptions(backupDataSource.eventManager, wikiFileManager, self.debug)
                self.setWikiFileManagerAndOptions(backupDataSource.eventSeriesManager, wikiFileManager, self.debug)

    def _checkLoadedDataSources(self, lookupIds):
        '''
        load the lookup and check the datasources with the given ids

        Args:
            lookupIds: the ids of the datasources to check, in the order given
        '''
        self.lookup.load()
        # with a file limit in place the limit itself is the expected count
        expectedSeries = OR.limitFiles if OR.limitFiles is not None else 1000
        expectedEvents = OR.limitFiles if OR.limitFiles is not None else 8000
        for lookupId in lookupIds:
            orDataSource = self.lookup.getDataSource(lookupId)
            self.checkDataSource(orDataSource, expectedSeries, expectedEvents)

    def testORDataSourceFromWikiFileManager(self):
        '''
        tests getting the conferences from wiki markup files
        '''
        self._checkLoadedDataSources(["or-backup", "orclone-backup"])

    def testORDataSourceFromWikiUser(self):
        '''
        tests initializing the OREventCorpus from wiki
        '''
        self._checkLoadedDataSources(["or", "orclone"])

    # The test used to be switched off with a bare ``return`` as its first
    # statement, which left the body unreachable and let the disabled test be
    # reported as passed; the skip makes the disabled state visible.
    # NOTE(review): unlike the other datasource tests the body never calls
    # self.lookup.load() - confirm whether that is needed before re-enabling.
    @unittest.skip("csv export test is disabled")
    def testAsCsv(self):
        '''
        test csv export of events
        '''
        orDataSource = self.lookup.getDataSource("orclone-backup")
        eventManager = orDataSource.eventManager
        csvString = eventManager.asCsv(selectorCallback=partial(eventManager.getEventsInSeries, "3DUI"))
        print(csvString)

    def testEventSeriesGetLoDfromWikiFileManager(self):
        '''
        tests getLoDfromWikiFileManager from OREventSeries
        '''
        manager = OREventSeriesManager()
        wikiFileManager = TestSMW.getWikiFileManager(self.testWikiId)
        lod = manager.getLoDfromWikiFileManager(wikiFileManager=wikiFileManager, limit=self.testLimit)
        self.checkEntityLoD(lod, OREventSeries, self.testLimit)

    def testEventGetLoDfromWikiFileManager(self):
        '''
        tests getLoDfromWikiFileManager from OREvent
        '''
        manager = OREventManager()
        wikiFileManager = TestSMW.getWikiFileManager(self.testWikiId)
        lod = manager.getLoDfromWikiFileManager(wikiFileManager=wikiFileManager, limit=self.testLimit)
        self.checkEntityLoD(lod, OREvent, self.testLimit)

    def testEventSeriesGetLoDfromWikiUser(self):
        '''
        tests getLoDfromWikiUser from OREventSeries
        '''
        manager = OREventSeriesManager()
        wikiUser = TestSMW.getWikiUser(self.testWikiId)
        lod = manager.getLoDfromWikiUser(wikiuser=wikiUser, limit=self.testLimit)
        self.checkEntityLoD(lod, OREventSeries, self.testLimit)

    def testEventGetLoDfromWikiUser(self):
        '''
        tests getLoDfromWikiUser from OREvent
        '''
        manager = OREventManager()
        wikiUser = TestSMW.getWikiUser(self.testWikiId)
        lod = manager.getLoDfromWikiUser(wikiuser=wikiUser, limit=self.testLimit)
        self.checkEntityLoD(lod, OREvent, self.testLimit)

    def checkEntityLoD(self, lod:list, entity:type, expectedRecords:int=None):
        """
        checks if the given lod contains fields from the samples

        Every record must have a "pageTitle" that is not None and every
        non-None value whose key also occurs in the first sample of the
        entity must have exactly the type of that sample value.

        Args:
            lod: list of entity records
            entity: entity class containing the samples
            expectedRecords(int): if not None the number of records the lod must have
        """
        if self.debug:
            print(lod)
        # NOTE(review): the reference fields are taken from the lod itself, not
        # from the entity samples, so the subset check below can presumably
        # never fail - confirm whether the sample fields were intended here
        entityFields = set(LOD.getFields(lod))
        # only the first sample record defines the expected types
        expectedTypes = {k: type(v) for k, v in entity().getSamples()[0].items()}
        if expectedRecords is not None:
            self.assertEqual(len(lod), expectedRecords, "LoD does not contain expected number of records")
        for record in lod:
            fields = set(record.keys())
            self.assertTrue(fields.issubset(entityFields), f"Unexpected fields found: {fields - entityFields} are not in the samples")
            self.assertIsNotNone(record["pageTitle"])
            for key, value in record.items():
                if key in expectedTypes and value is not None:
                    self.assertEqual(expectedTypes[key], type(value), f"{key} has not the expected type ({record})")
# entry point when this module is executed as a script
if __name__ == "__main__":
    # optional override of the command line arguments (kept disabled):
    #import sys;sys.argv = ['', 'Test.testName']
    DataSourceTest.main()