'''
Created on 27.07.2021
@author: wf
'''
import os
from functools import partial
from lodstorage.lod import LOD
from corpus.datasources.openresearch import OREventSeries, OREventSeriesManager, OREventManager, OREvent, OR
from tests.testSMW import TestSMW
from tests.datasourcetoolbox import DataSourceTest
from corpus.lookup import CorpusLookup, CorpusLookupConfigure
class TestOpenResearch(DataSourceTest):
    '''
    test the access to OpenResearch
    '''

    def setUp(self, debug=False, profile=True, **kwargs):
        '''
        prepare the corpus lookup for the OpenResearch wiki and its clone

        Args:
            debug(bool): passed on to the setUp of the super class
            profile(bool): passed on to the setUp of the super class
            **kwargs: passed on to the setUp of the super class

        Raises:
            Exception: if the wiki text backup directory of one of the wikis is missing
        '''
        super().setUp(debug, profile, **kwargs)
        # by convention the lookupId "or" is for the OpenResearch via API / WikiUser access
        # the lookupId "orclone" is for the access via API on the OpenResearch clone
        lookupIds = []
        self.testWikiId = "orclone"
        # the result is not used here - presumably the call makes sure
        # that the wiki user is available (confirm in TestSMW)
        TestSMW.getWikiUser(self.testWikiId)
        self.testLimit = 100
        # NOTE: this is a class level setting of OR which is not reset after the test
        OR.limitFiles = self.testLimit
        for wikiId in "or", "orclone":
            wikiTextPath = CorpusLookupConfigure.getWikiTextPath(wikiId)
            if not os.path.exists(wikiTextPath):
                msg = f"wikibackup for {wikiId} missing you might want to run scripts/getbackup"
                raise Exception(msg)
            # each wiki is looked up via API (wikiId) and via its backup files (wikiId-backup)
            lookupIds.append(wikiId)
            lookupIds.append(f"{wikiId}-backup")
        # configureCorpusLookup is not defined in this class -
        # presumably it is inherited from DataSourceTest (TODO confirm)
        self.lookup = CorpusLookup(lookupIds=lookupIds, configure=self.configureCorpusLookup)

    def _setCommonOptions(self, manager, debug, profile):
        '''
        set the debug and profile options shared by both manager configurations

        Args:
            manager: the manager to configure
            debug(bool): the debug flag to set
            profile(bool): the profile flag to set - also used for showing progress
        '''
        manager.debug = debug
        manager.config.withShowProgress = profile
        manager.profile = profile

    def setWikiUserAndOptions(self, manager, wikiUser, debug, profile=True):
        '''
        configure the given manager for the access via a wiki user

        Args:
            manager: the manager to configure
            wikiUser: the wiki user to assign to the manager
            debug(bool): the debug flag to set
            profile(bool): the profile flag to set - also used for showing progress
        '''
        manager.wikiUser = wikiUser
        self._setCommonOptions(manager, debug, profile)

    def setWikiFileManagerAndOptions(self, manager, fileManager, debug, profile=True):
        '''
        configure the given manager for the access via a wiki file manager

        Args:
            manager: the manager to configure
            fileManager: the wiki file manager to assign to the manager
            debug(bool): the debug flag to set
            profile(bool): the profile flag to set - also used for showing progress
        '''
        manager.wikiFileManager = fileManager
        self._setCommonOptions(manager, debug, profile)

    def _expectedCounts(self):
        '''
        get the expected minimum number of records for the data source checks

        Returns:
            tuple: the expected number of event series and of events -
            both are OR.limitFiles if a limit is set, 1000 and 8000 otherwise
        '''
        if OR.limitFiles is not None:
            return OR.limitFiles, OR.limitFiles
        return 1000, 8000

    def testORDataSourceFromWikiFileManager(self):
        '''
        tests getting the conferences from wiki markup files
        '''
        self.lookup.load()
        expectedSeries, expectedEvents = self._expectedCounts()
        for lookupId in "or-backup", "orclone-backup":
            orDataSource = self.lookup.getDataSource(lookupId)
            self.checkDataSource(orDataSource, expectedSeries, expectedEvents)

    def testORDataSourceFromWikiUser(self):
        '''
        tests initializing the OREventCorpus from wiki
        '''
        self.lookup.load()
        expectedSeries, expectedEvents = self._expectedCounts()
        for lookupId in "or", "orclone":
            orDataSource = self.lookup.getDataSource(lookupId)
            self.checkDataSource(orDataSource, expectedSeries, expectedEvents)

    def testAsCsv(self):
        '''
        test csv export of events
        '''
        # this test is deliberately disabled by the early return;
        # the statements below are never executed
        # (note that self.lookup.load() is not called here)
        return
        orDataSource = self.lookup.getDataSource("orclone-backup")
        eventManager = orDataSource.eventManager
        csvString = eventManager.asCsv(selectorCallback=partial(eventManager.getEventsInSeries, "3DUI"))
        print(csvString)

    def testEventSeriesGetLoDfromWikiFileManager(self):
        '''
        tests getLoDfromWikiFileManager from OREventSeries
        '''
        manager = OREventSeriesManager()
        wikiFileManager = TestSMW.getWikiFileManager(self.testWikiId)
        lod = manager.getLoDfromWikiFileManager(wikiFileManager=wikiFileManager, limit=self.testLimit)
        self.checkEntityLoD(lod, OREventSeries, self.testLimit)

    def testEventGetLoDfromWikiFileManager(self):
        '''
        tests getLoDfromWikiFileManager from OREvent
        '''
        manager = OREventManager()
        wikiFileManager = TestSMW.getWikiFileManager(self.testWikiId)
        lod = manager.getLoDfromWikiFileManager(wikiFileManager=wikiFileManager, limit=self.testLimit)
        self.checkEntityLoD(lod, OREvent, self.testLimit)

    def testEventSeriesGetLoDfromWikiUser(self):
        '''
        tests getLoDfromWikiUser from OREventSeries
        '''
        manager = OREventSeriesManager()
        wikiUser = TestSMW.getWikiUser(self.testWikiId)
        lod = manager.getLoDfromWikiUser(wikiuser=wikiUser, limit=self.testLimit)
        self.checkEntityLoD(lod, OREventSeries, self.testLimit)

    def testEventGetLoDfromWikiUser(self):
        '''
        tests getLoDfromWikiUser from OREvent
        '''
        manager = OREventManager()
        wikiUser = TestSMW.getWikiUser(self.testWikiId)
        lod = manager.getLoDfromWikiUser(wikiuser=wikiUser, limit=self.testLimit)
        self.checkEntityLoD(lod, OREvent, self.testLimit)

    def checkEntityLoD(self, lod:list, entity:type, expectedRecords:int=None):
        """
        checks the records of the given list of dicts against the samples of the given entity

        Each record needs to have a pageTitle and every value that is not None
        needs to have exactly the type of the corresponding value in the first sample.

        Args:
            lod(list): list of entity records (list of dicts)
            entity(type): entity class containing the samples
            expectedRecords(int): the exact number of records expected - not checked if None
        """
        if self.debug:
            print(lod)
        # NOTE(review): entityFields is derived from lod itself, so the subset check
        # below cannot fail - presumably the fields of the samples were intended here; confirm
        entityFields = set(LOD.getFields(lod))
        # only the first sample is used as the type reference
        expectedTypes = {k:type(v) for k,v in entity().getSamples()[0].items()}
        if expectedRecords is not None:
            self.assertEqual(len(lod), expectedRecords, "LoD does not contain expected number of records")
        for record in lod:
            fields = set(record.keys())
            self.assertTrue(fields.issubset(entityFields), f"Unexpected fields found: {fields - entityFields} are not in the samples")
            # use get so that a missing key is reported as a test failure instead of a KeyError
            self.assertIsNotNone(record.get("pageTitle"), f"pageTitle is missing ({record})")
            for key, value in record.items():
                if key in expectedTypes and value is not None:
                    # strict type comparison - subclasses of the sample type are not accepted
                    self.assertEqual(expectedTypes[key], type(value), f"{key} has not the expected type ({record})")
if __name__ == "__main__":
    # run the tests of this module via the entry point of the test base class
    # to run a single test only, override the command line arguments first, e.g.:
    # import sys;sys.argv = ['', 'Test.testName']
    DataSourceTest.main()