'''
Created on 2021-04-16
@author: wf
'''
from corpus.event import EventManager, EventSeriesManager, EventStorage
from corpus.config import EventDataSourceConfig
from corpus.quality.rating import RatingManager
from corpus.utils.download import Download
from corpus.utils.download import Profiler
class EventDataSource(object):
    '''
    a data source for events

    combines an event manager and an event series manager with the
    configuration of the source they belong to
    '''

    def __init__(self,eventManager:EventManager,eventSeriesManager:EventSeriesManager,sourceConfig=EventDataSourceConfig):
        '''
        constructor

        Args:
            eventManager(EventManager): manager for the events
            eventSeriesManager(EventSeriesManager): manager for the eventSeries
            sourceConfig(EventDataSourceConfig): the configuration for the EventDataSource
        '''
        # NOTE(review): the default value is the EventDataSourceConfig class itself,
        # not an instance - this looks like a type annotation written as a default.
        # It is kept for interface compatibility; confirm that callers always pass
        # a configured instance.
        self.sourceConfig=sourceConfig
        self.name=self.sourceConfig.name
        self.eventManager=eventManager
        # give both managers a back reference to this data source
        self.eventManager.dataSource=self
        self.eventSeriesManager=eventSeriesManager
        self.eventSeriesManager.dataSource=self

    def load(self,forceUpdate=False,showProgress=False,debug=False):
        '''
        load this data source

        configures both managers, loads events and then series via
        fromCache and finally links events to their series

        Args:
            forceUpdate(bool): if true force updating this datasource (passed as "force" to fromCache)
            showProgress(bool): if true show the progress (passed as "profile" to the Profiler)
            debug(bool): if true show debug information (currently not used by this method)
        '''
        msg=f"loading {self.sourceConfig.title}"
        profiler=Profiler(msg=msg,profile=showProgress)
        self.eventSeriesManager.configure()
        self.eventManager.configure()
        # first events
        self.eventManager.fromCache(force=forceUpdate)
        # then series
        self.eventSeriesManager.fromCache(force=forceUpdate)
        # TODO use same foreign key in all dataSources
        self.eventManager.linkSeriesAndEvent(self.eventSeriesManager,"inEventSeries")
        profiler.time()

    def rateAll(self,ratingManager:RatingManager):
        '''
        rate all events and series based on the given rating Manager

        Args:
            ratingManager(RatingManager): the rating manager handed to rateAll of both managers
        '''
        self.eventManager.rateAll(ratingManager)
        self.eventSeriesManager.rateAll(ratingManager)
class EventCorpus(object):
    '''
    Towards a gold standard event corpus and observatory ...

    keeps a registry of event data sources and loads them on request
    '''

    def __init__(self,debug=False,verbose=False):
        '''
        Constructor

        Args:
            debug(bool): set debugging if True
            verbose(bool): set verbose output if True
        '''
        self.debug=debug
        self.verbose=verbose
        # registered data sources keyed by their sourceConfig.lookupId
        self.eventDataSources={}

    def addDataSource(self, eventDataSource:"EventDataSource"):
        '''
        adds the given eventDataSource

        a data source that is already registered under the same
        sourceConfig.lookupId is replaced

        Args:
            eventDataSource(EventDataSource): the data source to register
        '''
        self.eventDataSources[eventDataSource.sourceConfig.lookupId]=eventDataSource

    def loadAll(self,forceUpdate:bool=False,showProgress=False):
        '''
        load all eventDataSources

        Args:
            forceUpdate(bool): True if the data should be fetched from the source instead of the cache
            showProgress(bool): if true show the progress while loading each data source
        '''
        for eventDataSource in self.eventDataSources.values():
            eventDataSource.load(forceUpdate=forceUpdate,showProgress=showProgress)

    @staticmethod
    def download():
        '''
        download the EventCorpus.db if needed

        the gzipped database backup is requested via Download.downloadBackupFile
        with the cache path of the EventStorage configuration as target directory
        '''
        fileName="EventCorpus.db"
        url = f"https://confident.dbis.rwth-aachen.de/downloads/conferencecorpus/{fileName}.gz"
        targetDirectory=EventStorage.getStorageConfig().getCachePath()
        Download.downloadBackupFile(url, fileName, targetDirectory)