From f614aca0eb2f93ee5cad573678c3d9ea832859b4 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Mon, 10 Aug 2020 12:40:55 +0200 Subject: [PATCH 1/4] Fix crawl test --- jupyterlab_thredds/crawler.py | 19 +- jupyterlab_thredds/fetchers/thredds.py | 3 +- setup.py | 2 +- tests/fixtures/crawler.expected.json | 114 +++++----- tests/fixtures/crawler.vcr.yml | 303 ++++++++++++++----------- tests/test_crawler.py | 9 +- 6 files changed, 245 insertions(+), 205 deletions(-) diff --git a/jupyterlab_thredds/crawler.py b/jupyterlab_thredds/crawler.py index 9a1c7f7..d6849ab 100644 --- a/jupyterlab_thredds/crawler.py +++ b/jupyterlab_thredds/crawler.py @@ -107,24 +107,22 @@ def __init__(self, url: str) -> None: class TDSCrawler: - def __init__(self, rooturl: str, loop, maxtasks=10): + def __init__(self, rooturl: str, maxtasks=10): """Asynchronous THREDDS catalog crawler Crawler based on https://github.com/aio-libs/aiohttp/blob/master/examples/legacy/crawl.py Args: rooturl: URL with THREDDS catalog xml file - loop: Event loop, eg. 
asyncio.get_event_loop() maxtasks: Number of download tasks to run concurrently """ self.rooturl = rooturl - self.loop = loop self.todo = set() self.busy = set() self.done = {} self.tasks = set() - self.sem = asyncio.Semaphore(maxtasks, loop=loop) - self.session = aiohttp.ClientSession(loop=loop) + self.sem = asyncio.Semaphore(maxtasks) + self.session = aiohttp.ClientSession() self.datasets = [] async def run(self): @@ -134,11 +132,10 @@ async def run(self): List of datasets """ delay = 0.2 - t = asyncio.ensure_future(self.addurls([self.rooturl]), - loop=self.loop) - await asyncio.sleep(delay, loop=self.loop) + t = asyncio.ensure_future(self.addurls([self.rooturl])) + await asyncio.sleep(delay) while self.busy: - await asyncio.sleep(delay, loop=self.loop) + await asyncio.sleep(delay) await t await self.session.close() @@ -151,11 +148,11 @@ async def addurls(self, urls): url not in self.todo): self.todo.add(url) await self.sem.acquire() - task = asyncio.ensure_future(self.process(url), loop=self.loop) + task = asyncio.ensure_future(self.process(url)) task.add_done_callback(lambda t: self.sem.release()) task.add_done_callback(self.tasks.remove) self.tasks.add(task) - await task # TODO waiting for task here will not make multiple tasks run concurrently, but if not exception are not captured + # await task # TODO awaiting the task here prevents multiple tasks from running concurrently, but without it exceptions raised in the task are not captured async def process(self, url): logger.info('processing: %s', url) diff --git a/jupyterlab_thredds/fetchers/thredds.py b/jupyterlab_thredds/fetchers/thredds.py index f367fec..76ec4ce 100644 --- a/jupyterlab_thredds/fetchers/thredds.py +++ b/jupyterlab_thredds/fetchers/thredds.py @@ -16,8 +16,7 @@ async def get(self): catalog_url = self.get_argument('catalog_url') self.set_header('Content-Type', 'application/json') c = ThreddsConfig(config=self.config) - loop = asyncio.get_event_loop() - crawler = TDSCrawler(catalog_url, loop, maxtasks=c.maxtasks) + crawler
= TDSCrawler(catalog_url, maxtasks=c.maxtasks) try: datasets = await asyncio.wait_for(crawler.run(), c.timeout) diff --git a/setup.py b/setup.py index d4419a5..e14552e 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ 'ipyleaflet', 'OWSLib', 'traitlets', - 'aiohttp', + 'aiohttp>=3.5,<4', ], version=get_version('jupyterlab_thredds/version.py'), long_description=readme, diff --git a/tests/fixtures/crawler.expected.json b/tests/fixtures/crawler.expected.json index 6b6fbf3..cf93fea 100644 --- a/tests/fixtures/crawler.expected.json +++ b/tests/fixtures/crawler.expected.json @@ -1,86 +1,86 @@ [ { - "id": "ewc/dischargeEns.nc", - "name": "ewc/dischargeEns.nc", + "name": "ewc/data/dischargeEns.nc", + "id": "ewc/data/dischargeEns.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/dischargeEns.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/dischargeEns.nc", - "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/dischargeEns.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/dischargeEns.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/dischargeEns.nc", - "WMS": "http://localhost:8080/thredds/wms/ewc/dischargeEns.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/dischargeEns.nc", + "DAP4": "http://localhost:8080/thredds/dap4/ewc/data/dischargeEns.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/dischargeEns.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/dischargeEns.nc", + "WMS": "http://localhost:8080/thredds/wms/ewc/data/dischargeEns.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/dischargeEns.nc" } }, { - "id": "ewc/dischargeEnsStdOut.nc", - "name": "ewc/dischargeEnsStdOut.nc", + "name": "ewc/data/dischargeEnsStdOut.nc", + "id": "ewc/data/dischargeEnsStdOut.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/dischargeEnsStdOut.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/dischargeEnsStdOut.nc", - "NETCDFSUBSET": 
"http://localhost:8080/thredds/ncss/ewc/dischargeEnsStdOut.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/dischargeEnsStdOut.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/dischargeEnsStdOut.nc", - "WMS": "http://localhost:8080/thredds/wms/ewc/dischargeEnsStdOut.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/dischargeEnsStdOut.nc", + "DAP4": "http://localhost:8080/thredds/dap4/ewc/data/dischargeEnsStdOut.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/dischargeEnsStdOut.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/dischargeEnsStdOut.nc", + "WMS": "http://localhost:8080/thredds/wms/ewc/data/dischargeEnsStdOut.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/dischargeEnsStdOut.nc" } }, { - "id": "ewc/forcingEnsemble/precipEnsMem01.nc", - "name": "ewc/forcingEnsemble/precipEnsMem01.nc", + "name": "ewc/data/forcingEnsemble/precipEnsMem01.nc", + "id": "ewc/data/forcingEnsemble/precipEnsMem01.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/forcingEnsemble/precipEnsMem01.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/forcingEnsemble/precipEnsMem01.nc", - "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/forcingEnsemble/precipEnsMem01.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/forcingEnsemble/precipEnsMem01.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/forcingEnsemble/precipEnsMem01.nc", - "WMS": "http://localhost:8080/thredds/wms/ewc/forcingEnsemble/precipEnsMem01.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/forcingEnsemble/precipEnsMem01.nc", + "DAP4": "http://localhost:8080/thredds/dap4/ewc/data/forcingEnsemble/precipEnsMem01.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/forcingEnsemble/precipEnsMem01.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/forcingEnsemble/precipEnsMem01.nc", + "WMS": 
"http://localhost:8080/thredds/wms/ewc/data/forcingEnsemble/precipEnsMem01.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/forcingEnsemble/precipEnsMem01.nc" } }, { - "id": "ewc/forcingEnsemble/tempEnsMem01.nc", - "name": "ewc/forcingEnsemble/tempEnsMem01.nc", + "name": "ewc/data/forcingEnsemble/tempEnsMem01.nc", + "id": "ewc/data/forcingEnsemble/tempEnsMem01.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/forcingEnsemble/tempEnsMem01.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/forcingEnsemble/tempEnsMem01.nc", - "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/forcingEnsemble/tempEnsMem01.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/forcingEnsemble/tempEnsMem01.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/forcingEnsemble/tempEnsMem01.nc", - "WMS": "http://localhost:8080/thredds/wms/ewc/forcingEnsemble/tempEnsMem01.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/forcingEnsemble/tempEnsMem01.nc", + "DAP4": "http://localhost:8080/thredds/dap4/ewc/data/forcingEnsemble/tempEnsMem01.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/forcingEnsemble/tempEnsMem01.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/forcingEnsemble/tempEnsMem01.nc", + "WMS": "http://localhost:8080/thredds/wms/ewc/data/forcingEnsemble/tempEnsMem01.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/forcingEnsemble/tempEnsMem01.nc" } }, { - "id": "ewc/work01/output/netcdf/discharge_dailyTot_output.nc", - "name": "ewc/work01/output/netcdf/discharge_dailyTot_output.nc", + "name": "ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", + "id": "ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/work01/output/netcdf/discharge_dailyTot_output.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/work01/output/netcdf/discharge_dailyTot_output.nc", - 
"NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/work01/output/netcdf/discharge_dailyTot_output.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/work01/output/netcdf/discharge_dailyTot_output.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/work01/output/netcdf/discharge_dailyTot_output.nc", - "WMS": "http://localhost:8080/thredds/wms/ewc/work01/output/netcdf/discharge_dailyTot_output.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", + "DAP4": "http://localhost:8080/thredds/dap4/ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", + "WMS": "http://localhost:8080/thredds/wms/ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/work01/output/netcdf/discharge_dailyTot_output.nc" } }, { - "id": "ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", - "name": "ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "name": "ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "id": "ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", - "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", - "WMS": 
"http://localhost:8080/thredds/wms/ewc/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "DAP4": "http://localhost:8080/thredds/dap4/ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "WMS": "http://localhost:8080/thredds/wms/ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc" } }, { - "id": "ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", - "name": "ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "name": "ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "id": "ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", "services": { - "DAP4": "http://localhost:8080/thredds/dap4/ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", - "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", - "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", - "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", - "WCS": "http://localhost:8080/thredds/wcs/ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", - "WMS": "http://localhost:8080/thredds/wms/ewc/work01/output/netcdf/totalEvaporation_dailyTot_output.nc" + "OPENDAP": "http://localhost:8080/thredds/dodsC/ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "DAP4": 
"http://localhost:8080/thredds/dap4/ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "HTTPSERVER": "http://localhost:8080/thredds/fileServer/ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "WCS": "http://localhost:8080/thredds/wcs/ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "WMS": "http://localhost:8080/thredds/wms/ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc", + "NETCDFSUBSET": "http://localhost:8080/thredds/ncss/ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc" } } -] +] \ No newline at end of file diff --git a/tests/fixtures/crawler.vcr.yml b/tests/fixtures/crawler.vcr.yml index a718cd7..731cb96 100644 --- a/tests/fixtures/crawler.vcr.yml +++ b/tests/fixtures/crawler.vcr.yml @@ -5,7 +5,8 @@ interactions: method: GET uri: http://localhost:8080/thredds/catalog.xml response: - body: {string: "\r\n\r\n\r\n \ \r\n \r\n \r\n \ all\r\n GRID\r\n \ netCDF\r\n \r\n \r\n \r\n\r\n"} + name=\"DatasetScan\" value=\"true\" />\r\n \r\n\r\n" headers: - ? !!python/object/new:multidict._istr.istr [Access-Control-Allow-Origin] - : '*' - ? !!python/object/new:multidict._istr.istr [Content-Language] - : en - ? !!python/object/new:multidict._istr.istr [Content-Type] - : application/xml;charset=UTF-8 - ? !!python/object/new:multidict._istr.istr [Date] - : Tue, 04 Dec 2018 14:10:47 GMT - ? 
!!python/object/new:multidict._istr.istr [Transfer-Encoding] - : chunked - status: {code: 200, message: null} - url: !!python/object/new:yarl.URL - state: !!python/tuple - - !!python/object/new:urllib.parse.SplitResult [http, 'localhost:8080', /thredds/catalog.xml, - '', ''] + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog.xml - request: body: null headers: {} method: GET uri: http://localhost:8080/thredds/catalog/ewc/catalog.xml response: - body: {string: "\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n all\r\n GRID\r\n \ netCDF\r\n \r\n \r\n \r\n - \ \r\n 335.9\r\n - \ 2018-05-01T11:58:19Z\r\n \r\n - \ \r\n - \ 671.8\r\n 2018-05-01T11:58:17Z\r\n - \ \r\n \r\n\r\n"} + xlink:href=\"data/catalog.xml\" xlink:title=\"data\" ID=\"ewc/data\" name=\"\" + />\r\n \r\n\r\n" headers: - ? !!python/object/new:multidict._istr.istr [Access-Control-Allow-Origin] - : '*' - ? !!python/object/new:multidict._istr.istr [Content-Language] - : en - ? !!python/object/new:multidict._istr.istr [Content-Type] - : application/xml;charset=UTF-8 - ? !!python/object/new:multidict._istr.istr [Date] - : Tue, 04 Dec 2018 14:10:47 GMT - ? 
!!python/object/new:multidict._istr.istr [Transfer-Encoding] - : chunked - status: {code: 200, message: null} - url: !!python/object/new:yarl.URL - state: !!python/tuple - - !!python/object/new:urllib.parse.SplitResult [http, 'localhost:8080', /thredds/catalog/ewc/catalog.xml, - '', ''] + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog/ewc/catalog.xml - request: body: null headers: {} method: GET - uri: http://localhost:8080/thredds/catalog/ewc/work01/catalog.xml + uri: http://localhost:8080/thredds/catalog/ewc/data/catalog.xml response: - body: {string: "\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n - \ \r\n \r\n \r\n \r\n \r\n all\r\n GRID\r\n \ netCDF\r\n \r\n \r\n \r\n\r\n"} + xlink:href=\"work01/catalog.xml\" xlink:title=\"work01\" ID=\"ewc/data/work01\" + name=\"\" />\r\n \r\n \r\n 335.9\r\n + \ 2018-05-01T11:58:19Z\r\n \r\n + \ \r\n + \ 671.8\r\n 2018-05-01T11:58:17Z\r\n + \ \r\n \r\n\r\n" headers: - ? !!python/object/new:multidict._istr.istr [Access-Control-Allow-Origin] - : '*' - ? !!python/object/new:multidict._istr.istr [Content-Language] - : en - ? !!python/object/new:multidict._istr.istr [Content-Type] - : application/xml;charset=UTF-8 - ? !!python/object/new:multidict._istr.istr [Date] - : Tue, 04 Dec 2018 14:10:47 GMT - ? 
!!python/object/new:multidict._istr.istr [Transfer-Encoding] - : chunked - status: {code: 200, message: null} - url: !!python/object/new:yarl.URL - state: !!python/tuple - - !!python/object/new:urllib.parse.SplitResult [http, 'localhost:8080', /thredds/catalog/ewc/work01/catalog.xml, - '', ''] + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog/ewc/data/catalog.xml - request: body: null headers: {} method: GET - uri: http://localhost:8080/thredds/catalog/ewc/work01/output/catalog.xml + uri: http://localhost:8080/thredds/catalog/ewc/data/work01/catalog.xml response: - body: {string: "\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n - \ \r\n \r\n \r\n \r\n \r\n all\r\n GRID\r\n \ netCDF\r\n \r\n \r\n \r\n\r\n"} + xlink:href=\"output/catalog.xml\" xlink:title=\"output\" ID=\"ewc/data/work01/output\" + name=\"\" />\r\n \r\n\r\n" headers: - ? !!python/object/new:multidict._istr.istr [Access-Control-Allow-Origin] - : '*' - ? !!python/object/new:multidict._istr.istr [Content-Language] - : en - ? !!python/object/new:multidict._istr.istr [Content-Type] - : application/xml;charset=UTF-8 - ? !!python/object/new:multidict._istr.istr [Date] - : Tue, 04 Dec 2018 14:10:47 GMT - ? 
!!python/object/new:multidict._istr.istr [Transfer-Encoding] - : chunked - status: {code: 200, message: null} - url: !!python/object/new:yarl.URL - state: !!python/tuple - - !!python/object/new:urllib.parse.SplitResult [http, 'localhost:8080', /thredds/catalog/ewc/work01/output/catalog.xml, - '', ''] + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog/ewc/data/work01/catalog.xml - request: body: null headers: {} method: GET - uri: http://localhost:8080/thredds/catalog/ewc/forcingEnsemble/catalog.xml + uri: http://localhost:8080/thredds/catalog/ewc/data/forcingEnsemble/catalog.xml response: - body: {string: "\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n - \ \r\n \r\n + \ \r\n \r\n \ \r\n all\r\n \ GRID\r\n netCDF\r\n - \ \r\n \r\n 17.63\r\n + \ \r\n \r\n 17.63\r\n \ 2018-05-01T11:58:19Z\r\n \r\n - \ \r\n 17.63\r\n - \ 2018-05-01T11:58:19Z\r\n \r\n - \ \r\n\r\n"} + \ \r\n 17.63\r\n 2018-05-01T11:58:19Z\r\n + \ \r\n \r\n\r\n" + headers: + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog/ewc/data/forcingEnsemble/catalog.xml +- request: + body: null + headers: {} + method: GET + uri: http://localhost:8080/thredds/catalog/ewc/data/work01/output/catalog.xml + response: + body: + string: "\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n + \ \r\n \r\n + \ \r\n 
all\r\n + \ GRID\r\n netCDF\r\n + \ \r\n \r\n \r\n\r\n" headers: - ? !!python/object/new:multidict._istr.istr [Access-Control-Allow-Origin] - : '*' - ? !!python/object/new:multidict._istr.istr [Content-Language] - : en - ? !!python/object/new:multidict._istr.istr [Content-Type] - : application/xml;charset=UTF-8 - ? !!python/object/new:multidict._istr.istr [Date] - : Tue, 04 Dec 2018 14:10:47 GMT - ? !!python/object/new:multidict._istr.istr [Transfer-Encoding] - : chunked - status: {code: 200, message: null} - url: !!python/object/new:yarl.URL - state: !!python/tuple - - !!python/object/new:urllib.parse.SplitResult [http, 'localhost:8080', /thredds/catalog/ewc/forcingEnsemble/catalog.xml, - '', ''] + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog/ewc/data/work01/output/catalog.xml - request: body: null headers: {} method: GET - uri: http://localhost:8080/thredds/catalog/ewc/work01/output/netcdf/catalog.xml + uri: http://localhost:8080/thredds/catalog/ewc/data/work01/output/netcdf/catalog.xml response: - body: {string: "\r\n\r\n\r\n \r\n \r\n \r\n \r\n \r\n - \ \r\n \r\n + \ \r\n \r\n \ \r\n all\r\n \ GRID\r\n netCDF\r\n \ \r\n \r\n + ID=\"ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc\" urlPath=\"ewc/data/work01/output/netcdf/totalEvaporation_dailyTot_output.nc\">\r\n \ 335.9\r\n 2018-05-01T11:58:27Z\r\n \ \r\n \r\n + ID=\"ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc\" urlPath=\"ewc/data/work01/output/netcdf/satDegUppSurface_dailyTot_output.nc\">\r\n \ 335.9\r\n 2018-05-01T11:58:24Z\r\n - \ \r\n \r\n 335.9\r\n 2018-05-01T11:58:22Z\r\n - \ \r\n \r\n\r\n"} + \ 
\r\n \r\n + \ 335.9\r\n 2018-05-01T11:58:22Z\r\n + \ \r\n \r\n\r\n" headers: - ? !!python/object/new:multidict._istr.istr [Access-Control-Allow-Origin] - : '*' - ? !!python/object/new:multidict._istr.istr [Content-Language] - : en - ? !!python/object/new:multidict._istr.istr [Content-Type] - : application/xml;charset=UTF-8 - ? !!python/object/new:multidict._istr.istr [Date] - : Tue, 04 Dec 2018 14:10:47 GMT - ? !!python/object/new:multidict._istr.istr [Transfer-Encoding] - : chunked - status: {code: 200, message: null} - url: !!python/object/new:yarl.URL - state: !!python/tuple - - !!python/object/new:urllib.parse.SplitResult [http, 'localhost:8080', /thredds/catalog/ewc/work01/output/netcdf/catalog.xml, - '', ''] + Access-Control-Allow-Origin: '*' + Content-Language: en + Content-Type: application/xml;charset=UTF-8 + Date: Mon, 10 Aug 2020 10:37:59 GMT + Server: Apache + Strict-Transport-Security: max-age=0 + Transfer-Encoding: chunked + X-Content-Type-Options: nosniff + X-Frame-Options: SAMEORIGIN + X-XSS-Protection: 1; mode=block + vary: Origin + status: + code: 200 + message: '' + url: http://localhost:8080/thredds/catalog/ewc/data/work01/output/netcdf/catalog.xml version: 1 diff --git a/tests/test_crawler.py b/tests/test_crawler.py index 418bc32..e985e04 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -11,13 +11,14 @@ def expected_crawl_result(): with open('tests/fixtures/crawler.expected.json') as f: expected_datasets = json.load(f) - return expected_datasets + return sorted(expected_datasets, key=lambda d: d['id']) @pytest.mark.asyncio @vcr.use_cassette('tests/fixtures/crawler.vcr.yml') -async def test_crawl(event_loop, expected_crawl_result): +async def test_crawl(expected_crawl_result): catalog_url = 'http://localhost:8080/thredds/catalog.xml' - crawler = TDSCrawler(catalog_url, event_loop, maxtasks=5) + crawler = TDSCrawler(catalog_url, maxtasks=5) datasets = await asyncio.wait_for(crawler.run(), timeout=10) - assert sorted(datasets, 
key=lambda d: d['id']) == expected_crawl_result + sorted_datasets = sorted(datasets, key=lambda d: d['id']) + assert sorted_datasets == expected_crawl_result From 236a8868fd5fea847b4c3371bedd4d3f15e3d3dd Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Mon, 10 Aug 2020 12:59:30 +0200 Subject: [PATCH 2/4] In pytest use xunit2 --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 52d0f8c..e884775 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,3 @@ [tool:pytest] addopts = --cov=jupyterlab_thredds --cov-report xml --cov-report term --cov-branch --junit-xml xunit-reports/xunit-result-jupyterlab_thredds.xml +junit_family=xunit2 From a72dd8145022c16ed0c129dc527ffa3d2b00df7f Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Mon, 10 Aug 2020 14:50:45 +0200 Subject: [PATCH 3/4] Cleanup --- README.md | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 89d2792..c82e63b 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ jlpm build jupyter labextension link . jupyter serverextension enable --sys-prefix jupyterlab_thredds ``` + (`jlpm` command is JupyterLab's pinned version of [yarn](https://yarnpkg.com/) that is installed with JupyterLab.) To rebuild the package and the JupyterLab app: @@ -56,6 +57,7 @@ jupyter lab build ``` Watch mode + ```bash # shell 1 jlpm watch @@ -66,11 +68,12 @@ jupyter lab --ip=0.0.0.0 --no-browser --watch ## Release To make a new release perform the following steps: + 1. Update version in `package.json` and `jupyterlab_thredds/version.py` 2. Record changes in `CHANGELOG.md` 3. Make sure tests pass by running `jlpm test` and `pytest` -5. Commit and push all changes -6. Publish lab extension to npmjs with `jlpm build` and `jlpm publish --access=public` -7. Publish server extension to pypi with `python setup.py sdist bdist_wheel` and `twine upload dist/*` -8. Create GitHub release -9. Update DOI in `CITATION.cff` +4. 
Commit and push all changes +5. Publish lab extension to npmjs with `jlpm build` and `jlpm publish --access=public` +6. Publish server extension to pypi with `python setup.py sdist bdist_wheel` and `twine upload dist/*` +7. Create GitHub release +8. Update DOI in `CITATION.cff` From 6e0974fb85f6aaa6aa3a480934eaadddff15c454 Mon Sep 17 00:00:00 2001 From: Stefan Verhoeven Date: Mon, 10 Aug 2020 14:50:51 +0200 Subject: [PATCH 4/4] Add crawl error test --- tests/test_crawler.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/test_crawler.py b/tests/test_crawler.py index e985e04..af3d917 100644 --- a/tests/test_crawler.py +++ b/tests/test_crawler.py @@ -4,7 +4,7 @@ import pytest import vcr -from jupyterlab_thredds.crawler import TDSCrawler +from jupyterlab_thredds.crawler import TDSCrawler, CrawlerFetchError @pytest.fixture @@ -22,3 +22,12 @@ async def test_crawl(expected_crawl_result): datasets = await asyncio.wait_for(crawler.run(), timeout=10) sorted_datasets = sorted(datasets, key=lambda d: d['id']) assert sorted_datasets == expected_crawl_result + +@pytest.mark.asyncio +@vcr.use_cassette('tests/fixtures/crawler2.vcr.yml') +async def test_crawl_notfound(): + catalog_url = 'http://localhost:8888/thredds/catalog.xml' + crawler = TDSCrawler(catalog_url, maxtasks=5) + with pytest.raises(CrawlerFetchError) as excinfo: + await asyncio.wait_for(crawler.run(), timeout=10) + assert catalog_url in str(excinfo.value) \ No newline at end of file