[go: up one dir, main page]

Skip to content

Commit

Permalink
[MRG + 1] Add suppress_warnings flag (#155)
Browse files Browse the repository at this point in the history
* Add suppress_warnings flag

* Add --quiet flag to cli (to suppress warnings)

* Remove TODO and update comment
  • Loading branch information
jonathanlloyd authored and vinayak-mehta committed Oct 19, 2018
1 parent 1d064ad commit 3def4a5
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 14 deletions.
9 changes: 7 additions & 2 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def set_config(self, key, value):
' font size. Useful to detect super/subscripts.')
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
help='PDFMiner char_margin, line_margin and word_margin.')
@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.')
@click.pass_context
def cli(ctx, *args, **kwargs):
"""Camelot: PDF Table Extraction for Humans"""
Expand Down Expand Up @@ -89,6 +90,7 @@ def lattice(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -99,7 +101,8 @@ def lattice(c, *args, **kwargs):
kwargs['copy_text'] = None if not copy_text else copy_text
kwargs['shift_text'] = list(kwargs['shift_text'])

tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs)
tables = read_pdf(filepath, pages=pages, flavor='lattice',
suppress_warnings=suppress_warnings, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down Expand Up @@ -134,6 +137,7 @@ def stream(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -143,7 +147,8 @@ def stream(c, *args, **kwargs):
columns = list(kwargs['columns'])
kwargs['columns'] = None if not columns else columns

tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs)
tables = read_pdf(filepath, pages=pages, flavor='stream',
suppress_warnings=suppress_warnings, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down
20 changes: 14 additions & 6 deletions camelot/io.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# -*- coding: utf-8 -*-
import warnings

from .handlers import PDFHandler
from .utils import validate_input, remove_extra


def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
def read_pdf(filepath, pages='1', flavor='lattice', suppress_warnings=False,
**kwargs):
"""Read PDF and return extracted tables.
Note: kwargs annotated with ^ can only be used with flavor='stream'
Expand All @@ -20,6 +22,8 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
suppress_warnings : bool, optional (default: False)
Suppress every warning raised while the PDF is read and parsed (not only Camelot's own).
table_area : list, optional (default: None)
List of table area strings of the form x1,y1,x2,y2
where (x1, y1) -> left-top and (x2, y2) -> right-bottom
Expand Down Expand Up @@ -85,8 +89,12 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
raise NotImplementedError("Unknown flavor specified."
" Use either 'lattice' or 'stream'")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
with warnings.catch_warnings():
if suppress_warnings:
warnings.simplefilter("ignore")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
14 changes: 14 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,17 @@ def test_cli_output_format():
result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
'stream', infile])
assert result.exit_code == 0

def test_cli_quiet_flag():
    """Check that ``--quiet`` removes the "no tables" warning from CLI output."""
    expected_warning = 'No tables found on page-1'

    with TemporaryDirectory() as workdir:
        blank_pdf = os.path.join(testdir, 'blank.pdf')
        csv_path = os.path.join(workdir, 'blank.csv')
        base_args = ['--format', 'csv', '--output', csv_path,
                     'stream', blank_pdf]
        cli_runner = CliRunner()

        # Without --quiet the warning text appears in the captured output.
        noisy = cli_runner.invoke(cli, base_args)
        assert expected_warning in noisy.output

        # The same invocation prefixed with --quiet must not print it.
        silent = cli_runner.invoke(cli, ['--quiet'] + base_args)
        assert expected_warning not in silent.output
2 changes: 1 addition & 1 deletion tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_lattice_shift_text():
tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b'])
assert df_rb.equals(tables[0].df)


def test_repr():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
Expand Down
19 changes: 14 additions & 5 deletions tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,20 @@ def test_stream_equal_length():

def test_no_tables_found():
filename = os.path.join(testdir, 'blank.pdf')
# TODO: use pytest.warns
with warnings.catch_warnings():
warnings.simplefilter('error')
try:
with pytest.raises(UserWarning) as e:
tables = camelot.read_pdf(filename)
except Exception as e:
assert type(e).__name__ == 'UserWarning'
assert str(e) == 'No tables found on page-1'
assert str(e.value) == 'No tables found on page-1'


def test_no_tables_found_warnings_supressed():
    """Check that ``suppress_warnings=True`` lets no warning escape read_pdf."""
    blank_pdf = os.path.join(testdir, 'blank.pdf')
    with warnings.catch_warnings():
        # Escalate every warning to an exception so that anything leaking
        # out of read_pdf is caught below and fails the test.
        warnings.simplefilter('error')
        try:
            camelot.read_pdf(blank_pdf, suppress_warnings=True)
        except Warning as leaked:
            pytest.fail('Unexpected warning: {}'.format(str(leaked)))

0 comments on commit 3def4a5

Please sign in to comment.