[go: up one dir, main page]

Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG + 1] Add suppress_warnings flag #155

Merged
merged 3 commits into from
Oct 19, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def set_config(self, key, value):
' font size. Useful to detect super/subscripts.')
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
help='PDFMiner char_margin, line_margin and word_margin.')
@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.')
@click.pass_context
def cli(ctx, *args, **kwargs):
"""Camelot: PDF Table Extraction for Humans"""
Expand Down Expand Up @@ -89,6 +90,7 @@ def lattice(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -99,7 +101,8 @@ def lattice(c, *args, **kwargs):
kwargs['copy_text'] = None if not copy_text else copy_text
kwargs['shift_text'] = list(kwargs['shift_text'])

tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs)
tables = read_pdf(filepath, pages=pages, flavor='lattice',
suppress_warnings=suppress_warnings, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down Expand Up @@ -134,6 +137,7 @@ def stream(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -143,7 +147,8 @@ def stream(c, *args, **kwargs):
columns = list(kwargs['columns'])
kwargs['columns'] = None if not columns else columns

tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs)
tables = read_pdf(filepath, pages=pages, flavor='stream',
suppress_warnings=suppress_warnings, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down
20 changes: 14 additions & 6 deletions camelot/io.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# -*- coding: utf-8 -*-
import warnings

from .handlers import PDFHandler
from .utils import validate_input, remove_extra


def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
def read_pdf(filepath, pages='1', flavor='lattice', suppress_warnings=False,
**kwargs):
"""Read PDF and return extracted tables.

Note: kwargs annotated with ^ can only be used with flavor='stream'
Expand All @@ -20,6 +22,8 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
suppress_warnings : bool, optional (default: False)
Prevent warnings from being emitted by Camelot.
table_area : list, optional (default: None)
List of table area strings of the form x1,y1,x2,y2
where (x1, y1) -> left-top and (x2, y2) -> right-bottom
Expand Down Expand Up @@ -85,8 +89,12 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
raise NotImplementedError("Unknown flavor specified."
" Use either 'lattice' or 'stream'")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
with warnings.catch_warnings():
if suppress_warnings:
warnings.simplefilter("ignore")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
14 changes: 14 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,17 @@ def test_cli_output_format():
result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
'stream', infile])
assert result.exit_code == 0

def test_cli_quiet_flag():
with TemporaryDirectory() as tempdir:
infile = os.path.join(testdir, 'blank.pdf')
outfile = os.path.join(tempdir, 'blank.csv')
runner = CliRunner()

result = runner.invoke(cli, ['--format', 'csv', '--output', outfile,
'stream', infile])
assert 'No tables found on page-1' in result.output

result = runner.invoke(cli, ['--quiet', '--format', 'csv',
'--output', outfile, 'stream', infile])
assert 'No tables found on page-1' not in result.output
2 changes: 1 addition & 1 deletion tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def test_lattice_shift_text():
tables = camelot.read_pdf(filename, line_size_scaling=40, shift_text=['r', 'b'])
assert df_rb.equals(tables[0].df)


def test_repr():
filename = os.path.join(testdir, "foo.pdf")
tables = camelot.read_pdf(filename)
Expand Down
19 changes: 14 additions & 5 deletions tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,20 @@ def test_stream_equal_length():

def test_no_tables_found():
filename = os.path.join(testdir, 'blank.pdf')
# TODO: use pytest.warns
with warnings.catch_warnings():
warnings.simplefilter('error')
try:
with pytest.raises(UserWarning) as e:
tables = camelot.read_pdf(filename)
except Exception as e:
assert type(e).__name__ == 'UserWarning'
assert str(e) == 'No tables found on page-1'
assert str(e.value) == 'No tables found on page-1'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonathanlloyd Should this be one indent level down?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Its a bit weird but that's actually how it's documented: https://docs.pytest.org/en/latest/assert.html
It seems to work 🤷‍♂️

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. I looked at the tests for requests and they seem to do it this way too. https://github.com/requests/requests/blob/master/tests/test_requests.py#L817 If you ever find why pytest implemented it this way, do comment here!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very odd!



def test_no_tables_found_warnings_supressed():
filename = os.path.join(testdir, 'blank.pdf')
with warnings.catch_warnings():
# the test should fail if any warning is thrown
warnings.simplefilter('error')
try:
tables = camelot.read_pdf(filename, suppress_warnings=True)
except Warning as e:
warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text))