[go: up one dir, main page]

Skip to content

Commit

Permalink
Merge pull request #225 from socialcopsdev/fix-204
Browse files Browse the repository at this point in the history
[MRG] Change suppress_warnings to suppress_stdout
  • Loading branch information
vinayak-mehta authored Dec 12, 2018
2 parents 50780e2 + 591cfd5 commit 40217be
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 18 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ master
**Improvements**

* [#207](https://github.com/socialcopsdev/camelot/issues/207) Add a plot type for Stream text edges and detected table areas. [#224](https://github.com/socialcopsdev/camelot/pull/224) by Vinayak Mehta.
* [#204](https://github.com/socialcopsdev/camelot/issues/204) `suppress_warnings` is now called `suppress_stdout`. [#225](https://github.com/socialcopsdev/camelot/pull/225) by Vinayak Mehta.

**Bugfixes**

Expand Down
10 changes: 5 additions & 5 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def set_config(self, key, value):

@click.group()
@click.version_option(version=__version__)
@click.option('-q', '--quiet', is_flag=False, help='Suppress logs and warnings.')
@click.option('-p', '--pages', default='1', help='Comma-separated page numbers.'
' Example: 1,3,4 or 1,4-end.')
@click.option('-pw', '--password', help='Password for decryption.')
Expand All @@ -44,7 +45,6 @@ def set_config(self, key, value):
' font size. Useful to detect super/subscripts.')
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
help='PDFMiner char_margin, line_margin and word_margin.')
@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.')
@click.pass_context
def cli(ctx, *args, **kwargs):
"""Camelot: PDF Table Extraction for Humans"""
Expand Down Expand Up @@ -96,7 +96,7 @@ def lattice(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
quiet = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -117,7 +117,7 @@ def lattice(c, *args, **kwargs):
raise click.UsageError('Please specify output file format using --format')

tables = read_pdf(filepath, pages=pages, flavor='lattice',
suppress_warnings=suppress_warnings, **kwargs)
suppress_stdout=quiet, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down Expand Up @@ -149,7 +149,7 @@ def stream(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
quiet = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -169,7 +169,7 @@ def stream(c, *args, **kwargs):
raise click.UsageError('Please specify output file format using --format')

tables = read_pdf(filepath, pages=pages, flavor='stream',
suppress_warnings=suppress_warnings, **kwargs)
suppress_stdout=quiet, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down
6 changes: 4 additions & 2 deletions camelot/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _save_page(self, filename, page, temp):
with open(fpath, 'wb') as f:
outfile.write(f)

def parse(self, flavor='lattice', **kwargs):
def parse(self, flavor='lattice', suppress_stdout=False, **kwargs):
"""Extracts tables by calling parser.get_tables on all single
page PDFs.
Expand All @@ -134,6 +134,8 @@ def parse(self, flavor='lattice', **kwargs):
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
suppress_stdout : bool (default: False)
Suppress logs and warnings.
kwargs : dict
See camelot.read_pdf kwargs.
Expand All @@ -151,6 +153,6 @@ def parse(self, flavor='lattice', **kwargs):
for p in self.pages]
parser = Lattice(**kwargs) if flavor == 'lattice' else Stream(**kwargs)
for p in pages:
t = parser.extract_tables(p)
t = parser.extract_tables(p, suppress_stdout=suppress_stdout)
tables.extend(t)
return TableList(tables)
10 changes: 5 additions & 5 deletions camelot/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@


def read_pdf(filepath, pages='1', password=None, flavor='lattice',
suppress_warnings=False, **kwargs):
suppress_stdout=False, **kwargs):
"""Read PDF and return extracted tables.
Note: kwargs annotated with ^ can only be used with flavor='stream'
Expand All @@ -24,8 +24,8 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
suppress_warnings : bool, optional (default: False)
Prevent warnings from being emitted by Camelot.
suppress_stdout : bool, optional (default: False)
Suppress logs and warnings.
table_areas : list, optional (default: None)
List of table area strings of the form x1,y1,x2,y2
where (x1, y1) -> left-top and (x2, y2) -> right-bottom
Expand Down Expand Up @@ -92,11 +92,11 @@ def read_pdf(filepath, pages='1', password=None, flavor='lattice',
" Use either 'lattice' or 'stream'")

with warnings.catch_warnings():
if suppress_warnings:
if suppress_stdout:
warnings.simplefilter("ignore")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages=pages, password=password)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
tables = p.parse(flavor=flavor, suppress_stdout=suppress_stdout, **kwargs)
return tables
5 changes: 3 additions & 2 deletions camelot/parsers/lattice.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,10 @@ def _generate_table(self, table_idx, cols, rows, **kwargs):

return table

def extract_tables(self, filename):
def extract_tables(self, filename, suppress_stdout=False):
self._generate_layout(filename)
logger.info('Processing {}'.format(os.path.basename(self.rootname)))
if not suppress_stdout:
logger.info('Processing {}'.format(os.path.basename(self.rootname)))

if not self.horizontal_text:
warnings.warn("No tables found on {}".format(
Expand Down
5 changes: 3 additions & 2 deletions camelot/parsers/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,9 +384,10 @@ def _generate_table(self, table_idx, cols, rows, **kwargs):

return table

def extract_tables(self, filename):
def extract_tables(self, filename, suppress_stdout=False):
self._generate_layout(filename)
logger.info('Processing {}'.format(os.path.basename(self.rootname)))
if not suppress_stdout:
logger.info('Processing {}'.format(os.path.basename(self.rootname)))

if not self.horizontal_text:
warnings.warn("No tables found on {}".format(
Expand Down
2 changes: 1 addition & 1 deletion docs/user/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ You can print the help for the interface by typing ``camelot --help`` in your fa

Options:
--version Show the version and exit.
-q, --quiet Suppress logs and warnings.
-p, --pages TEXT Comma-separated page numbers. Example: 1,3,4
or 1,4-end.
-pw, --password TEXT Password for decryption.
Expand All @@ -28,7 +29,6 @@ You can print the help for the interface by typing ``camelot --help`` in your fa
-M, --margins <FLOAT FLOAT FLOAT>...
PDFMiner char_margin, line_margin and
word_margin.
-q, --quiet Suppress warnings.
--help Show this message and exit.

Commands:
Expand Down
14 changes: 13 additions & 1 deletion tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,25 @@ def test_no_tables_found():
assert str(e.value) == 'No tables found on page-1'


def test_no_tables_found_logs_suppressed():
filename = os.path.join(testdir, 'foo.pdf')
with warnings.catch_warnings():
# the test should fail if any warning is thrown
warnings.simplefilter('error')
try:
tables = camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e:
warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text))


def test_no_tables_found_warnings_suppressed():
filename = os.path.join(testdir, 'blank.pdf')
with warnings.catch_warnings():
# the test should fail if any warning is thrown
warnings.simplefilter('error')
try:
tables = camelot.read_pdf(filename, suppress_warnings=True)
tables = camelot.read_pdf(filename, suppress_stdout=True)
except Warning as e:
warning_text = str(e)
pytest.fail('Unexpected warning: {}'.format(warning_text))
Expand Down

0 comments on commit 40217be

Please sign in to comment.