Comment 28 for bug 1417470

Matic (matic-ivancic) wrote :

I found the same problem. On Windows I fixed it by installing Inkscape version 0.48, but I cannot do this under Ubuntu 16.04. Therefore I wrote a script in Python. Here is my simple script for converting .svg images to the .pdf_tex format:

'''
Inkscape 0.92 has a bug when exporting an svg image to pdf_tex.
The pdf_tex format may be quite useful when writing manuscripts in LaTeX:
https://waterprogramming.wordpress.com/2015/07/28/embedding-figure-text-into-a-latex-document/

The problem is that the number of pages in the exported pdf file and the
number of pages referenced in the exported pdf_tex file usually differ
(bug). Both cases are possible: the number of pages in the pdf can be
higher or lower than the number of pages specified in the pdf_tex file.

Presented solution is tested on Ubuntu 16.04 and is written in python 2.7
(pypdf is only available for python 2).

Run this script with the svg figure name as its argument:
python pdf_correct.py figure.svg

Created on Thursday 6 Apr 2017.

@author: Matic
'''

import os
import re
import shutil
import subprocess
import sys

from pyPdf import PdfFileReader

# Matches the page selector Inkscape writes into \includegraphics options;
# group 1 is the page number.
PAGE_PATTERN = re.compile(r'page=(\d+)')

# Line that closes the picture environment in the pdf_tex file; missing
# pages are inserted directly before it.
END_OF_PICTURE = r'\end{picture}%'


def derive_names(svg_name):
    """Return ``(pdf_name, tex_name)`` for the given svg file name.

    Only the file extension is swapped, so an 'svg' or 'pdf' substring
    elsewhere in the path (e.g. ``svg_plots/fig.svg``) is left untouched.
    """
    base = os.path.splitext(svg_name)[0]
    pdf_name = base + '.pdf'
    return pdf_name, pdf_name + '_tex'


def run_inkscape(svg_name, pdf_name):
    """Run Inkscape to export ``svg_name`` to ``pdf_name`` and its pdf_tex.

    The command is passed as an argument list (no shell), so file names
    containing spaces or shell metacharacters are handled correctly.
    Exits the program with an error message if Inkscape cannot be started
    or reports a non-zero exit status.
    """
    command = ['inkscape', svg_name, '--export-pdf', pdf_name,
               '--export-latex']
    try:
        status = subprocess.call(command)
    except OSError as err:
        sys.exit('Could not run inkscape: %s' % err)
    if status != 0:
        sys.exit('inkscape failed with exit status %d' % status)


def count_pdf_pages(pdf_name):
    """Return the number of pages in the pdf file ``pdf_name``."""
    # The context manager closes the file handle once the count is read.
    with open(pdf_name, 'rb') as pdf_file:
        return PdfFileReader(pdf_file).getNumPages()


def scan_tex_pages(lines):
    """Scan pdf_tex ``lines`` for ``page=N`` references.

    Returns ``(n_pages_tex, page_line)``: the highest page number found
    (0 if there is none) and the last line containing a page reference
    (``None`` if there is none).  ``page_line`` later serves as the
    template for lines that have to be added.
    """
    n_pages_tex = 0
    page_line = None
    for line in lines:
        match = PAGE_PATTERN.search(line)
        if match:
            n_pages_tex = max(int(match.group(1)), n_pages_tex)
            page_line = line
    return n_pages_tex, page_line


def fix_page_lines(lines, n_pages_pdf, n_pages_tex, page_line):
    """Return a corrected copy of the pdf_tex ``lines``.

    * Fewer pages referenced than present in the pdf: copies of
      ``page_line`` with the missing page numbers are inserted before
      every line containing ``\\end{picture}%``.
    * More pages referenced than present in the pdf: lines whose page
      number exceeds ``n_pages_pdf`` are dropped.
    * Otherwise the lines are returned unchanged.

    If pages are missing but ``page_line`` is ``None`` (no template line
    to copy), the lines are returned unchanged as well.
    """
    if n_pages_tex < n_pages_pdf:
        if page_line is None:
            return list(lines)
        old_part = PAGE_PATTERN.search(page_line).group()
        missing = [page_line.replace(old_part, 'page=%d' % number)
                   for number in range(n_pages_tex + 1, n_pages_pdf + 1)]
        fixed = []
        for line in lines:
            if END_OF_PICTURE in line:
                fixed.extend(missing)
            fixed.append(line)
        return fixed

    if n_pages_tex > n_pages_pdf:
        fixed = []
        for line in lines:
            match = PAGE_PATTERN.search(line)
            # Skip includes of pages that do not exist in the pdf file.
            if match and int(match.group(1)) > n_pages_pdf:
                continue
            fixed.append(line)
        return fixed

    return list(lines)


def main(argv=None):
    """Export the svg named in ``argv[1]`` and repair its pdf_tex file.

    ``argv`` defaults to ``sys.argv``.  Exits with a non-zero status when
    the figure name is missing or Inkscape fails.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit('You must put figure name as an argument.')

    svg_name = argv[1]
    pdf_name, tex_name = derive_names(svg_name)

    # run inkscape and make pdf_tex
    run_inkscape(svg_name, pdf_name)

    n_pages_pdf = count_pdf_pages(pdf_name)
    print('# pages in pdf:  %d' % n_pages_pdf)

    with open(tex_name, 'r') as f_old:
        lines = f_old.readlines()
    n_pages_tex, page_line = scan_tex_pages(lines)
    print('# pages in pdf_tex:  %d' % n_pages_tex)

    if n_pages_tex == n_pages_pdf:
        # the number of pages is already the same in both files
        return
    if n_pages_tex < n_pages_pdf and page_line is None:
        sys.stderr.write('No page= line found in %s; file left unchanged.\n'
                         % tex_name)
        return

    # Write the corrected file next to the original, then replace it.
    tex_name_new = tex_name + '_new'
    with open(tex_name_new, 'w') as f_new:
        f_new.writelines(
            fix_page_lines(lines, n_pages_pdf, n_pages_tex, page_line))
    shutil.move(tex_name_new, tex_name)


# Guarded so the helpers can be imported without launching Inkscape;
# running the file as a script behaves as before.
if __name__ == '__main__':
    main()