Get texidoc translations out of snippets source files

Texidoc translations are now inserted into .ly snippets at the build stage rather than during the makelsr.py run. This simplifies overall maintenance of snippets; in particular, it avoids the Git committish update headache for translators. See the final discussion at http://lists.gnu.org/archive/html/lilypond-devel/2012-06/msg00438.html
author: John Mandereau <john.mandereau@gmail.com> 2012-06-28 13:30:05 +0200
committer: John Mandereau <john.mandereau@gmail.com> 2012-06-29 16:40:55 +0200
commit: 512d405d07aba8742658902b105a94ebbd40c2dc (patch)
tree: 15bb99b1c56385ac6f8a2956ca8e43b01fae8e87 /scripts
parent: 2047ac3ecaefeface8ca66d8199590b39c317a1c (diff)
2 files changed, 213 insertions, 133 deletions
diff --git a/scripts/auxiliar/makelsr.py b/scripts/auxiliar/makelsr.py
index 6927476be9..61526963be 100755
--- a/scripts/auxiliar/makelsr.py
+++ b/scripts/auxiliar/makelsr.py
@@ -4,22 +4,26 @@ import sys
 import os
 import glob
 import re
+import optparse
+import tempfile
 
-sys.path.append ('python')
-import langdefs
+lilypond_flags = "-dno-print-pages -dsafe"
+
+lys_from_lsr = os.path.join ('Documentation', 'snippets')
+new_lys = os.path.join ('Documentation', 'snippets', 'new')
+ly_output = os.path.join (tempfile.gettempdir (), 'lsrtest')
+
+# which convert-ly to use
+if os.path.isfile ("out/bin/convert-ly"):
+    conv_path = "out/bin/"
+elif os.path.isfile ("build/out/bin/convert-ly"):
+    conv_path = "build/out/bin/"
+else:
+    conv_path=''
+convert_ly = conv_path + 'convert-ly'
+lilypond_bin = conv_path + 'lilypond'
 
-DEST = os.path.join ('Documentation', 'snippets')
-NEW_LYS = os.path.join ('Documentation', 'snippets', 'new')
-TEXIDOCS = [os.path.join ('Documentation', language_code, 'texidocs')
-            for language_code in langdefs.LANGDICT]
 
-USAGE = '''  Usage: makelsr.py [LSR_SNIPPETS_DIR]
-This script must be run from top of the source tree;
-it updates snippets %(DEST)s with snippets
-from %(NEW_LYS)s or LSR_SNIPPETS_DIR.
-If a snippet is present in both directories, the one
-from %(NEW_LYS)s is preferred.
-''' % vars ()
 
 LY_HEADER_LSR = '''%% DO NOT EDIT this file manually; it is automatically
 %% generated from LSR http://lsr.dsi.unimi.it
@@ -35,38 +39,141 @@ LY_HEADER_NEW = '''%% DO NOT EDIT this file manually; it is automatically
 %% and then run scripts/auxiliar/makelsr.py
 %%
 %% This file is in the public domain.
-''' % NEW_LYS
+''' % new_lys
+
+options_parser = optparse.OptionParser (
+    description = "makelsr - update snippets directory from LSR",
+    usage = '''%%prog [options] [LSR_SNIPPETS_DIR]
+Unless -s option is specified, this script must be run from top of the
+source tree. If LSR_SNIPPETS_DIR is not specified, it defaults to
+current directory.
+
+Remove snippets in TOP_SOURCE_DIR/%(lys_from_lsr)s and put in snippets
+from LSR_SNIPPETS_DIR run through convert-ly or from
+TOP_SOURCE_DIR/%(new_lys)s; if a snippet is present in both
+directories, the one from TOP_SOURCE_DIR/%(new_lys)s is preferred.
+All written snippets are copied in LY_OUTPUT
+with appending translations from .texidoc files and are tested with
+lilypond with flags %(lilypond_flags)s
+
+''' % vars ())
+
+options_parser.add_option ('-s', '--top-source',
+                           dest="top_source_dir",
+                           action="store",
+                           metavar="TOP_SOURCE_DIR",
+                           default=".",
+                           help="set LilyPond top source directory")
+
+options_parser.add_option ('-o', '--ly-output',
+                           dest="ly_output",
+                           action="store",
+                           metavar="LY_OUTPUT",
+                           default=ly_output,
+                           help="set LilyPond output files temporary directory")
+
+options_parser.add_option ('-p', '--path',
+                           dest="bin_path",
+                           action="store",
+                           metavar="LY_PATH",
+                           default="out/bin",
+                           help="directory where looking for LilyPond binaries")
+
+options_parser.add_option ('-c', '--convert-ly',
+                           dest="convert_ly",
+                           action="store",
+                           metavar="CONVERT-LY",
+                           default="LY_PATH/convert-ly",
+                           help="convert-ly binary to use")
+
+options_parser.add_option ('-l', '--lilypond-binary',
+                           dest="lilypond_bin",
+                           action="store",
+                           metavar="LILYPOND_BIN",
+                           default="LY_PATH/lilypond",
+                           help="lilypond binary to use")
+
+(options, args) = options_parser.parse_args ()
+
+if not os.path.isdir (options.top_source_dir):
+    sys.stderr.write ("Error: top source: %s: not a directory\n" % options.top_source_dir)
+    sys.exit (4)
+
+lys_from_lsr = os.path.normpath (os.path.join (options.top_source_dir, lys_from_lsr))
+new_lys = os.path.normpath (os.path.join (options.top_source_dir, new_lys))
+sys.path.append (os.path.normpath (os.path.join (options.top_source_dir, 'python')))
+import langdefs
+texidoc_dirs = [
+    os.path.normpath (os.path.join (options.top_source_dir, 'Documentation', language_code, 'texidocs'))
+    for language_code in langdefs.LANGDICT]
+
+if not os.path.isdir (lys_from_lsr):
+    sys.stderr.write ("Error: snippets path: %s: not a directory\n" % lys_from_lsr)
+    sys.exit (3)
+if not os.path.isdir (new_lys):  # new snippets directory must exist under the top source dir
+    sys.stderr.write ("Error: new snippets path: %s: not a directory\n" % new_lys)  # report the path that was actually tested
+    sys.exit (3)
+
+ly_output_ok = False
+if os.path.isdir (options.ly_output):
+    ly_output = options.ly_output
+    ly_output_ok = True
+elif os.path.exists (options.ly_output):
+    try:
+        os.unlink (options.ly_output)
+    except Exception as e:
+        sys.stderr.write ("Warning: could not delete file before creating directory: %s\n" % e)
+    else:
+        try:
+            os.makedirs (options.ly_output)
+        except Exception as e:
+            sys.stderr.write ("Warning: could not create directory: %s\n" % e)
+        else:
+            ly_output = options.ly_output
+            ly_output_ok = True
+else:
+    try:
+        os.makedirs (options.ly_output)
+    except Exception as e:
+        sys.stderr.write ("Warning: could not create directory: %s\n" % e)
+    else:
+        ly_output = options.ly_output
+        ly_output_ok = True
+if not ly_output_ok:
+    ly_output = tempfile.gettempdir ()
+    sys.stderr.write ("Warning: could not use or create directory %s, using default %s\n" % (options.ly_output, ly_output))
 
 def exit_with_usage (n=0):
-    sys.stderr.write (USAGE)
+    options_parser.print_help (sys.stderr)
     sys.exit (n)
 
-TAGS = []
-
-if len (sys.argv) >= 2:
-    in_dir = sys.argv[1]
+if len (args):
+    in_dir = args[0]
     if not (os.path.isdir (in_dir)):
-        sys.stderr.write (in_dir + ' is not a directory.\n')
-        exit (2)
-    if len (sys.argv) >= 3:
+        sys.stderr.write ("Error: %s: not a directory\n" % in_dir)
+        sys.exit (4)
+    if len (args) > 1:
         exit_with_usage (2)
-    if not (os.path.isdir (DEST) and os.path.isdir (NEW_LYS)):
-        exit_with_usage (3)
-    TAGS = os.listdir (in_dir)
 else:
-    in_dir = ''
+    in_dir = '.'
 
-# which convert-ly to use
-if os.path.isfile("out/bin/convert-ly"):
-    conv_path='out/bin/'
-elif os.path.isfile("build/out/bin/convert-ly"):
-    conv_path='build/out/bin/'
+if options.convert_ly == "LY_PATH/convert-ly":
+    convert_ly = os.path.join (options.bin_path, "convert-ly")
 else:
-    conv_path=''
-convert_ly=conv_path+'convert-ly'
-lilypond_bin=conv_path+'lilypond'
+    convert_ly = options.convert_ly
+if not os.path.isfile (convert_ly):  # requested binary is missing: warn, then fall back to the bare command name
+    sys.stderr.write ("Warning: %s: no such file\n" % convert_ly)
+    convert_ly = "convert-ly"
+if options.lilypond_bin == "LY_PATH/lilypond":
+    lilypond_bin = os.path.join (options.bin_path, "lilypond")
+else:
+    lilypond_bin = options.lilypond_bin
+if not os.path.isfile (lilypond_bin):  # requested binary is missing: warn, then fall back to the bare command name
+    sys.stderr.write ("Warning: %s: no such file\n" % lilypond_bin)
+    lilypond_bin = "lilypond"
+sys.stderr.write ("Using %s, %s\n" % (convert_ly, lilypond_bin))
 
-print 'using '+convert_ly
+tags = os.listdir (in_dir)
 
 unsafe = []
 unconverted = []
@@ -104,32 +211,15 @@ def add_version (ly_code):
     return '''%% Note: this file works from version ''' + \
         ly_new_version_re.search (ly_code).group (1) + '\n'
 
-s = 'Translation of GIT [Cc]ommittish'
-texidoc_chunk_re = re.compile (r'^(?:%+\s*' + s + \
-    r'.+)?\s*(?:texidoc|doctitle)([a-zA-Z]{2,4})\s+=(?:.|\n)*?(?=%+\s*' + \
-    s + r'|\n\} % begin verbatim|\n  (?:doctitle|texidoc|lsrtags) |$(?!.|\n))', re.M)
-
-def update_translated_texidoc (m, snippet_path, visited_languages):
-    base = os.path.splitext (os.path.basename (snippet_path))[0]
-    language_code = m.group (1)
-    visited_languages.append (language_code)
-    texidoc_path = os.path.join ('Documentation', language_code,
-                                 'texidocs', base + '.texidoc')
-    if os.path.isfile (texidoc_path):
-        return open (texidoc_path).read ()
-    else:
-        return m.group (0)
-
 def escape_backslashes_in_header(snippet):
     # ASSUME: the \header exists.
     header_char_number_start = snippet.find('\header {')
     header_char_number_end = snippet.find('} % begin verbatim')
 
     header = snippet[header_char_number_start:header_char_number_end]
-    # two levels of escaping happening here -- 4\ means 1\
-    # and the 10\ means two \ backslashes (that's 8\ ), and
-    # one backreference to group 1 (that's two 2\ ).
-    new_header = re.sub("@code\{\\\\([a-zA-Z])", "@code{\\\\\\\\\\1", header)
+    # only one level of escaping happening here
+    # thanks to raw strings
+    new_header = re.sub(r"@code\{\\([a-zA-Z])", r"@code{\\\\\1", header)
     escaped_snippet = (snippet[:header_char_number_start] +
         new_header + snippet[header_char_number_end:])
     return escaped_snippet
@@ -137,50 +227,42 @@ def escape_backslashes_in_header(snippet):
 def copy_ly (srcdir, name, tags):
     global unsafe
     global unconverted
-    dest = os.path.join (DEST, name)
+    dest = os.path.join (lys_from_lsr, name)
     tags = ', '.join (tags)
-    s = open (os.path.join (srcdir, name)).read ()
-    sys.stderr.write ('\nmakelsr.py: reading ' + os.path.join (srcdir, name) + '\n')
-
-    for path in TEXIDOCS:
-        texidoc_translation_path = \
-            os.path.join (path, os.path.splitext (name)[0] + '.texidoc')
-        if os.path.exists (texidoc_translation_path):
-            texidoc_translation = open (texidoc_translation_path).read ()
-            # Since we want to insert the translations verbatim using a 
-            # regexp, \\ is understood as ONE escaped backslash. So we have
-            # to escape those backslashes once more...
-            texidoc_translation = texidoc_translation.replace ('\\', '\\\\')
-            s = begin_header_re.sub ('\\g<0>\n' + texidoc_translation, s, 1)
+    file_path = os.path.join (srcdir, name)
+    sys.stderr.write ("\nmakelsr.py: reading %s\n" % file_path)
+    s = open (file_path).read ()
 
     s = doctitle_re.sub (doctitle_sub, s)
-    if in_dir and in_dir in srcdir:
-        s = LY_HEADER_LSR + add_tags (s, tags)
-    else:
+    if srcdir == new_lys:  # exact match: a substring test on "new" misfires on any path that merely contains it
         s = LY_HEADER_NEW + add_version (s) + s
+    else:
+        s = LY_HEADER_LSR + add_tags (s, tags)
 
     s = mark_verbatim_section (s)
     s = lsr_comment_re.sub ('', s)
     s = strip_white_spaces_re.sub ('', s)
     s = escape_backslashes_in_header (s)
+    sys.stderr.write ("makelsr.py: writing %s\n" % dest)
     open (dest, 'w').write (s)
-    sys.stderr.write ('makelsr.py: writing ' + dest + '\n')
 
     e = os.system (convert_ly+(" -d -e '%s'" % dest))
     if e:
         unconverted.append (dest)
     if os.path.exists (dest + '~'):
         os.remove (dest + '~')
-    # no need to check snippets from input/new
-    if in_dir and in_dir in srcdir:
-        e = os.system ("%s -dno-print-pages -dsafe -o /tmp/lsrtest '%s'" %(lilypond_bin, dest))
+    # no need to check snippets from Documentation/snippets/new (their srcdir is exactly new_lys)
+    if srcdir != new_lys:
+        e = os.system (
+            "%s %s -o %s '%s'" %
+            (lilypond_bin, lilypond_flags, ly_output, dest))
         if e:
             unsafe.append (dest)
 
 def read_source_with_dirs (src):
     s = {}
     l = {}
-    for tag in TAGS:
+    for tag in tags:
         srcdir = os.path.join (src, tag)
         l[tag] = set (map (os.path.basename,
                            glob.glob (os.path.join (srcdir, '*.ly'))))
@@ -196,14 +278,14 @@ tags_re = re.compile ('lsrtags\\s*=\\s*"(.+?)"')
 
 def read_source (src):
     s = {}
-    l = dict ([(tag, set()) for tag in TAGS])
+    l = dict ([(tag, set()) for tag in tags])
     for f in glob.glob (os.path.join (src, '*.ly')):
         basename = os.path.basename (f)
         m = tags_re.search (open (f, 'r').read ())
         if m:
             file_tags = [tag.strip() for tag in m.group (1). split(',')]
             s[basename] = (src, file_tags)
-            [l[tag].add (basename) for tag in file_tags if tag in TAGS]
+            [l[tag].add (basename) for tag in file_tags if tag in tags]
         else:
             notags_files.append (f)
     return s, l
@@ -219,62 +301,23 @@ def dump_file_list (file, file_list, update=False):
     f = open (file, 'w')
     f.write ('\n'.join (sorted (new_list)) + '\n')
 
-def update_ly_in_place (snippet_path):
-    visited_languages = []
-    contents = open (snippet_path).read ()
-    contents = texidoc_chunk_re.sub \
-        (lambda m: update_translated_texidoc (m,
-                                              snippet_path,
-                                              visited_languages),
-         contents)
-    need_line_break_workaround = False
-    for language_code in langdefs.LANGDICT:
-        if not language_code in visited_languages:
-            base = os.path.splitext (os.path.basename (snippet_path))[0]
-            texidoc_path = os.path.join ('Documentation', language_code,
-                         'texidocs', base + '.texidoc')
-            if os.path.isfile (texidoc_path):
-                texidoc_translation = open (texidoc_path).read ()
-                texidoc_translation = texidoc_translation.replace ('\\', '\\\\')
-                contents = begin_header_re.sub ('\\g<0>\n' + texidoc_translation, contents, 1)
-        else:
-            need_line_break_workaround = True
-    contents = doctitle_re.sub (doctitle_sub, contents)
-    contents = escape_backslashes_in_header (contents)
-
-    # workaround for a bug in the regex's that I'm not smart
-    # enough to figure out.  -gp
-    if need_line_break_workaround:
-        first_translated = contents.find('%% Translation of')
-        keep = contents[:first_translated+5]
-        contents = keep + contents[first_translated+5:].replace('%% Translation of', '\n%% Translation of')
-
-    open (snippet_path, 'w').write (contents)
-
-if in_dir:
-    ## clean out existing lys and generated files
-    map (os.remove, glob.glob (os.path.join (DEST, '*.ly')) +
-         glob.glob (os.path.join (DEST, '*.snippet-list')))
-
-    # read LSR source where tags are defined by subdirs
-    snippets, tag_lists = read_source_with_dirs (in_dir)
-
-    # read input/new where tags are directly defined
-    s, l = read_source (NEW_LYS)
-    snippets.update (s)
-    for t in TAGS:
-        tag_lists[t].update (l[t])
-else:
-    snippets, tag_lists = read_source (NEW_LYS)
-    ## update texidocs of snippets that don't come from NEW_LYS
-    for snippet_path in glob.glob (os.path.join (DEST, '*.ly')):
-        if not os.path.basename (snippet_path) in snippets:
-            update_ly_in_place (snippet_path)
+## clean out existing lys and generated files
+map (os.remove, glob.glob (os.path.join (lys_from_lsr, '*.ly')) +
+     glob.glob (os.path.join (lys_from_lsr, '*.snippet-list')))
+
+# read LSR source where tags are defined by subdirs
+snippets, tag_lists = read_source_with_dirs (in_dir)
+
+# read input/new where tags are directly defined
+s, l = read_source (new_lys)
+snippets.update (s)
+for t in tags:
+    tag_lists[t].update (l[t])
 
 for (name, (srcdir, tags)) in snippets.items ():
     copy_ly (srcdir, name, tags)
 for (tag, file_set) in tag_lists.items ():
-    dump_file_list (os.path.join (DEST, tag + '.snippet-list'),
+    dump_file_list (os.path.join (lys_from_lsr, tag + '.snippet-list'),
                     file_set, update=not(in_dir))
 if unconverted:
     sys.stderr.write ('These files could not be converted successfully by convert-ly:\n')
@@ -287,7 +330,7 @@ if unsafe:
     sys.stderr.write ('''
 
 Unsafe files printed in lsr-unsafe.txt: CHECK MANUALLY!
-  git add %s/*.ly
+  git add %(lys_from_lsr)s/*.ly
   xargs git diff HEAD < lsr-unsafe.txt
 
-''' % DEST)
+''' % vars ())
diff --git a/scripts/build/makesnippets.py b/scripts/build/makesnippets.py
new file mode 100644
index 0000000000..9e7c5a63fe
--- /dev/null
+++ b/scripts/build/makesnippets.py
@@ -0,0 +1,37 @@
+#!@PYTHON@
+# makesnippets.py
+
+'''USAGE: makesnippets.py INPUT_DIR OUTPUT_DIR DOC_DIR
+
+Read all .ly files from INPUT_DIR, insert translations from .texidoc
+files found in DOC_DIR/LANG/texidocs, and write the result to OUTPUT_DIR.'''
+
+import glob
+import sys
+import os.path
+import re
+
+import langdefs
+
+(input_dir, output_dir, doc_dir) = sys.argv[1:4]
+
+texidoc_dirs = [os.path.join (doc_dir, language_code, 'texidocs')
+                for language_code in langdefs.LANGDICT]
+
+begin_header_re = re.compile (r'\\header\s*{', re.M)
+
+for f in glob.glob (os.path.join (input_dir, '*.ly')):
+    name = os.path.basename (f)
+    s = open (f).read ()
+    for path in texidoc_dirs:
+        texidoc_translation_path = \
+            os.path.join (path, os.path.splitext (name)[0] + '.texidoc')
+        if os.path.exists (texidoc_translation_path):
+            texidoc_translation = open (texidoc_translation_path).read ()
+            # Since we want to insert the translations verbatim using a
+            # regexp, \\ is understood as ONE escaped backslash. So we have
+            # to escape those backslashes once more...
+            texidoc_translation = texidoc_translation.replace ('\\', '\\\\')
+            s = begin_header_re.sub ('\\g<0>\n' + texidoc_translation, s, 1)
+    dest = os.path.join (output_dir, name)
+    open (dest, 'w').write (s)
author	John Mandereau <john.mandereau@gmail.com>	2012-06-28 13:30:05 +0200
committer	John Mandereau <john.mandereau@gmail.com>	2012-06-29 16:40:55 +0200
commit	512d405d07aba8742658902b105a94ebbd40c2dc (patch)
tree	15bb99b1c56385ac6f8a2956ca8e43b01fae8e87 /scripts
parent	2047ac3ecaefeface8ca66d8199590b39c317a1c (diff)