Forráskód Böngészése

enhance build process to allow selective -O3 optimization

the motivation for this patch is that the vast majority of libc is
code that does not benefit at all from optimization, while certain
components, such as string/memory operations, can be major performance
bottlenecks.

at the same time, the old -falign-*=1 options are removed, since they
were only beneficial for avoiding bloat when global -O3 was used, and
in that case, they may have prevented some of the performance gains.

to be the most useful, this patch will need further tuning. in
particular, research is needed to determine which components should be
built with -O3 by default, and it may be desirable to remove the
hard-coded -O3 and instead allow more customization of the
optimization level used for selected modules.
Rich Felker 11 éve
szülő
commit
a80847d86a
2 módosított fájl, 57 hozzáadás és 12 törlés
  1. Makefile (+3, -0)
  2. configure (+54, -12)

+ 3 - 0
Makefile

@@ -80,6 +80,9 @@ include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/
 
 
 src/ldso/dynlink.lo: arch/$(ARCH)/reloc.h
 src/ldso/dynlink.lo: arch/$(ARCH)/reloc.h
 
 
+OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%))
+$(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3
+
 %.o: $(ARCH)/%.s
 %.o: $(ARCH)/%.s
 	$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
 	$(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
 
 

+ 54 - 12
configure

@@ -24,6 +24,7 @@ System types:
   --host=HOST             same as --target
   --host=HOST             same as --target
 
 
 Optional features:
 Optional features:
+  --enable-optimize=...   optimize listed components for speed over size [auto]
   --enable-debug          build with debugging information [disabled]
   --enable-debug          build with debugging information [disabled]
   --enable-warnings       build with recommended warnings flags [disabled]
   --enable-warnings       build with recommended warnings flags [disabled]
   --enable-gcc-wrapper    build musl-gcc toolchain wrapper [auto]
   --enable-gcc-wrapper    build musl-gcc toolchain wrapper [auto]
@@ -104,6 +105,7 @@ fi
 CFLAGS_C99FSE=
 CFLAGS_C99FSE=
 CFLAGS_AUTO=
 CFLAGS_AUTO=
 LDFLAGS_AUTO=
 LDFLAGS_AUTO=
+OPTIMIZE_GLOBS=
 prefix=/usr/local/musl
 prefix=/usr/local/musl
 exec_prefix='$(prefix)'
 exec_prefix='$(prefix)'
 bindir='$(exec_prefix)/bin'
 bindir='$(exec_prefix)/bin'
@@ -111,6 +113,7 @@ libdir='$(prefix)/lib'
 includedir='$(prefix)/include'
 includedir='$(prefix)/include'
 syslibdir='/lib'
 syslibdir='/lib'
 target=
 target=
+optimize=auto
 debug=no
 debug=no
 warnings=no
 warnings=no
 shared=yes
 shared=yes
@@ -129,6 +132,9 @@ case "$arg" in
 --disable-shared|--enable-shared=no) shared=no ;;
 --disable-shared|--enable-shared=no) shared=no ;;
 --enable-static|--enable-static=yes) static=yes ;;
 --enable-static|--enable-static=yes) static=yes ;;
 --disable-static|--enable-static=no) static=no ;;
 --disable-static|--enable-static=no) static=no ;;
+--enable-optimize) optimize=yes ;;
+--enable-optimize=*) optimize=${arg#*=} ;;
+--disable-optimize) optimize=no ;;
 --enable-debug|--enable-debug=yes) debug=yes ;;
 --enable-debug|--enable-debug=yes) debug=yes ;;
 --disable-debug|--enable-debug=no) debug=no ;;
 --disable-debug|--enable-debug=no) debug=no ;;
 --enable-warnings|--enable-warnings=yes) warnings=yes ;;
 --enable-warnings|--enable-warnings=yes) warnings=yes ;;
@@ -230,14 +236,57 @@ tryflag CFLAGS_C99FSE -fexcess-precision=standard \
 || { test "$ARCH" = i386 && tryflag CFLAGS_C99FSE -ffloat-store ; }
 || { test "$ARCH" = i386 && tryflag CFLAGS_C99FSE -ffloat-store ; }
 tryflag CFLAGS_C99FSE -frounding-math
 tryflag CFLAGS_C99FSE -frounding-math
 
 
+
+#
+# If debugging is explicitly enabled, don't auto-enable optimizations
+#
+if test "$debug" = yes ; then
+CFLAGS_AUTO=-g
+test "$optimize" = auto && optimize=no
+fi
+
 #
 #
-# Setup basic default CFLAGS: debug, optimization, and -pipe
+# Possibly add a -O option to CFLAGS and select modules to optimize with
+# -O3 based on the status of --enable-optimize and provided CFLAGS.
 #
 #
-if fnmatch '-O*|*\ -O*' "$CFLAGS_AUTO $CFLAGS" ; then :
+printf "checking for optimization settings... "
+case "x$optimize" in
+xauto)
+if fnmatch '-O*|*\ -O*' "$CFLAGS_AUTO $CFLAGS" ; then
+printf "using provided CFLAGS\n" ;optimize=no
 else
 else
-tryflag CFLAGS_AUTO -Os || tryflag CFLAGS_AUTO -O2
+printf "using defaults\n" ; optimize=yes
 fi
 fi
-test "x$debug" = xyes && CFLAGS_AUTO="-g"
+;;
+xsize|xnone) printf "minimize size\n" ; optimize=size ;;
+xno|x) printf "disabled\n" ; optimize=no ;;
+*) printf "custom\n" ;;
+esac
+
+test "$optimize" = no || tryflag CFLAGS_AUTO -Os || tryflag CFLAGS_AUTO -O2
+test "$optimize" = yes && optimize="internal,malloc,math,string"
+
+if fnmatch 'no|size' "$optimize" ; then :
+else
+printf "components to be optimized for speed:"
+while test "$optimize" ; do
+case "$optimize" in
+*,*) this=${optimize%%,*} optimize=${optimize#*,} ;;
+*) this=$optimize optimize=
+esac
+printf " $this"
+case "$this" in
+*/*.c) ;;
+*/*) this=$this*.c ;;
+*) this=$this/*.c ;;
+esac
+OPTIMIZE_GLOBS="$OPTIMIZE_GLOBS $this"
+done
+OPTIMIZE_GLOBS=${OPTIMIZE_GLOBS# }
+printf "\n"
+fi
+
+# Always try -pipe
 tryflag CFLAGS_AUTO -pipe
 tryflag CFLAGS_AUTO -pipe
 
 
 #
 #
@@ -266,14 +315,6 @@ tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables
 #
 #
 tryflag CFLAGS_AUTO -Wa,--noexecstack
 tryflag CFLAGS_AUTO -Wa,--noexecstack
 
 
-#
-# Some optimization levels add bloated alignment that hurt performance
-#
-tryflag CFLAGS_AUTO -falign-functions=1
-tryflag CFLAGS_AUTO -falign-labels=1
-tryflag CFLAGS_AUTO -falign-loops=1
-tryflag CFLAGS_AUTO -falign-jumps=1
-
 #
 #
 # On x86, make sure we don't have incompatible instruction set
 # On x86, make sure we don't have incompatible instruction set
 # extensions enabled by default. This is bad for making static binaries.
 # extensions enabled by default. This is bad for making static binaries.
@@ -366,6 +407,7 @@ CPPFLAGS = $CPPFLAGS
 LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
 LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
 CROSS_COMPILE = $CROSS_COMPILE
 CROSS_COMPILE = $CROSS_COMPILE
 LIBCC = $LIBCC
 LIBCC = $LIBCC
+OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS
 EOF
 EOF
 test "x$static" = xno && echo "STATIC_LIBS ="
 test "x$static" = xno && echo "STATIC_LIBS ="
 test "x$shared" = xno && echo "SHARED_LIBS ="
 test "x$shared" = xno && echo "SHARED_LIBS ="