# DP: hppa has some nasty alignment requirements for certain data structures.
# DP: ldcw, the only parisc atomic operation, needs to have its operand
# DP: 16 byte aligned.  Currently, STACK_BOUNDARY and BIGGEST_ALIGNMENT are
# DP: both 64 bits on pa32, so it's not possible to use a local var for a
# DP: lock, at least not without doing backflips.  This patch increases the
# DP: allowed alignment to 16 bytes, and also introduces another target macro
# DP: to allow local var alignment to 16 bytes without forcing a similar
# DP: alignment on STARTING_FRAME_OFFSET (which would waste stack space).
# DP: 
# DP: gcc/ChangeLog
# DP: 	* config/pa/pa.h (PREFERRED_STACK_BOUNDARY): Define.
# DP: 	(BIGGEST_ALIGNMENT): Change from 64 to 128.
# DP: 	(ALIGN_STARTING_FRAME): Define.
# DP: 	* doc/tm.texi (ALIGN_STARTING_FRAME): Document.
# DP: 	* function.c (assign_stack_local_1): Use ALIGN_STARTING_FRAME.
# DP: 

Index: gcc/doc/tm.texi
===================================================================
RCS file: /cvs/gcc/gcc/gcc/doc/tm.texi,v
retrieving revision 1.63
diff -u -p -r1.63 tm.texi
--- gcc/doc/tm.texi	2001/10/14 17:44:00	1.63
+++ gcc/doc/tm.texi	2001/10/19 10:36:10
@@ -2600,6 +2600,13 @@ value @code{STARTING_FRAME_OFFSET}.
 @c i'm not sure if the above is still correct.. had to change it to get
 @c rid of an overfull.  --mew 2feb93
 
+@findex ALIGN_STARTING_FRAME
+@item ALIGN_STARTING_FRAME
+If defined, local variable stack slots will first have this value added
+to the offset before calculating address alignment padding.  This is
+useful when @code{STARTING_FRAME_OFFSET} is not a multiple of
+@code{BIGGEST_ALIGNMENT} or @code{PREFERRED_STACK_BOUNDARY}.
+
 @findex STACK_POINTER_OFFSET
 @item STACK_POINTER_OFFSET
 Offset from the stack pointer register to the first location at which
Index: gcc/function.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/function.c,v
retrieving revision 1.314
diff -u -p -r1.314 function.c
--- gcc/function.c	2001/10/11 03:15:40	1.314
+++ gcc/function.c	2001/10/19 10:35:54
@@ -526,6 +526,7 @@ assign_stack_local_1 (mode, size, align,
   rtx x, addr;
   int bigend_correction = 0;
   int alignment;
+  HOST_WIDE_INT offset;
 
   if (align == 0)
     {
@@ -552,8 +553,10 @@ assign_stack_local_1 (mode, size, align,
   else
     alignment = align / BITS_PER_UNIT;
 
+  offset = function->x_frame_offset;
+
 #ifdef FRAME_GROWS_DOWNWARD
-  function->x_frame_offset -= size;
+  offset -= size;
 #endif
 
   /* Ignore alignment we can't do with expected alignment of the boundary.  */
@@ -568,11 +571,17 @@ assign_stack_local_1 (mode, size, align,
      division with a negative dividend isn't as well defined as we might
      like.  So we instead assume that ALIGNMENT is a power of two and
      use logical operations which are unambiguous.  */
+#ifdef ALIGN_STARTING_FRAME
+  offset += ALIGN_STARTING_FRAME;
+#endif
 #ifdef FRAME_GROWS_DOWNWARD
-  function->x_frame_offset = FLOOR_ROUND (function->x_frame_offset, alignment);
+  offset = FLOOR_ROUND (offset, alignment);
 #else
-  function->x_frame_offset = CEIL_ROUND (function->x_frame_offset, alignment);
+  offset = CEIL_ROUND (offset, alignment);
 #endif
+#ifdef ALIGN_STARTING_FRAME
+  offset -= ALIGN_STARTING_FRAME;
+#endif
 
   /* On a big-endian machine, if we are allocating more space than we will use,
      use the least significant bytes of those that are allocated.  */
@@ -583,15 +592,16 @@ assign_stack_local_1 (mode, size, align,
      address relative to the frame pointer.  */
   if (function == cfun && virtuals_instantiated)
     addr = plus_constant (frame_pointer_rtx,
-			  (frame_offset + bigend_correction
-			   + STARTING_FRAME_OFFSET));
+			  offset + bigend_correction + STARTING_FRAME_OFFSET);
   else
     addr = plus_constant (virtual_stack_vars_rtx,
-			  function->x_frame_offset + bigend_correction);
+			  offset + bigend_correction);
 
 #ifndef FRAME_GROWS_DOWNWARD
-  function->x_frame_offset += size;
+  offset += size;
 #endif
+
+  function->x_frame_offset = offset;
 
   x = gen_rtx_MEM (mode, addr);
 
Index: gcc/config/pa/pa.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/pa/pa.h,v
retrieving revision 1.126
diff -u -p -r1.126 pa.h
--- gcc/config/pa/pa.h	2001/09/24 16:21:09	1.126
+++ gcc/config/pa/pa.h	2001/10/19 10:35:57
@@ -423,6 +423,8 @@ extern int target_flags;
    but that happens late in the compilation process.  */
 #define STACK_BOUNDARY (TARGET_64BIT ? 128 : 64)
 
+#define PREFERRED_STACK_BOUNDARY 512
+
 /* Allocation boundary (in *bits*) for the code of a function.  */
 #define FUNCTION_BOUNDARY (TARGET_64BIT ? 64 : 32)
 
@@ -436,7 +438,7 @@ extern int target_flags;
 #define PCC_BITFIELD_TYPE_MATTERS 1
 
 /* No data type wants to be aligned rounder than this.  */
-#define BIGGEST_ALIGNMENT 64
+#define BIGGEST_ALIGNMENT 128
 
 /* Get around hp-ux assembler bug, and make strcpy of constants fast. */
 #define CONSTANT_ALIGNMENT(CODE, TYPEALIGN) \
@@ -621,6 +623,7 @@ extern struct rtx_def *hppa_pic_save_rtx
    first local allocated.  Otherwise, it is the offset to the BEGINNING
    of the first local allocated.  */
 #define STARTING_FRAME_OFFSET 8
+#define ALIGN_STARTING_FRAME STARTING_FRAME_OFFSET
 
 /* If we generate an insn to push BYTES bytes,
    this says how many the stack pointer really advances by.

diff -urpN -xCVS gnu_gcc/configure.in gcc/configure.in
--- gnu_gcc/configure.in	Mon Sep  3 02:50:38 2001
+++ gcc/configure.in	Mon Sep  3 03:43:14 2001
@@ -303,7 +303,7 @@ if [ x${shared} = xyes ]; then
     arm*-*-*)
       host_makefile_frag="${host_makefile_frag} config/mh-armpic"
       ;;
-    hppa*-*-*)
+    parisc*-*-* | hppa*-*-*)
       host_makefile_frag="${host_makefile_frag} config/mh-papic"
       ;;
     i[3456]86-*-cygwin*)
@@ -729,8 +729,14 @@ case "${target}" in
   h8500-*-*)
     noconfigdirs="$noconfigdirs ${libstdcxx_version} target-librx target-libgloss ${libgcj}"
     ;;
+  hppa*64*-*-linux*)
+    # In this case, it's because the hppa64-linux target is for
+    # the kernel only at this point and has no libc, and thus no
+    # headers, crt*.o, etc., all of which are needed by these.
+    noconfigdirs="$noconfigdirs target-zlib"
+    ;;
   hppa*-*-*elf* | \
-  hppa*-*-linux* | \
+  parisc*-*-linux* | hppa*-*-linux* | \
   hppa*-*-lites* | \
   hppa*64*-*-*)
     noconfigdirs="$noconfigdirs ${libgcj}"
@@ -1232,7 +1238,7 @@ fi

 if [ x${shared} = xyes ]; then
   case "${target}" in
-    hppa*)
+    parisc* | hppa*)
       target_makefile_frag="${target_makefile_frag} config/mt-papic"
       ;;
     i[3456]86-*)
diff -urpN -xCVS gnu_gcc/fastjar/Makefile.in gcc/fastjar/Makefile.in
--- gnu_gcc/fastjar/Makefile.in	Thu Jun 28 21:31:32 2001
+++ gcc/fastjar/Makefile.in	Thu Jun 28 21:59:50 2001
@@ -325,7 +325,7 @@ distdir: $(DISTFILES)
 	@for file in $(DISTFILES); do \
 	  d=$(srcdir); \
 	  if test -d $$d/$$file; then \
-	    cp -pr $$/$$file $(distdir)/$$file; \
+	    cp -pr $$d/$$file $(distdir)/$$file; \
 	  else \
 	    test -f $(distdir)/$$file \
 	    || ln $$d/$$file $(distdir)/$$file 2> /dev/null \
diff -urpN -xCVS gnu_gcc/gcc/config/elfos.h gcc/gcc/config/elfos.h
--- gnu_gcc/gcc/config/elfos.h	Mon Apr 16 18:12:31 2001
+++ gcc/gcc/config/elfos.h	Mon Apr 16 18:44:38 2001
@@ -479,6 +479,7 @@ dtors_section ()						\
    or a constant of some sort.  RELOC indicates whether forming
    the initial value of DECL requires link-time relocations.  */

+#undef SELECT_SECTION
 #define SELECT_SECTION(DECL, RELOC)				\
 {								\
   if (TREE_CODE (DECL) == STRING_CST)				\
diff -urpN -xCVS gnu_gcc/gcc/config/pa/milli32.S gcc/gcc/config/pa/milli32.S
--- gnu_gcc/gcc/config/pa/milli32.S	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/milli32.S	Mon Feb 19 06:54:41 2001
@@ -0,0 +1,1134 @@
+;  Low level integer divide, multiply, remainder, etc routines for the HPPA.
+;  Copyright 1995, 2000, 2001 Free Software Foundation, Inc.
+
+;  This file is part of GNU CC.
+
+;  GNU CC is free software; you can redistribute it and/or modify
+;  it under the terms of the GNU General Public License as published by
+;  the Free Software Foundation; either version 2, or (at your option)
+;  any later version.
+
+;  In addition to the permissions in the GNU General Public License, the
+;  Free Software Foundation gives you unlimited permission to link the
+;  compiled version of this file with other programs, and to distribute
+;  those programs without any restriction coming from the use of this
+;  file.  (The General Public License restrictions do apply in other
+;  respects; for example, they cover modification of the file, and
+;  distribution when not linked into another program.)
+
+;  GNU CC is distributed in the hope that it will be useful,
+;  but WITHOUT ANY WARRANTY; without even the implied warranty of
+;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;  GNU General Public License for more details.
+
+;  You should have received a copy of the GNU General Public License
+;  along with GNU CC; see the file COPYING.  If not, write to
+;  the Free Software Foundation, 59 Temple Place - Suite 330,
+;  Boston, MA 02111-1307, USA.
+
+#ifdef __STDC__
+#define CAT(a,b)	a##b
+#else
+#define CAT(a,b)	a/**/b
+#endif
+
+#ifdef ELF
+
+#define SPACE \
+! .text! .align 4
+#define GSYM(sym) \
+! .export sym,millicode!sym:
+#define LSYM(sym) \
+!CAT(.L,sym:)
+#define LREF(sym) CAT(.L,sym)
+
+#else
+
+#define SPACE \
+! .space $TEXT$! .subspa $MILLICODE$,quad=0,align=8,access=0x2c,sort=8! .align 4
+#define GSYM(sym) \
+! .export sym,millicode!sym
+#define LSYM(sym) \
+!CAT(L$,sym)
+#define LREF(sym) CAT(L$,sym)
+#endif
+
+#ifdef L_dyncall
+SPACE
+GSYM($$dyncall)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	bb,>=,n	%r22,30,LREF(1)		; branch if not plabel address
+	depi	0,31,2,%r22		; clear the two least significant bits
+	ldw	4(%r22),%r19		; load new LTP value
+	ldw	0(%r22),%r22		; load address of target
+LSYM(1)
+#ifdef LINUX
+	bv	%r0(%r22)		; branch to the real target
+#else
+	ldsid	(%sr0,%r22),%r1		; get the "space ident" selected by r22
+	mtsp	%r1,%sr0		; move that space identifier into sr0
+	be	0(%sr0,%r22)		; branch to the real target
+#endif
+	stw	%r2,-24(%r30)		; save return address into frame marker
+	.exit
+	.procend
+#endif
+
+
+#ifdef L_multiply
+#define	op0	%r26
+#define	op1	%r25
+#define res	%r29
+#define ret	%r31
+#define tmp	%r1
+
+SPACE
+GSYM($$mulU)
+GSYM($$mulI)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	addi,tr		0,%r0,res	; clear out res, skip next insn
+LSYM(loop)
+	zdep		op1,26,27,op1	; shift up op1 by 5
+LSYM(lo)
+	zdep		op0,30,5,tmp	; extract next 5 bits and shift up
+	blr		tmp,%r0
+	extru		op0,26,27,op0	; shift down op0 by 5
+LSYM(0)
+	comib,<>	0,op0,LREF(lo)
+	zdep		op1,26,27,op1	; shift up op1 by 5
+	bv		%r0(ret)
+	nop
+LSYM(1)
+	b		LREF(loop)
+	addl		op1,res,res
+	nop
+	nop
+LSYM(2)
+	b		LREF(loop)
+	sh1addl		op1,res,res
+	nop
+	nop
+LSYM(3)
+	sh1addl		op1,op1,tmp	; 3x
+	b		LREF(loop)
+	addl		tmp,res,res
+	nop
+LSYM(4)
+	b		LREF(loop)
+	sh2addl		op1,res,res
+	nop
+	nop
+LSYM(5)
+	sh2addl		op1,op1,tmp	; 5x
+	b		LREF(loop)
+	addl		tmp,res,res
+	nop
+LSYM(6)
+	sh1addl		op1,op1,tmp	; 3x
+	b		LREF(loop)
+	sh1addl		tmp,res,res
+	nop
+LSYM(7)
+	zdep		op1,28,29,tmp	; 8x
+	sub		tmp,op1,tmp	; 7x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(8)
+	b		LREF(loop)
+	sh3addl		op1,res,res
+	nop
+	nop
+LSYM(9)
+	sh3addl		op1,op1,tmp	; 9x
+	b		LREF(loop)
+	addl		tmp,res,res
+	nop
+LSYM(10)
+	sh2addl		op1,op1,tmp	; 5x
+	b		LREF(loop)
+	sh1addl		tmp,res,res
+	nop
+LSYM(11)
+	sh2addl		op1,op1,tmp	; 5x
+	sh1addl		tmp,op1,tmp	; 11x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(12)
+	sh1addl		op1,op1,tmp	; 3x
+	b		LREF(loop)
+	sh2addl		tmp,res,res
+	nop
+LSYM(13)
+	sh1addl		op1,op1,tmp	; 3x
+	sh2addl		tmp,op1,tmp	; 13x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(14)
+	zdep		op1,28,29,tmp	; 8x
+	sub		tmp,op1,tmp	; 7x
+	b		LREF(loop)
+	sh1addl		tmp,res,res
+LSYM(15)
+	zdep		op1,27,28,tmp	; 16x
+	sub		tmp,op1,tmp	; 15x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(16)
+	zdep		op1,27,28,tmp	; 16x
+	b		LREF(loop)
+	addl		tmp,res,res
+	nop
+LSYM(17)
+	zdep		op1,27,28,tmp	; 16x
+	addl		tmp,op1,tmp	; 17x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(18)
+	sh3addl		op1,op1,tmp	; 9x
+	b		LREF(loop)
+	sh1addl		tmp,res,res
+	nop
+LSYM(19)
+	sh3addl		op1,op1,tmp	; 9x
+	sh1addl		tmp,op1,tmp	; 19x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(20)
+	sh2addl		op1,op1,tmp	; 5x
+	b		LREF(loop)
+	sh2addl		tmp,res,res
+	nop
+LSYM(21)
+	sh2addl		op1,op1,tmp	; 5x
+	sh2addl		tmp,op1,tmp	; 21x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(22)
+	sh2addl		op1,op1,tmp	; 5x
+	sh1addl		tmp,op1,tmp	; 11x
+	b		LREF(loop)
+	sh1addl		tmp,res,res
+LSYM(23)
+	sh1addl		op1,op1,tmp	; 3x
+	sh3addl		tmp,res,res	; += 8x3
+	b		LREF(loop)
+	sub		res,op1,res	; -= x
+LSYM(24)
+	sh1addl		op1,op1,tmp	; 3x
+	b		LREF(loop)
+	sh3addl		tmp,res,res	; += 8x3
+	nop
+LSYM(25)
+	sh2addl		op1,op1,tmp	; 5x
+	sh2addl		tmp,tmp,tmp	; 25x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(26)
+	sh1addl		op1,op1,tmp	; 3x
+	sh2addl		tmp,op1,tmp	; 13x
+	b		LREF(loop)
+	sh1addl		tmp,res,res	; += 2x13
+LSYM(27)
+	sh1addl		op1,op1,tmp	; 3x
+	sh3addl		tmp,tmp,tmp	; 27x
+	b		LREF(loop)
+	addl		tmp,res,res
+LSYM(28)
+	zdep		op1,28,29,tmp	; 8x
+	sub		tmp,op1,tmp	; 7x
+	b		LREF(loop)
+	sh2addl		tmp,res,res	; += 4x7
+LSYM(29)
+	sh1addl		op1,op1,tmp	; 3x
+	sub		res,tmp,res	; -= 3x
+	b		LREF(foo)
+	zdep		op1,26,27,tmp	; 32x
+LSYM(30)
+	zdep		op1,27,28,tmp	; 16x
+	sub		tmp,op1,tmp	; 15x
+	b		LREF(loop)
+	sh1addl		tmp,res,res	; += 2x15
+LSYM(31)
+	zdep		op1,26,27,tmp	; 32x
+	sub		tmp,op1,tmp	; 31x
+LSYM(foo)
+	b		LREF(loop)
+	addl		tmp,res,res
+	.exit
+	.procend
+#endif
+
+
+#ifdef L_divU
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define quotient %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	comb,<		divisor,0,LREF(largedivisor)
+	 sub		%r0,divisor,%r1		; clear cy as side-effect
+	ds		%r0,%r1,%r0
+	addc		dividend,dividend,dividend
+	ds		%r0,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,quotient
+	ds		%r1,divisor,%r1
+	bv		%r0(ret)
+	addc		quotient,quotient,quotient
+LSYM(largedivisor)
+	comclr,<<	dividend,divisor,quotient
+	ldi		1,quotient
+	bv,n		%r0(ret)
+	.exit
+	.procend
+#endif
+
+
+#ifdef L_remU
+#define dividend %r26
+#define divisor %r25
+#define quotient %r29
+#define tmp %r1
+#define ret %r31
+
+SPACE
+GSYM($$remU)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	comb,<		divisor,0,LREF(largedivisor)
+	 sub		%r0,divisor,%r1		; clear cy as side-effect
+	ds		%r0,%r1,%r0
+	addc		dividend,dividend,dividend
+	ds		%r0,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,quotient
+	ds		%r1,divisor,%r1
+	comclr,>=	%r1,%r0,%r0
+	addl		%r1,divisor,%r1
+	bv		%r0(ret)
+	copy		%r1,quotient
+LSYM(largedivisor)
+	sub,>>=		dividend,divisor,quotient
+	copy		dividend,quotient
+	bv,n		%r0(ret)
+	.exit
+	.procend
+#endif
+
+
+#ifdef L_divI
+#define dividend %r26
+#define divisor %r25
+#define quotient %r29
+#define tmp %r1
+#define ret %r31
+
+SPACE
+GSYM($$divI)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	xor		dividend,divisor,quotient	; result sign
+	comclr,>=	divisor,%r0,%r0			; get absolute values
+	sub		%r0,divisor,divisor
+	comclr,>=	dividend,%r0,%r0
+	sub		%r0,dividend,dividend
+
+	comb,<		divisor,0,LREF(largedivisor)
+	 sub		%r0,divisor,%r1		; clear cy as side-effect
+	ds		%r0,%r1,%r0
+	addc		dividend,dividend,dividend
+	ds		%r0,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	comclr,>=	%r1,%r0,%r0
+	addl		%r1,divisor,%r1
+	comclr,>=	quotient,%r0,%r0	; skip if no need to negate
+	sub		%r0,dividend,dividend
+	bv		%r0(ret)
+	copy		dividend,quotient
+LSYM(largedivisor)
+	comclr,<<	dividend,divisor,quotient
+	ldi		1,quotient
+	bv,n		%r0(ret)
+	.exit
+	.procend
+#endif
+
+
+#ifdef L_remI
+#define dividend %r26
+#define divisor %r25
+#define quotient %r29
+#define tmp %r1
+#define ret %r31
+
+SPACE
+GSYM($$remI)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	xor		dividend,%r0,quotient		; result sign
+	comclr,>=	divisor,%r0,%r0			; get absolute values
+	sub		%r0,divisor,divisor
+	comclr,>=	dividend,%r0,%r0
+	sub		%r0,dividend,dividend
+
+	comb,<		divisor,0,LREF(largedivisor)
+	 sub		%r0,divisor,%r1		; clear cy as side-effect
+	ds		%r0,%r1,%r0
+	addc		dividend,dividend,dividend
+	ds		%r0,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	ds		%r1,divisor,%r1
+	addc		dividend,dividend,dividend
+	comclr,>=	%r1,%r0,%r0
+	addl		%r1,divisor,%r1
+	comclr,>=	quotient,%r0,%r0	; skip if no need to negate
+	sub		%r0,%r1,%r1
+	bv		%r0(ret)
+	copy		%r1,quotient
+LSYM(largedivisor)
+	sub,>>=		dividend,divisor,quotient
+	copy		dividend,quotient
+	bv,n		%r0(ret)
+	.exit
+	.procend
+#endif
+
+
+#if defined (L_divU_3) && !defined (SMALL_LIB)
+#undef L_divU_3
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define result %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU_3)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	sh2add	%r26,%r26,%r29		; r29 = lo(101 x r)
+	shd	%r0,%r26,30,%r1		;  r1 = hi(100 x r)
+	addc	%r1,%r0,%r1		;  r1 = hi(101 x r)
+; r in r1,,r29
+	zdep	%r29,27,28,%r25		; r25 = lo(10000 x r)
+	add	%r25,%r29,%r25		; r25 = lo(10001 x r)
+	shd	%r1,%r29,28,%r29	; r29 = hi(10000 x r)
+	addc	%r29,%r1,%r29		; r29 = hi(10001 x r)
+; r in r29,,r25
+	zdep	%r25,23,24,%r1		;  r1 = lo(100000000 x r)
+	add	%r1,%r25,%r1		;  r1 = lo(100000001 x r)
+	shd	%r29,%r25,24,%r25	; r25 = hi(100000000 x r)
+	addc	%r25,%r29,%r25		; r25 = hi(100000001 x r)
+; r in r25,,r1
+	zdep	%r1,15,16,%r29
+	add	%r29,%r1,%r29
+	shd	%r25,%r1,16,%r1
+	addc	%r1,%r25,%r1
+; r in r1,,r29
+	sh1add	%r29,%r26,%r0		;  r0 = lo(10 x r) + dividend
+	shd	%r1,%r29,31,%r29	; r29 = hi(10 x r)
+	addc	%r29,%r0,%r29
+	bv	%r0(ret)
+	extru	%r29,30,31,result
+	.exit
+	.procend
+#endif
+
+
+#if defined (L_divU_5) && !defined (SMALL_LIB)
+#undef L_divU_5
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define result %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU_5)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	sh1add	%r26,%r26,%r29		; r29 = lo(11 x r)
+	shd	%r0,%r26,31,%r1		;  r1 = hi(10 x r)
+	addc	%r1,%r0,%r1		;  r1 = hi(11 x r)
+; r in r1,,r29
+	zdep	%r29,27,28,%r25		; r25 = lo(10000 x r)
+	add	%r25,%r29,%r25		; r25 = lo(10001 x r)
+	shd	%r1,%r29,28,%r29	; r29 = hi(10000 x r)
+	addc	%r29,%r1,%r29		; r29 = hi(10001 x r)
+; r in r29,,r25
+	zdep	%r25,23,24,%r1		;  r1 = lo(100000000 x r)
+	add	%r1,%r25,%r1		;  r1 = lo(100000001 x r)
+	shd	%r29,%r25,24,%r25	; r25 = hi(100000000 x r)
+	addc	%r25,%r29,%r25		; r25 = hi(100000001 x r)
+; r in r25,,r1
+	zdep	%r1,15,16,%r29
+	add	%r29,%r1,%r29
+	shd	%r25,%r1,16,%r1
+	addc	%r1,%r25,%r1
+; r in r1,,r29
+	sh2add	%r29,%r26,%r0		;  r0 = lo(1000 x r) + dividend
+	shd	%r1,%r29,30,%r29	; r29 = hi(1000 x r)
+	addc	%r29,%r0,%r29
+	bv	%r0(ret)
+	extru	%r29,29,30,result
+	.exit
+	.procend
+#endif
+
+
+#if defined (L_divU_6) && !defined (SMALL_LIB)
+#undef L_divU_6
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define result %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU_6)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	sh2add	%r26,%r26,%r29		; r29 = lo(101 x r)
+	shd	%r0,%r26,30,%r1		;  r1 = hi(100 x r)
+	addc	%r1,%r0,%r1		;  r1 = hi(101 x r)
+; r in r1,,r29
+	zdep	%r29,27,28,%r25		; r25 = lo(10000 x r)
+	add	%r25,%r29,%r25		; r25 = lo(10001 x r)
+	shd	%r1,%r29,28,%r29	; r29 = hi(10000 x r)
+	addc	%r29,%r1,%r29		; r29 = hi(10001 x r)
+; r in r29,,r25
+	zdep	%r25,23,24,%r1		;  r1 = lo(100000000 x r)
+	add	%r1,%r25,%r1		;  r1 = lo(100000001 x r)
+	shd	%r29,%r25,24,%r25	; r25 = hi(100000000 x r)
+	addc	%r25,%r29,%r25		; r25 = hi(100000001 x r)
+; r in r25,,r1
+	zdep	%r1,15,16,%r29
+	add	%r29,%r1,%r29
+	shd	%r25,%r1,16,%r1
+	addc	%r1,%r25,%r1
+; r in r1,,r29
+	sh1add	%r29,%r26,%r0		;  r0 = lo(10 x r) + dividend
+	shd	%r1,%r29,31,%r29	; r29 = hi(10 x r)
+	addc	%r29,%r0,%r29
+	bv	%r0(ret)
+	extru	%r29,29,30,result
+	.exit
+	.procend
+#endif
+
+
+#if defined (L_divU_9) && !defined (SMALL_LIB)
+#undef L_divU_9
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define result %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU_9)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	zdep	%r26,28,29,%r29
+	sub	%r29,%r26,%r29
+	shd	0,%r26,29,%r1
+	subb	%r1,0,%r1		/* 111 */
+
+	zdep	%r29,25,26,%r25
+	add	%r25,%r29,%r25
+	shd	%r1,%r29,26,%r29
+	addc	%r29,%r1,%r29		/* 111000111 */
+
+	sh3add	%r25,%r26,%r1
+	shd	%r29,%r25,29,%r25
+	addc	%r25,0,%r25		/* 111000111001 */
+
+	zdep	%r1,16,17,%r29
+	sub	%r29,%r1,%r29
+	shd	%r25,%r1,17,%r1
+	subb	%r1,%r25,%r1		/* 111000111000111000111000111 */
+
+	sh3add	%r29,%r26,%r0
+	shd	%r1,%r29,29,%r29
+	addc	%r29,0,%r29		/* 111000111000111000111000111001 */
+	bv	%r0(ret)
+	extru	%r29,30,31,result
+	.exit
+	.procend
+#endif
+
+
+#if defined (L_divU_10) && !defined (SMALL_LIB)
+#undef L_divU_10
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define result %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU_10)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	sh1add	%r26,%r26,%r29		; r29 = lo(11 x r)
+	shd	%r0,%r26,31,%r1		;  r1 = hi(10 x r)
+	addc	%r1,%r0,%r1		;  r1 = hi(11 x r)
+; r in r1,,r29
+	zdep	%r29,27,28,%r25		; r25 = lo(10000 x r)
+	add	%r25,%r29,%r25		; r25 = lo(10001 x r)
+	shd	%r1,%r29,28,%r29	; r29 = hi(10000 x r)
+	addc	%r29,%r1,%r29		; r29 = hi(10001 x r)
+; r in r29,,r25
+	zdep	%r25,23,24,%r1		;  r1 = lo(100000000 x r)
+	add	%r1,%r25,%r1		;  r1 = lo(100000001 x r)
+	shd	%r29,%r25,24,%r25	; r25 = hi(100000000 x r)
+	addc	%r25,%r29,%r25		; r25 = hi(100000001 x r)
+; r in r25,,r1
+	zdep	%r1,15,16,%r29
+	add	%r29,%r1,%r29
+	shd	%r25,%r1,16,%r1
+	addc	%r1,%r25,%r1
+; r in r1,,r29
+	sh2add	%r29,%r26,%r0		;  r0 = lo(1000 x r) + dividend
+	shd	%r1,%r29,30,%r29	; r29 = hi(1000 x r)
+	addc	%r29,%r0,%r29
+	bv	%r0(ret)
+	extru	%r29,28,29,result
+	.exit
+	.procend
+#endif
+
+
+#if defined (L_divU_12) && !defined (SMALL_LIB)
+#undef L_divU_12
+#define dividend %r26
+#define divisor %r25
+#define tmp %r1
+#define result %r29
+#define ret %r31
+
+SPACE
+GSYM($$divU_12)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	sh2add	%r26,%r26,%r29		; r29 = lo(101 x r)
+	shd	%r0,%r26,30,%r1		;  r1 = hi(100 x r)
+	addc	%r1,%r0,%r1		;  r1 = hi(101 x r)
+; r in r1,,r29
+	zdep	%r29,27,28,%r25		; r25 = lo(10000 x r)
+	add	%r25,%r29,%r25		; r25 = lo(10001 x r)
+	shd	%r1,%r29,28,%r29	; r29 = hi(10000 x r)
+	addc	%r29,%r1,%r29		; r29 = hi(10001 x r)
+; r in r29,,r25
+	zdep	%r25,23,24,%r1		;  r1 = lo(100000000 x r)
+	add	%r1,%r25,%r1		;  r1 = lo(100000001 x r)
+	shd	%r29,%r25,24,%r25	; r25 = hi(100000000 x r)
+	addc	%r25,%r29,%r25		; r25 = hi(100000001 x r)
+; r in r25,,r1
+	zdep	%r1,15,16,%r29
+	add	%r29,%r1,%r29
+	shd	%r25,%r1,16,%r1
+	addc	%r1,%r25,%r1
+; r in r1,,r29
+	sh1add	%r29,%r26,%r0		;  r0 = lo(10 x r) + dividend
+	shd	%r1,%r29,31,%r29	; r29 = hi(10 x r)
+	addc	%r29,%r0,%r29
+	bv	%r0(ret)
+	extru	%r29,28,29,result
+	.exit
+	.procend
+#endif
+
+
+#ifdef L_divU_3
+SPACE
+GSYM($$divU_3)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		3,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_5
+SPACE
+GSYM($$divU_5)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		5,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_6
+SPACE
+GSYM($$divU_6)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		6,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_7
+SPACE
+GSYM($$divU_7)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		7,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_9
+SPACE
+GSYM($$divU_9)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		9,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_10
+SPACE
+GSYM($$divU_10)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		10,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_12
+SPACE
+GSYM($$divU_12)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		12,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_14
+SPACE
+GSYM($$divU_14)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		14,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divU_15
+SPACE
+GSYM($$divU_15)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divU
+	ldi		15,%r25
+	.exit
+	.procend
+	.import		$$divU,MILLICODE
+#endif
+
+#ifdef L_divI_3
+SPACE
+GSYM($$divI_3)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		3,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_5
+SPACE
+GSYM($$divI_5)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		5,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_6
+SPACE
+GSYM($$divI_6)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		6,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_7
+SPACE
+GSYM($$divI_7)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		7,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_9
+SPACE
+GSYM($$divI_9)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		9,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_10
+SPACE
+GSYM($$divI_10)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		10,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_12
+SPACE
+GSYM($$divI_12)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		12,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_14
+SPACE
+GSYM($$divI_14)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		14,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
+
+#ifdef L_divI_15
+SPACE
+GSYM($$divI_15)
+	.proc
+	.callinfo	frame=0,no_calls
+	.entry
+	b		$$divI
+	ldi		15,%r25
+	.exit
+	.procend
+	.import		$$divI,MILLICODE
+#endif
diff -urpN -xCVS gnu_gcc/gcc/config/pa/milli64.S gcc/gcc/config/pa/milli64.S
--- gnu_gcc/gcc/config/pa/milli64.S	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/milli64.S	Thu Mar  1 16:24:07 2001
@@ -0,0 +1,2096 @@
+/* 64-bit millicode, original author Hewlett-Packard
+   adapted for gcc by Paul Bame <bame@debian.org>
+   and Alan Modra <alan@linuxcare.com.au>
+
+   Copyright 2001 Free Software Foundation, Inc.
+
+   This file is part of GNU CC and is released under the terms
+   of the GNU General Public License as published by the Free Software
+   Foundation; either version 2, or (at your option) any later version.
+   See the file COPYING in the top-level GNU CC source directory for a copy
+   of the license.  */
+
+
+#ifdef pa64
+        .level  2.0w
+#endif
+
+/* Hardware General Registers.  */
+r0:	.reg	%r0
+r1:	.reg	%r1
+r2:	.reg	%r2
+r3:	.reg	%r3
+r4:	.reg	%r4
+r5:	.reg	%r5
+r6:	.reg	%r6
+r7:	.reg	%r7
+r8:	.reg	%r8
+r9:	.reg	%r9
+r10:	.reg	%r10
+r11:	.reg	%r11
+r12:	.reg	%r12
+r13:	.reg	%r13
+r14:	.reg	%r14
+r15:	.reg	%r15
+r16:	.reg	%r16
+r17:	.reg	%r17
+r18:	.reg	%r18
+r19:	.reg	%r19
+r20:	.reg	%r20
+r21:	.reg	%r21
+r22:	.reg	%r22
+r23:	.reg	%r23
+r24:	.reg	%r24
+r25:	.reg	%r25
+r26:	.reg	%r26
+r27:	.reg	%r27
+r28:	.reg	%r28
+r29:	.reg	%r29
+r30:	.reg	%r30
+r31:	.reg	%r31
+
+/* Hardware Space Registers.  */
+sr0:	.reg	%sr0
+sr1:	.reg	%sr1
+sr2:	.reg	%sr2
+sr3:	.reg	%sr3
+sr4:	.reg	%sr4
+sr5:	.reg	%sr5
+sr6:	.reg	%sr6
+sr7:	.reg	%sr7
+
+/* Hardware Floating Point Registers.  */
+fr0:	.reg	%fr0
+fr1:	.reg	%fr1
+fr2:	.reg	%fr2
+fr3:	.reg	%fr3
+fr4:	.reg	%fr4
+fr5:	.reg	%fr5
+fr6:	.reg	%fr6
+fr7:	.reg	%fr7
+fr8:	.reg	%fr8
+fr9:	.reg	%fr9
+fr10:	.reg	%fr10
+fr11:	.reg	%fr11
+fr12:	.reg	%fr12
+fr13:	.reg	%fr13
+fr14:	.reg	%fr14
+fr15:	.reg	%fr15
+
+/* Hardware Control Registers.  */
+cr11:	.reg	%cr11
+sar:	.reg	%cr11	/* Shift Amount Register */
+
+/* Software Architecture General Registers.  */
+rp:	.reg    r2	/* return pointer */
+#ifdef pa64
+mrp:	.reg	r2 	/* millicode return pointer */
+#else
+mrp:	.reg	r31	/* millicode return pointer */
+#endif
+ret0:	.reg    r28	/* return value */
+ret1:	.reg    r29	/* return value (high part of double) */
+sp:	.reg 	r30	/* stack pointer */
+dp:	.reg	r27	/* data pointer */
+arg0:	.reg	r26	/* argument */
+arg1:	.reg	r25	/* argument or high part of double argument */
+arg2:	.reg	r24	/* argument */
+arg3:	.reg	r23	/* argument or high part of double argument */
+
+/* Software Architecture Space Registers.  */
+/* 		sr0	; return link from BLE */
+sret:	.reg	sr1	/* return value */
+sarg:	.reg	sr1	/* argument */
+/* 		sr4	; PC SPACE tracker */
+/* 		sr5	; process private data */
+
+/* Frame Offsets (millicode convention!)  Used when calling other
+   millicode routines.  Stack unwinding is dependent upon these
+   definitions.  */
+r31_slot:	.equ	-20	/* "current RP" slot */
+sr0_slot:	.equ	-16     /* "static link" slot */
+#if defined(pa64)
+mrp_slot:       .equ    -16	/* "current RP" slot */
+psp_slot:       .equ    -8	/* "previous SP" slot */
+#else
+mrp_slot:	.equ	-20     /* "current RP" slot (replacing "r31_slot") */
+#endif
+
+
+#define DEFINE(name,value)name:	.EQU	value
+#define RDEFINE(name,value)name:	.REG	value
+#ifdef milliext
+#define MILLI_BE(lbl)   BE    lbl(sr7,r0)
+#define MILLI_BEN(lbl)  BE,n  lbl(sr7,r0)
+#define MILLI_BLE(lbl)	BLE   lbl(sr7,r0)
+#define MILLI_BLEN(lbl)	BLE,n lbl(sr7,r0)
+#define MILLIRETN	BE,n  0(sr0,mrp)
+#define MILLIRET	BE    0(sr0,mrp)
+#define MILLI_RETN	BE,n  0(sr0,mrp)
+#define MILLI_RET	BE    0(sr0,mrp)
+#else
+#define MILLI_BE(lbl)	B     lbl
+#define MILLI_BEN(lbl)  B,n   lbl
+#define MILLI_BLE(lbl)	BL    lbl,mrp
+#define MILLI_BLEN(lbl)	BL,n  lbl,mrp
+#define MILLIRETN	BV,n  0(mrp)
+#define MILLIRET	BV    0(mrp)
+#define MILLI_RETN	BV,n  0(mrp)
+#define MILLI_RET	BV    0(mrp)
+#endif
+
+#ifdef __STDC__
+#define CAT(a,b)	a##b
+#else
+#define CAT(a,b)	a/**/b
+#endif
+
+#ifdef ELF
+#define SUBSPA_MILLI	 .section .text
+#define SUBSPA_MILLI_DIV .section .text.div,"ax",@progbits! .align 16
+#define SUBSPA_MILLI_MUL .section .text.mul,"ax",@progbits! .align 16
+#define ATTR_MILLI
+#define SUBSPA_DATA	 .section .data
+#define ATTR_DATA
+#define GLOBAL		 $global$
+#define GSYM(sym) 	 !sym:
+#define LSYM(sym)	 !CAT(.L,sym:)
+#define LREF(sym)	 CAT(.L,sym)
+
+#else
+
+#ifdef coff
+/* This used to be .milli but since link32 places different named
+   sections in different segments millicode ends up a long ways away
+   from .text (1meg?).  This way they will be a lot closer.
+
+   The SUBSPA_MILLI_* specify locality sets for certain millicode
+   modules in order to ensure that modules that call one another are
+   placed close together. Without locality sets this is unlikely to
+   happen because of the Dynamite linker library search algorithm. We
+   want these modules close together so that short calls always reach
+   (we don't want to require long calls or use long call stubs).  */
+
+#define SUBSPA_MILLI	 .subspa .text
+#define SUBSPA_MILLI_DIV .subspa .text$dv,align=16
+#define SUBSPA_MILLI_MUL .subspa .text$mu,align=16
+#define ATTR_MILLI	 .attr code,read,execute
+#define SUBSPA_DATA	 .subspa .data
+#define ATTR_DATA	 .attr init_data,read,write
+#define GLOBAL		 _gp
+#else
+#define SUBSPA_MILLI	 .subspa $MILLICODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,SORT=8
+#define SUBSPA_MILLI_DIV SUBSPA_MILLI
+#define SUBSPA_MILLI_MUL SUBSPA_MILLI
+#define ATTR_MILLI
+#define SUBSPA_DATA	 .subspa $BSS$,quad=1,align=8,access=0x1f,sort=80,zero
+#define ATTR_DATA
+#define GLOBAL		 $global$
+#endif
+#define SPACE_DATA	 .space $PRIVATE$,spnum=1,sort=16
+
+#define GSYM(sym)	 !sym
+#define LSYM(sym)	 !CAT(L$,sym)
+#define LREF(sym)	 CAT(L$,sym)
+#endif
+
+
+#ifdef L_divI
+/* ROUTINES:	$$divI, $$divoI
+
+   Single precision divide for signed binary integers.
+
+   The quotient is truncated towards zero.
+   The sign of the quotient is the XOR of the signs of the dividend and
+   divisor.
+   Divide by zero is trapped.
+   Divide of -2**31 by -1 is trapped for $$divoI but not for $$divI.
+
+   INPUT REGISTERS:
+   .	arg0 ==	dividend
+   .	arg1 ==	divisor
+   .	mrp  == return pc
+   .	sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .	arg0 =	undefined
+   .	arg1 =	undefined
+   .	ret1 =	quotient
+
+   OTHER REGISTERS AFFECTED:
+   .	r1   =	undefined
+
+   SIDE EFFECTS:
+   .	Causes a trap under the following conditions:
+   .		divisor is zero  (traps with ADDIT,=  0,25,0)
+   .		dividend==-2**31  and divisor==-1 and routine is $$divoI
+   .				 (traps with ADDO  26,25,0)
+   .	Changes memory at the following places:
+   .		NONE
+
+   PERMISSIBLE CONTEXT:
+   .	Unwindable.
+   .	Suitable for internal or external millicode.
+   .	Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .	Branches to other millicode routines using BE
+   .		$$div_# for # being 2,3,4,5,6,7,8,9,10,12,14,15
+   .
+   .	For selected divisors, calls a divide by constant routine written by
+   .	Karl Pettis.  Eligible divisors are 1..15 excluding 11 and 13.
+   .
+   .	The only overflow case is -2**31 divided by -1.
+   .	Both routines return -2**31 but only $$divoI traps.  */
+
+RDEFINE(temp,r1)	/*  scratch */
+RDEFINE(retreg,ret1)	/*  r29 */
+RDEFINE(temp1,arg0)	/*  scratch; aliases arg0 once the dividend is consumed */
+	SUBSPA_MILLI_DIV
+	ATTR_MILLI
+	.import $$divI_2,millicode
+	.import $$divI_3,millicode
+	.import $$divI_4,millicode
+	.import $$divI_5,millicode
+	.import $$divI_6,millicode
+	.import $$divI_7,millicode
+	.import $$divI_8,millicode
+	.import $$divI_9,millicode
+	.import $$divI_10,millicode
+	.import $$divI_12,millicode
+	.import $$divI_14,millicode
+	.import $$divI_15,millicode
+	.export $$divI,millicode
+	.export	$$divoI,millicode
+	.proc
+	.callinfo	millicode
+	.entry
+GSYM($$divoI)
+	comib,=,n  -1,arg1,LREF(negative1)	/*  when divisor == -1 */
+GSYM($$divI)
+	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
+	and,<>	arg1,temp,r0		/*  if not, don't use power of 2 divide */
+	addi,>	0,arg1,r0		/*  if divisor > 0, use power of 2 divide */
+	b,n	LREF(neg_denom)
+LSYM(pow2)
+	addi,>=	0,arg0,retreg		/*  if numerator is negative, add the */
+	add	arg0,temp,retreg	/*  (denominator - 1) to correct for shifts */
+	extru,=	arg1,15,16,temp		/*  test denominator with 0xffff0000 */
+	extrs	retreg,15,16,retreg	/*  retreg = retreg >> 16 */
+	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 16) */
+	ldi	0xcc,temp1		/*  setup 0xcc in temp1 */
+	extru,= arg1,23,8,temp		/*  test denominator with 0xff00 */
+	extrs	retreg,23,24,retreg	/*  retreg = retreg >> 8 */
+	or	arg1,temp,arg1		/*  arg1 = arg1 | (arg1 >> 8) */
+	ldi	0xaa,temp		/*  setup 0xaa in temp */
+	extru,= arg1,27,4,r0		/*  test denominator with 0xf0 */
+	extrs	retreg,27,28,retreg	/*  retreg = retreg >> 4 */
+	and,=	arg1,temp1,r0		/*  test denominator with 0xcc */
+	extrs	retreg,29,30,retreg	/*  retreg = retreg >> 2 */
+	and,=	arg1,temp,r0		/*  test denominator with 0xaa */
+	extrs	retreg,30,31,retreg	/*  retreg = retreg >> 1 */
+	MILLIRETN
+LSYM(neg_denom)
+	addi,<	0,arg1,r0		/*  if arg1 >= 0, it's not power of 2 */
+	b,n	LREF(regular_seq)
+	sub	r0,arg1,temp		/*  make denominator positive */
+	comb,=,n  arg1,temp,LREF(regular_seq)	/*  test against 0x80000000 and 0 */
+	ldo	-1(temp),retreg		/*  is there at most one bit set ? */
+	and,=	temp,retreg,r0		/*  if so, the denominator is power of 2 */
+	b,n	LREF(regular_seq)
+	sub	r0,arg0,retreg		/*  negate numerator */
+	comb,=,n arg0,retreg,LREF(regular_seq) /*  test against 0x80000000 */
+	copy	retreg,arg0		/*  set up arg0, arg1 and temp	*/
+	copy	temp,arg1		/*  before branching to pow2 */
+	b	LREF(pow2)
+	ldo	-1(arg1),temp
+LSYM(regular_seq)
+	comib,>>=,n 15,arg1,LREF(small_divisor)
+	add,>=	0,arg0,retreg		/*  move dividend, if retreg < 0, */
+LSYM(normal)
+	subi	0,retreg,retreg		/*    make it positive */
+	sub	0,arg1,temp		/*  clear carry,  */
+					/*    negate the divisor */
+	ds	0,temp,0		/*  set V-bit to the comple- */
+					/*    ment of the divisor sign */
+	add	retreg,retreg,retreg	/*  shift msb bit into carry */
+	ds	r0,arg1,temp		/*  1st divide step, if no carry */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  2nd divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  3rd divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  4th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  5th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  6th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  7th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  8th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  9th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  10th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  11th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  12th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  13th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  14th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  15th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  16th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  17th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  18th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  19th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  20th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  21st divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  22nd divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  23rd divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  24th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  25th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  26th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  27th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  28th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  29th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  30th divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  31st divide step */
+	addc	retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds	temp,arg1,temp		/*  32nd divide step, */
+	addc	retreg,retreg,retreg	/*  shift last retreg bit into retreg */
+	xor,>=	arg0,arg1,0		/*  get correct sign of quotient */
+	  sub	0,retreg,retreg		/*    based on operand signs */
+	MILLIRETN
+	nop
+
+LSYM(small_divisor)
+
+#if defined(pa64)
+/*  Clear the upper 32 bits of the arg1 register.  We are working with  */
+/*  small divisors (and 32 bit integers).  We must not be misled  */
+/*  by "1" bits left in the upper 32 bits. */
+	depd r0,31,32,arg1
+#endif
+	blr,n	arg1,r0
+	nop
+/*  table for divisor == 0,1, ... ,15 */
+	addit,=	0,arg1,r0	/*  trap if divisor == 0 */
+	nop
+	MILLIRET		/*  divisor == 1 */
+	copy	arg0,retreg
+	MILLI_BEN($$divI_2)	/*  divisor == 2 */
+	nop
+	MILLI_BEN($$divI_3)	/*  divisor == 3 */
+	nop
+	MILLI_BEN($$divI_4)	/*  divisor == 4 */
+	nop
+	MILLI_BEN($$divI_5)	/*  divisor == 5 */
+	nop
+	MILLI_BEN($$divI_6)	/*  divisor == 6 */
+	nop
+	MILLI_BEN($$divI_7)	/*  divisor == 7 */
+	nop
+	MILLI_BEN($$divI_8)	/*  divisor == 8 */
+	nop
+	MILLI_BEN($$divI_9)	/*  divisor == 9 */
+	nop
+	MILLI_BEN($$divI_10)	/*  divisor == 10 */
+	nop
+	b	LREF(normal)		/*  divisor == 11 */
+	add,>=	0,arg0,retreg
+	MILLI_BEN($$divI_12)	/*  divisor == 12 */
+	nop
+	b	LREF(normal)		/*  divisor == 13 */
+	add,>=	0,arg0,retreg
+	MILLI_BEN($$divI_14)	/*  divisor == 14 */
+	nop
+	MILLI_BEN($$divI_15)	/*  divisor == 15 */
+	nop
+
+LSYM(negative1)
+	sub	0,arg0,retreg	/*  result is negation of dividend */
+	MILLIRET
+	addo	arg0,arg1,r0	/*  trap iff dividend==0x80000000 && divisor==-1 */
+	.exit
+	.procend
+	.end
+#endif
+
+#ifdef L_divU
+/* ROUTINE:	$$divU
+   .
+   .	Single precision divide for unsigned integers.
+   .
+   .	Quotient is truncated towards zero.
+   .	Traps on divide by zero.
+
+   INPUT REGISTERS:
+   .	arg0 ==	dividend
+   .	arg1 ==	divisor
+   .	mrp  == return pc
+   .	sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .	arg0 =	undefined
+   .	arg1 =	undefined
+   .	ret1 =	quotient
+
+   OTHER REGISTERS AFFECTED:
+   .	r1   =	undefined
+
+   SIDE EFFECTS:
+   .	Causes a trap under the following conditions:
+   .		divisor is zero
+   .	Changes memory at the following places:
+   .		NONE
+
+   PERMISSIBLE CONTEXT:
+   .	Unwindable.
+   .	Does not create a stack frame.
+   .	Suitable for internal or external millicode.
+   .	Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .	Branches to other millicode routines using BE:
+   .		$$divU_# for 3,5,6,7,9,10,12,14,15
+   .
+   .	For selected small divisors calls the special divide by constant
+   .	routines written by Karl Pettis.  These are: 3,5,6,7,9,10,12,14,15.  */
+
+RDEFINE(temp,r1)
+RDEFINE(retreg,ret1)	/* r29 */
+RDEFINE(temp1,arg0)
+	SUBSPA_MILLI_DIV
+	ATTR_MILLI
+	.export $$divU,millicode
+	.import $$divU_3,millicode
+	.import $$divU_5,millicode
+	.import $$divU_6,millicode
+	.import $$divU_7,millicode
+	.import $$divU_9,millicode
+	.import $$divU_10,millicode
+	.import $$divU_12,millicode
+	.import $$divU_14,millicode
+	.import $$divU_15,millicode
+	.proc
+	.callinfo	millicode
+	.entry
+GSYM($$divU)
+/* The subtract is not nullified since it does no harm and can be used
+   by the two cases that branch back to "normal".  */
+	ldo	-1(arg1),temp		/* is there at most one bit set ? */
+	and,=	arg1,temp,r0		/* if so, denominator is power of 2 */
+	b	LREF(regular_seq)
+	addit,=	0,arg1,0		/* trap for zero divisor */
+	copy	arg0,retreg
+	extru,= arg1,15,16,temp		/* test denominator with 0xffff0000 */
+	extru	retreg,15,16,retreg	/* retreg = retreg >> 16 */
+	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 16) */
+	ldi	0xcc,temp1		/* setup 0xcc in temp1 */
+	extru,= arg1,23,8,temp		/* test denominator with 0xff00 */
+	extru	retreg,23,24,retreg	/* retreg = retreg >> 8 */
+	or	arg1,temp,arg1		/* arg1 = arg1 | (arg1 >> 8) */
+	ldi	0xaa,temp		/* setup 0xaa in temp */
+	extru,= arg1,27,4,r0		/* test denominator with 0xf0 */
+	extru	retreg,27,28,retreg	/* retreg = retreg >> 4 */
+	and,=	arg1,temp1,r0		/* test denominator with 0xcc */
+	extru	retreg,29,30,retreg	/* retreg = retreg >> 2 */
+	and,=	arg1,temp,r0		/* test denominator with 0xaa */
+	extru	retreg,30,31,retreg	/* retreg = retreg >> 1 */
+	MILLIRETN
+	nop
+LSYM(regular_seq)
+	comib,>=  15,arg1,LREF(special_divisor)
+	subi	0,arg1,temp		/* clear carry, negate the divisor */
+	ds	r0,temp,r0		/* set V-bit to 1 */
+LSYM(normal)
+	add	arg0,arg0,retreg	/* shift msb bit into carry */
+	ds	r0,arg1,temp		/* 1st divide step, if no carry */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 2nd divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 3rd divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 4th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 5th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 6th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 7th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 8th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 9th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 10th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 11th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 12th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 13th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 14th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 15th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 16th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 17th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 18th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 19th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 20th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 21st divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 22nd divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 23rd divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 24th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 25th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 26th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 27th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 28th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 29th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 30th divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 31st divide step */
+	addc	retreg,retreg,retreg	/* shift retreg with/into carry */
+	ds	temp,arg1,temp		/* 32nd divide step, */
+	MILLIRET
+	addc	retreg,retreg,retreg	/* shift last retreg bit into retreg */
+
+/* Handle the cases where divisor is a small constant or has high bit on.  */
+LSYM(special_divisor)
+/*	blr	arg1,r0 */
+/*	comib,>,n  0,arg1,LREF(big_divisor) ; nullify previous instruction */
+
+/* Pratap 8/13/90. The 815 Stirling chip set has a bug that prevents us from
+   generating such a blr, comib sequence. A problem in nullification. So I
+   rewrote this code.  */
+
+#if defined(pa64)
+/* Clear the upper 32 bits of the arg1 register.  We are working with
+   small divisors (and 32 bit unsigned integers).  We must not be misled
+   by "1" bits left in the upper 32 bits.  */
+	depd r0,31,32,arg1
+#endif
+	comib,>	0,arg1,LREF(big_divisor)
+	nop
+	blr	arg1,r0
+	nop
+
+LSYM(zero_divisor)	/* this label is here to provide external visibility */
+	addit,=	0,arg1,0		/* trap for zero divisor */
+	nop
+	MILLIRET			/* divisor == 1 */
+	copy	arg0,retreg
+	MILLIRET			/* divisor == 2 */
+	extru	arg0,30,31,retreg
+	MILLI_BEN($$divU_3)		/* divisor == 3 */
+	nop
+	MILLIRET			/* divisor == 4 */
+	extru	arg0,29,30,retreg
+	MILLI_BEN($$divU_5)		/* divisor == 5 */
+	nop
+	MILLI_BEN($$divU_6)		/* divisor == 6 */
+	nop
+	MILLI_BEN($$divU_7)		/* divisor == 7 */
+	nop
+	MILLIRET			/* divisor == 8 */
+	extru	arg0,28,29,retreg
+	MILLI_BEN($$divU_9)		/* divisor == 9 */
+	nop
+	MILLI_BEN($$divU_10)		/* divisor == 10 */
+	nop
+	b	LREF(normal)		/* divisor == 11 */
+	ds	r0,temp,r0		/* set V-bit to 1 */
+	MILLI_BEN($$divU_12)		/* divisor == 12 */
+	nop
+	b	LREF(normal)		/* divisor == 13 */
+	ds	r0,temp,r0		/* set V-bit to 1 */
+	MILLI_BEN($$divU_14)		/* divisor == 14 */
+	nop
+	MILLI_BEN($$divU_15)		/* divisor == 15 */
+	nop
+
+/* Handle the case where the high bit is on in the divisor.
+   Compute:	if( dividend>=divisor) quotient=1; else quotient=0;
+   Note:	dividend>=divisor iff dividend-divisor does not borrow
+   and		not borrow iff carry.  */
+LSYM(big_divisor)
+	sub	arg0,arg1,r0
+	MILLIRET
+	addc	r0,r0,retreg
+	.exit
+	.procend
+	.end
+#endif
+
+#ifdef L_remI
+/* ROUTINE:	$$remI
+
+   DESCRIPTION:
+   .	$$remI returns the remainder of the division of two signed 32-bit
+   .	integers.  The sign of the remainder is the same as the sign of
+   .	the dividend.
+
+
+   INPUT REGISTERS:
+   .	arg0 == dividend
+   .	arg1 == divisor
+   .	mrp  == return pc
+   .	sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .	arg0 = destroyed
+   .	arg1 = destroyed
+   .	ret1 = remainder
+
+   OTHER REGISTERS AFFECTED:
+   .	r1   = undefined
+
+   SIDE EFFECTS:
+   .	Causes a trap under the following conditions:  DIVIDE BY ZERO
+   .	Changes memory at the following places:  NONE
+
+   PERMISSIBLE CONTEXT:
+   .	Unwindable
+   .	Does not create a stack frame
+   .	Is usable for internal or external millicode
+
+   DISCUSSION:
+   .	Calls other millicode routines via mrp:  NONE
+   .	Calls other millicode routines:  NONE  */
+
+RDEFINE(tmp,r1)
+RDEFINE(retreg,ret1)
+
+	SUBSPA_MILLI
+	ATTR_MILLI
+	.proc
+	.callinfo millicode
+	.entry
+GSYM($$remI)
+GSYM($$remoI)
+	.export $$remI,MILLICODE
+	.export $$remoI,MILLICODE
+	ldo		-1(arg1),tmp		/*  is there at most one bit set ? */
+	and,<>		arg1,tmp,r0		/*  if not, don't use power of 2 */
+	addi,>		0,arg1,r0		/*  if denominator > 0, use power */
+						/*  of 2 */
+	b,n		LREF(neg_denom)
+LSYM(pow2)
+	comb,>,n	0,arg0,LREF(neg_num)	/*  is numerator < 0 ? */
+	and		arg0,tmp,retreg		/*  get the result */
+	MILLIRETN
+LSYM(neg_num)
+	subi		0,arg0,arg0		/*  negate numerator */
+	and		arg0,tmp,retreg		/*  get the result */
+	subi		0,retreg,retreg		/*  negate result */
+	MILLIRETN
+LSYM(neg_denom)
+	addi,<		0,arg1,r0		/*  if arg1 >= 0, it's not power */
+						/*  of 2 */
+	b,n		LREF(regular_seq)
+	sub		r0,arg1,tmp		/*  make denominator positive */
+	comb,=,n	arg1,tmp,LREF(regular_seq) /*  test against 0x80000000 and 0 */
+	ldo		-1(tmp),retreg		/*  is there at most one bit set ? */
+	and,=		tmp,retreg,r0		/*  if not, go to regular_seq */
+	b,n		LREF(regular_seq)
+	comb,>,n	0,arg0,LREF(neg_num_2)	/*  if arg0 < 0, negate it  */
+	and		arg0,retreg,retreg
+	MILLIRETN
+LSYM(neg_num_2)
+	subi		0,arg0,tmp		/*  test against 0x80000000 */
+	and		tmp,retreg,retreg
+	subi		0,retreg,retreg
+	MILLIRETN
+LSYM(regular_seq)
+	addit,=		0,arg1,0		/*  trap if div by zero */
+	add,>=		0,arg0,retreg		/*  move dividend, if retreg < 0, */
+	sub		0,retreg,retreg		/*    make it positive */
+	sub		0,arg1, tmp		/*  clear carry,  */
+						/*    negate the divisor */
+	ds		0, tmp,0		/*  set V-bit to the comple- */
+						/*    ment of the divisor sign */
+	or		0,0, tmp		/*  clear  tmp */
+	add		retreg,retreg,retreg	/*  shift msb bit into carry */
+	ds		 tmp,arg1, tmp		/*  1st divide step, if no carry */
+						/*    out, msb of quotient = 0 */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+LSYM(t1)
+	ds		 tmp,arg1, tmp		/*  2nd divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  3rd divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  4th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  5th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  6th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  7th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  8th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  9th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  10th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  11th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  12th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  13th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  14th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  15th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  16th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  17th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  18th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  19th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  20th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  21st divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  22nd divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  23rd divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  24th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  25th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  26th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  27th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  28th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  29th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  30th divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  31st divide step */
+	addc		retreg,retreg,retreg	/*  shift retreg with/into carry */
+	ds		 tmp,arg1, tmp		/*  32nd divide step, */
+	addc		retreg,retreg,retreg	/*  shift last bit into retreg */
+	movb,>=,n	 tmp,retreg,LREF(finish) /*  branch if tmp >= 0 */
+	add,<		arg1,0,0		/*  if arg1 > 0, add arg1 */
+	add,tr		 tmp,arg1,retreg	/*    for correcting remainder tmp */
+	sub		 tmp,arg1,retreg	/*  else add absolute value arg1 */
+LSYM(finish)
+	add,>=		arg0,0,0		/*  set sign of remainder */
+	sub		0,retreg,retreg		/*    to sign of dividend */
+	MILLIRET
+	nop
+	.exit
+	.procend
+#ifdef milliext
+	.origin 0x00000200
+#endif
+	.end
+#endif
+
+#ifdef L_remU
+/* ROUTINE:	$$remU
+   .	Single precision divide for remainder with unsigned binary integers.
+   .
+   .	The remainder must be dividend-(dividend/divisor)*divisor.
+   .	Divide by zero is trapped.
+
+   INPUT REGISTERS:
+   .	arg0 ==	dividend
+   .	arg1 == divisor
+   .	mrp  == return pc
+   .	sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .	arg0 =	undefined
+   .	arg1 =	undefined
+   .	ret1 =	remainder
+
+   OTHER REGISTERS AFFECTED:
+   .	r1   =	undefined
+
+   SIDE EFFECTS:
+   .	Causes a trap under the following conditions:  DIVIDE BY ZERO
+   .	Changes memory at the following places:  NONE
+
+   PERMISSIBLE CONTEXT:
+   .	Unwindable.
+   .	Does not create a stack frame.
+   .	Suitable for internal or external millicode.
+   .	Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .	Calls other millicode routines using mrp: NONE
+   .	Calls other millicode routines: NONE  */
+
+
+RDEFINE(temp,r1)
+RDEFINE(rmndr,ret1)	/*  r29 */
+	SUBSPA_MILLI
+	ATTR_MILLI
+	.export $$remU,millicode
+	.proc
+	.callinfo	millicode
+	.entry
+/* $$remU: unsigned 32-bit remainder, ret1 = arg0 mod arg1.
+   A power-of-2 divisor is handled by a single AND with (arg1-1);
+   anything else falls through to the 32 conditional divide steps
+   (DS) below.  Traps on a zero divisor (ADDIT).  */
+GSYM($$remU)
+	ldo	-1(arg1),temp		/*  is there at most one bit set ? */
+	and,=	arg1,temp,r0		/*  if not, don't use power of 2 */
+	b	LREF(regular_seq)
+	addit,=	0,arg1,r0		/*  trap on div by zero */
+	and	arg0,temp,rmndr		/*  get the result for power of 2 */
+	MILLIRETN
+LSYM(regular_seq)
+	comib,>=,n  0,arg1,LREF(special_case)
+	subi	0,arg1,rmndr		/*  clear carry, negate the divisor */
+	ds	r0,rmndr,r0		/*  set V-bit to 1 */
+	add	arg0,arg0,temp		/*  shift msb bit into carry */
+	ds	r0,arg1,rmndr		/*  1st divide step, if no carry */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  2nd divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  3rd divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  4th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  5th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  6th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  7th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  8th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  9th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  10th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  11th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  12th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  13th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  14th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  15th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  16th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  17th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  18th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  19th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  20th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  21st divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  22nd divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  23rd divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  24th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  25th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  26th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  27th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  28th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  29th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  30th divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  31st divide step */
+	addc	temp,temp,temp		/*  shift temp with/into carry */
+	ds	rmndr,arg1,rmndr		/*  32nd divide step, */
+	comiclr,<= 0,rmndr,r0
+	  add	rmndr,arg1,rmndr	/*  correction */
+	MILLIRETN
+	nop
+
+/* Putting >= on the last DS and deleting COMICLR does not work!  */
+LSYM(special_case)
+	sub,>>=	arg0,arg1,rmndr
+	  copy	arg0,rmndr
+	MILLIRETN
+	nop
+	.exit
+	.procend
+	.end
+#endif
+
+#ifdef L_div_const
+/* ROUTINE:	$$divI_2
+   .		$$divI_3	$$divU_3
+   .		$$divI_4
+   .		$$divI_5	$$divU_5
+   .		$$divI_6	$$divU_6
+   .		$$divI_7	$$divU_7
+   .		$$divI_8
+   .		$$divI_9	$$divU_9
+   .		$$divI_10	$$divU_10
+   .
+   .		$$divI_12	$$divU_12
+   .
+   .		$$divI_14	$$divU_14
+   .		$$divI_15	$$divU_15
+   .		$$divI_16
+   .		$$divI_17	$$divU_17
+   .
+   .	Divide by selected constants for single precision binary integers.
+
+   INPUT REGISTERS:
+   .	arg0 ==	dividend
+   .	mrp  == return pc
+   .	sr0  == return space when called externally
+
+   OUTPUT REGISTERS:
+   .	arg0 =	undefined
+   .	arg1 =	undefined
+   .	ret1 =	quotient
+
+   OTHER REGISTERS AFFECTED:
+   .	r1   =	undefined
+
+   SIDE EFFECTS:
+   .	Causes a trap under the following conditions: NONE
+   .	Changes memory at the following places:  NONE
+
+   PERMISSIBLE CONTEXT:
+   .	Unwindable.
+   .	Does not create a stack frame.
+   .	Suitable for internal or external millicode.
+   .	Assumes the special millicode register conventions.
+
+   DISCUSSION:
+   .	Calls other millicode routines using mrp:  NONE
+   .	Calls other millicode routines:  NONE  */
+
+
+/* TRUNCATED DIVISION BY SMALL INTEGERS
+
+   We are interested in q(x) = floor(x/y), where x >= 0 and y > 0
+   (with y fixed).
+
+   Let a = floor(z/y), for some choice of z.  Note that z will be
+   chosen so that division by z is cheap.
+
+   Let r be the remainder(z/y).  In other words, r = z - ay.
+
+   Now, our method is to choose a value for b such that
+
+   q'(x) = floor((ax+b)/z)
+
+   is equal to q(x) over as large a range of x as possible.  If the
+   two are equal over a sufficiently large range, and if it is easy to
+   form the product (ax), and it is easy to divide by z, then we can
+   perform the division much faster than the general division algorithm.
+
+   So, we want the following to be true:
+
+   .	For x in the following range:
+   .
+   .	    ky <= x < (k+1)y
+   .
+   .	implies that
+   .
+   .	    k <= (ax+b)/z < (k+1)
+
+   We want to determine b such that this is true for all k in the
+   range {0..K} for some maximum K.
+
+   Since (ax+b) is an increasing function of x, we can take each
+   bound separately to determine the "best" value for b.
+
+   (ax+b)/z < (k+1)	       implies
+
+   a((k+1)y-1)+b < (k+1)z      implies
+
+   b < a + (k+1)(z-ay)	       implies
+
+   b < a + (k+1)r
+
+   This needs to be true for all k in the range {0..K}.  In
+   particular, it is true for k = 0 and this leads to a maximum
+   acceptable value for b.
+
+   b < a+r   or   b <= a+r-1
+
+   Taking the other bound, we have
+
+   k <= (ax+b)/z	       implies
+
+   k <= (aky+b)/z	       implies
+
+   k(z-ay) <= b		       implies
+
+   kr <= b
+
+   Clearly, the largest range for k will be achieved by maximizing b,
+   when r is not zero.	When r is zero, then the simplest choice for b
+   is 0.  When r is not 0, set
+
+   .	b = a+r-1
+
+   Now, by construction, q'(x) = floor((ax+b)/z) = q(x) = floor(x/y)
+   for all x in the range:
+
+   .	0 <= x < (K+1)y
+
+   We need to determine what K is.  Of our two bounds,
+
+   .	b < a+(k+1)r	is satisfied for all k >= 0, by construction.
+
+   The other bound is
+
+   .	kr <= b
+
+   This is always true if r = 0.  If r is not 0 (the usual case), then
+   K = floor((a+r-1)/r), is the maximum value for k.
+
+   Therefore, the formula q'(x) = floor((ax+b)/z) yields the correct
+   answer for q(x) = floor(x/y) when x is in the range
+
+   (0,(K+1)y-1)	       K = floor((a+r-1)/r)
+
+   To be most useful, we want (K+1)y-1 = (max x) >= 2**32-1 so that
+   the formula for q'(x) yields the correct value of q(x) for all x
+   representable by a single word in HPPA.
+
+   We are also constrained in that computing the product (ax), adding
+   b, and dividing by z must all be done quickly, otherwise we will be
+   better off going through the general algorithm using the DS
+   instruction, which uses approximately 70 cycles.
+
+   For each y, there is a choice of z which satisfies the constraints
+   for (K+1)y >= 2**32.  We may not, however, be able to satisfy the
+   timing constraints for arbitrary y.	It seems that z being equal to
+   a power of 2 or a power of 2 minus 1 is as good as we can do, since
+   it minimizes the time to do division by z.  We want the choice of z
+   to also result in a value for (a) that minimizes the computation of
+   the product (ax).  This is best achieved if (a) has a regular bit
+   pattern (so the multiplication can be done with shifts and adds).
+   The value of (a) also needs to be less than 2**32 so the product is
+   always guaranteed to fit in 2 words.
+
+   In actual practice, the following should be done:
+
+   1) For negative x, you should take the absolute value and remember
+   .  the fact so that the result can be negated.  This obviously does
+   .  not apply in the unsigned case.
+   2) For even y, you should factor out the power of 2 that divides y
+   .  and divide x by it.  You can then proceed by dividing by the
+   .  odd factor of y.
+
+   Here is a table of some odd values of y, and corresponding choices
+   for z which are "good".
+
+    y	  z	  r	 a (hex)     max x (hex)
+
+    3	2**32	  1	55555555      100000001
+    5	2**32	  1	33333333      100000003
+    7  2**24-1	  0	  249249     (infinite)
+    9  2**24-1	  0	  1c71c7     (infinite)
+   11  2**20-1	  0	   1745d     (infinite)
+   13  2**24-1	  0	  13b13b     (infinite)
+   15	2**32	  1	11111111      10000000d
+   17	2**32	  1	 f0f0f0f      10000000f
+
+   If r is 1, then b = a+r-1 = a.  This simplifies the computation
+   of (ax+b), since you can compute (x+1)(a) instead.  If r is 0,
+   then b = 0 is ok to use which simplifies (ax+b).
+
+   The bit patterns for 55555555, 33333333, and 11111111 are obviously
+   very regular.  The bit patterns for the other values of a above are:
+
+    y	   (hex)	  (binary)
+
+    7	  249249  001001001001001001001001  << regular >>
+    9	  1c71c7  000111000111000111000111  << regular >>
+   11	   1745d  000000010111010001011101  << irregular >>
+   13	  13b13b  000100111011000100111011  << irregular >>
+
+   The bit patterns for (a) corresponding to (y) of 11 and 13 may be
+   too irregular to warrant using this method.
+
+   When z is a power of 2 minus 1, then the division by z is slightly
+   more complicated, involving an iterative solution.
+
+   The code presented here solves division by 1 through 17, except for
+   11 and 13. There are algorithms for both signed and unsigned
+   quantities given.
+
+   TIMINGS (cycles)
+
+   divisor  positive  negative	unsigned
+
+   .   1	2	   2	     2
+   .   2	4	   4	     2
+   .   3       19	  21	    19
+   .   4	4	   4	     2
+   .   5       18	  22	    19
+   .   6       19	  22	    19
+   .   8	4	   4	     2
+   .  10       18	  19	    17
+   .  12       18	  20	    18
+   .  15       16	  18	    16
+   .  16	4	   4	     2
+   .  17       16	  18	    16
+
+   Now, the algorithm for 7, 9, and 14 is an iterative one.  That is,
+   a loop body is executed until the tentative quotient is 0.  The
+   number of times the loop body is executed varies depending on the
+   dividend, but is never more than two times.	If the dividend is
+   less than the divisor, then the loop body is not executed at all.
+   Each iteration adds 4 cycles to the timings.
+
+   divisor  positive  negative	unsigned
+
+   .   7       19+4n	 20+4n	   20+4n    n = number of iterations
+   .   9       21+4n	 22+4n	   21+4n
+   .  14       21+4n	 22+4n	   20+4n
+
+   To give an idea of how the number of iterations varies, here is a
+   table of dividend versus number of iterations when dividing by 7.
+
+   smallest	 largest       required
+   dividend	dividend      iterations
+
+   .	0	     6		    0
+   .	7	 0x6ffffff	    1
+   0x1000006	0xffffffff	    2
+
+   There is some overlap in the range of numbers requiring 1 and 2
+   iterations.	*/
+
+RDEFINE(t2,r1)
+RDEFINE(x2,arg0)	/*  r26 */
+RDEFINE(t1,arg1)	/*  r25 */
+RDEFINE(x1,ret1)	/*  r29 */
+
+	SUBSPA_MILLI_DIV
+	ATTR_MILLI
+
+	.proc
+	.callinfo	millicode
+	.entry
+/* NONE of these routines require a stack frame
+   ALL of these routines are unwindable from millicode	*/
+
+GSYM($$divide_by_constant)
+	.export $$divide_by_constant,millicode
+/*  Provides a "nice" label for the code covered by the unwind descriptor
+    for things like gprof.  */
+
+/* Signed division by a power of 2: the COMCLR nullifies the ADDI when
+   the dividend is non-negative; a negative dividend gets (2**k)-1
+   added first, so the arithmetic extract (EXTRS) in the return delay
+   slot truncates the quotient toward zero rather than toward
+   minus infinity.  */
+
+/* DIVISION BY 2 (shift by 1) */
+GSYM($$divI_2)
+	.export		$$divI_2,millicode
+	comclr,>=	arg0,0,0
+	addi		1,arg0,arg0
+	MILLIRET
+	extrs		arg0,30,31,ret1
+
+
+/* DIVISION BY 4 (shift by 2) */
+GSYM($$divI_4)
+	.export		$$divI_4,millicode
+	comclr,>=	arg0,0,0
+	addi		3,arg0,arg0
+	MILLIRET
+	extrs		arg0,29,30,ret1
+
+
+/* DIVISION BY 8 (shift by 3) */
+GSYM($$divI_8)
+	.export		$$divI_8,millicode
+	comclr,>=	arg0,0,0
+	addi		7,arg0,arg0
+	MILLIRET
+	extrs		arg0,28,29,ret1
+
+/* DIVISION BY 16 (shift by 4) */
+GSYM($$divI_16)
+	.export		$$divI_16,millicode
+	comclr,>=	arg0,0,0
+	addi		15,arg0,arg0
+	MILLIRET
+	extrs		arg0,27,28,ret1
+
+/****************************************************************************
+*
+*	DIVISION BY DIVISORS OF FFFFFFFF, and powers of 2 times these
+*
+*	includes 3,5,15,17 and also 6,10,12
+*
+****************************************************************************/
+
+/* DIVISION BY 3 (use z = 2**32; a = 55555555) */
+
+/* The multiply by the reciprocal constant a = 0x55555555 is factored
+   as 5 * 0x11 * 0x101 * 0x10001: each entry point does the multiply
+   by 5 (and the +1 / negate adjustment of the dividend), then jumps
+   to the shared LREF(pos)/LREF(neg) tails which finish the remaining
+   shift-and-add factors and return the high word of the product.  */
+
+GSYM($$divI_3)
+	.export		$$divI_3,millicode
+	comb,<,N	x2,0,LREF(neg3)
+
+	addi		1,x2,x2		/* this can not overflow	*/
+	extru		x2,1,2,x1	/* multiply by 5 to get started */
+	sh2add		x2,x2,x2
+	b		LREF(pos)
+	addc		x1,0,x1
+
+LSYM(neg3)
+	subi		1,x2,x2		/* this can not overflow	*/
+	extru		x2,1,2,x1	/* multiply by 5 to get started */
+	sh2add		x2,x2,x2
+	b		LREF(neg)
+	addc		x1,0,x1
+
+GSYM($$divU_3)
+	.export		$$divU_3,millicode
+	addi		1,x2,x2		/* this CAN overflow */
+	addc		0,0,x1
+	shd		x1,x2,30,t1	/* multiply by 5 to get started */
+	sh2add		x2,x2,x2
+	b		LREF(pos)
+	addc		x1,t1,x1
+
+/* DIVISION BY 5 (use z = 2**32; a = 33333333) */
+
+GSYM($$divI_5)
+	.export		$$divI_5,millicode
+	comb,<,N	x2,0,LREF(neg5)
+
+	addi		3,x2,t1		/* this can not overflow	*/
+	sh1add		x2,t1,x2	/* multiply by 3 to get started */
+	b		LREF(pos)
+	addc		0,0,x1
+
+LSYM(neg5)
+	sub		0,x2,x2		/* negate x2			*/
+	addi		1,x2,x2		/* this can not overflow	*/
+	shd		0,x2,31,x1	/* get top bit (can be 1)	*/
+	sh1add		x2,x2,x2	/* multiply by 3 to get started */
+	b		LREF(neg)
+	addc		x1,0,x1
+
+GSYM($$divU_5)
+	.export		$$divU_5,millicode
+	addi		1,x2,x2		/* this CAN overflow */
+	addc		0,0,x1
+	shd		x1,x2,31,t1	/* multiply by 3 to get started */
+	sh1add		x2,x2,x2
+	b		LREF(pos)
+	addc		t1,x1,x1
+
+/* DIVISION BY	6 (shift to divide by 2 then divide by 3) */
+GSYM($$divI_6)
+	.export		$$divI_6,millicode
+	comb,<,N	x2,0,LREF(neg6)
+	extru		x2,30,31,x2	/* divide by 2			*/
+	addi		5,x2,t1		/* compute 5*(x2+1) = 5*x2+5	*/
+	sh2add		x2,t1,x2	/* multiply by 5 to get started */
+	b		LREF(pos)
+	addc		0,0,x1
+
+LSYM(neg6)
+	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
+					/* negation and adding 1 are done */
+					/* at the same time by the SUBI   */
+	extru		x2,30,31,x2
+	shd		0,x2,30,x1
+	sh2add		x2,x2,x2	/* multiply by 5 to get started */
+	b		LREF(neg)
+	addc		x1,0,x1
+
+GSYM($$divU_6)
+	.export		$$divU_6,millicode
+	extru		x2,30,31,x2	/* divide by 2 */
+	addi		1,x2,x2		/* can not carry */
+	shd		0,x2,30,x1	/* multiply by 5 to get started */
+	sh2add		x2,x2,x2
+	b		LREF(pos)
+	addc		x1,0,x1
+
+/* DIVISION BY 10 (shift to divide by 2 then divide by 5) */
+GSYM($$divU_10)
+	.export		$$divU_10,millicode
+	extru		x2,30,31,x2	/* divide by 2 */
+	addi		3,x2,t1		/* compute 3*(x2+1) = (3*x2)+3	*/
+	sh1add		x2,t1,x2	/* multiply by 3 to get started */
+	addc		0,0,x1
+/* Shared tail for non-negative dividends: finishes the reciprocal
+   multiply by the factors 0x11 * 0x101 * 0x10001 and returns the
+   upper word of the double-length product in x1.  */
+LSYM(pos)
+	shd		x1,x2,28,t1	/* multiply by 0x11 */
+	shd		x2,0,28,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+LSYM(pos_for_17)
+	shd		x1,x2,24,t1	/* multiply by 0x101 */
+	shd		x2,0,24,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+
+	shd		x1,x2,16,t1	/* multiply by 0x10001 */
+	shd		x2,0,16,t2
+	add		x2,t2,x2
+	MILLIRET
+	addc		x1,t1,x1
+
+GSYM($$divI_10)
+	.export		$$divI_10,millicode
+	comb,<		x2,0,LREF(neg10)
+	copy		0,x1
+	extru		x2,30,31,x2	/* divide by 2 */
+	addib,TR	1,x2,LREF(pos)	/* add 1 (can not overflow)     */
+	sh1add		x2,x2,x2	/* multiply by 3 to get started */
+
+LSYM(neg10)
+	subi		2,x2,x2		/* negate, divide by 2, and add 1 */
+					/* negation and adding 1 are done */
+					/* at the same time by the SUBI   */
+	extru		x2,30,31,x2
+	sh1add		x2,x2,x2	/* multiply by 3 to get started */
+/* Shared tail for negated dividends: same 0x11 * 0x101 * 0x10001
+   multiply as LSYM(pos), but the quotient is negated (SUB 0,x1,x1
+   in the return delay slot) before returning.  */
+LSYM(neg)
+	shd		x1,x2,28,t1	/* multiply by 0x11 */
+	shd		x2,0,28,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+LSYM(neg_for_17)
+	shd		x1,x2,24,t1	/* multiply by 0x101 */
+	shd		x2,0,24,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+
+	shd		x1,x2,16,t1	/* multiply by 0x10001 */
+	shd		x2,0,16,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+	MILLIRET
+	sub		0,x1,x1
+
+/* DIVISION BY 12 (shift to divide by 4 then divide by 3) */
+/* Pre-shifts the dividend right by 2 and then reuses the shared
+   divide-by-3 reciprocal tails (LREF(pos)/LREF(neg)) above.  */
+GSYM($$divI_12)
+	.export		$$divI_12,millicode
+	comb,<		x2,0,LREF(neg12)
+	copy		0,x1
+	extru		x2,29,30,x2	/* divide by 4			*/
+	addib,tr	1,x2,LREF(pos)	/* compute 5*(x2+1) = 5*x2+5    */
+	sh2add		x2,x2,x2	/* multiply by 5 to get started */
+
+LSYM(neg12)
+	subi		4,x2,x2		/* negate, divide by 4, and add 1 */
+					/* negation and adding 1 are done */
+					/* at the same time by the SUBI   */
+	extru		x2,29,30,x2
+	b		LREF(neg)
+	sh2add		x2,x2,x2	/* multiply by 5 to get started */
+
+GSYM($$divU_12)
+	.export		$$divU_12,millicode
+	extru		x2,29,30,x2	/* divide by 4   */
+	addi		5,x2,t1		/* can not carry */
+	sh2add		x2,t1,x2	/* multiply by 5 to get started */
+	b		LREF(pos)
+	addc		0,0,x1
+
+/* DIVISION BY 15 (use z = 2**32; a = 11111111) */
+/* a = 0x11111111 is exactly 0x11 * 0x101 * 0x10001, so no initial
+   multiply is needed; note the branch to LREF(pos)+4 enters the
+   shared tail one instruction in, the skipped SHD having been done
+   in the ADDIB delay slot here.  */
+GSYM($$divI_15)
+	.export		$$divI_15,millicode
+	comb,<		x2,0,LREF(neg15)
+	copy		0,x1
+	addib,tr	1,x2,LREF(pos)+4
+	shd		x1,x2,28,t1
+
+LSYM(neg15)
+	b		LREF(neg)
+	subi		1,x2,x2
+
+GSYM($$divU_15)
+	.export		$$divU_15,millicode
+	addi		1,x2,x2		/* this CAN overflow */
+	b		LREF(pos)
+	addc		0,0,x1
+
+/* DIVISION BY 17 (use z = 2**32; a =  f0f0f0f) */
+/* a = 0xf0f0f0f = 0xf * 0x101 * 0x10001: multiply by 0xf here
+   (shift left 4 minus the original), then enter the shared tails at
+   the pos_for_17/neg_for_17 labels, skipping their 0x11 factor.  */
+GSYM($$divI_17)
+	.export		$$divI_17,millicode
+	comb,<,n	x2,0,LREF(neg17)
+	addi		1,x2,x2		/* this can not overflow */
+	shd		0,x2,28,t1	/* multiply by 0xf to get started */
+	shd		x2,0,28,t2
+	sub		t2,x2,x2
+	b		LREF(pos_for_17)
+	subb		t1,0,x1
+
+LSYM(neg17)
+	subi		1,x2,x2		/* this can not overflow */
+	shd		0,x2,28,t1	/* multiply by 0xf to get started */
+	shd		x2,0,28,t2
+	sub		t2,x2,x2
+	b		LREF(neg_for_17)
+	subb		t1,0,x1
+
+GSYM($$divU_17)
+	.export		$$divU_17,millicode
+	addi		1,x2,x2		/* this CAN overflow */
+	addc		0,0,x1
+	shd		x1,x2,28,t1	/* multiply by 0xf to get started */
+LSYM(u17)
+	shd		x2,0,28,t2
+	sub		t2,x2,x2
+	b		LREF(pos_for_17)
+	subb		t1,x1,x1
+
+
+/* DIVISION BY DIVISORS OF FFFFFF, and powers of 2 times these
+   includes 7,9 and also 14
+
+
+   z = 2**24-1
+   r = z mod x = 0
+
+   so choose b = 0
+
+   Also, in order to divide by z = 2**24-1, we approximate by dividing
+   by (z+1) = 2**24 (which is easy), and then correcting.
+
+   (ax) = (z+1)q' + r
+   .	= zq' + (q'+r)
+
+   So to compute (ax)/z, compute q' = (ax)/(z+1) and r = (ax) mod (z+1)
+   Then the true remainder of (ax)/z is (q'+r).  Repeat the process
+   with this new remainder, adding the tentative quotients together,
+   until a tentative quotient is 0 (and then we are done).  There is
+   one last correction to be done.  It is possible that (q'+r) = z.
+   If so, then (q'+r)/(z+1) = 0 and it looks like we are done.	But,
+   in fact, we need to add 1 more to the quotient.  Now, it turns
+   out that this happens if and only if the original value x is
+   an exact multiple of y.  So, to avoid a three instruction test at
+   the end, instead use 1 instruction to add 1 to x at the beginning.  */
+
+/* DIVISION BY 7 (use z = 2**24-1; a = 249249) */
+/* Multiply by a = 0x249249 = 9 * 0x41041 (via SH3ADD then the
+   26- and 20-bit shift/adds), then divide the double-length product
+   by 2**24-1 with the iterative quotient/remainder correction loop
+   described in the comment block above.  */
+GSYM($$divI_7)
+	.export		$$divI_7,millicode
+	comb,<,n	x2,0,LREF(neg7)
+LSYM(7)
+	addi		1,x2,x2		/* can not overflow */
+	shd		0,x2,29,x1
+	sh3add		x2,x2,x2
+	addc		x1,0,x1
+LSYM(pos7)
+	shd		x1,x2,26,t1
+	shd		x2,0,26,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+
+	shd		x1,x2,20,t1
+	shd		x2,0,20,t2
+	add		x2,t2,x2
+	addc		x1,t1,t1
+
+	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/
+
+	copy		0,x1
+	shd,=		t1,x2,24,t1	/* tentative quotient  */
+LSYM(1)
+	addb,tr		t1,x1,LREF(2)	/* add to previous quotient   */
+	extru		x2,31,24,x2	/* new remainder (unadjusted) */
+
+	MILLIRETN
+
+LSYM(2)
+	addb,tr		t1,x2,LREF(1)	/* adjust remainder */
+	extru,=		x2,7,8,t1	/* new quotient     */
+
+LSYM(neg7)
+	subi		1,x2,x2		/* negate x2 and add 1 */
+LSYM(8)
+	shd		0,x2,29,x1
+	sh3add		x2,x2,x2
+	addc		x1,0,x1
+
+LSYM(neg7_shift)
+	shd		x1,x2,26,t1
+	shd		x2,0,26,t2
+	add		x2,t2,x2
+	addc		x1,t1,x1
+
+	shd		x1,x2,20,t1
+	shd		x2,0,20,t2
+	add		x2,t2,x2
+	addc		x1,t1,t1
+
+	/* computed <t1,x2>.  Now divide it by (2**24 - 1)	*/
+
+	copy		0,x1
+	shd,=		t1,x2,24,t1	/* tentative quotient  */
+LSYM(3)
+	addb,tr		t1,x1,LREF(4)	/* add to previous quotient   */
+	extru		x2,31,24,x2	/* new remainder (unadjusted) */
+
+	MILLIRET
+	sub		0,x1,x1		/* negate result    */
+
+LSYM(4)
+	addb,tr		t1,x2,LREF(3)	/* adjust remainder */
+	extru,=		x2,7,8,t1	/* new quotient     */
+
+GSYM($$divU_7)
+	.export		$$divU_7,millicode
+	addi		1,x2,x2		/* can carry */
+	addc		0,0,x1
+	shd		x1,x2,29,t1
+	sh3add		x2,x2,x2
+	b		LREF(pos7)
+	addc		t1,x1,x1
+
+/* DIVISION BY 9 (use z = 2**24-1; a = 1c71c7) */
+/* Multiply by 7 here (shift left 3 minus the original), then reuse
+   the divide-by-7 routine's shared tails (pos7 / neg7_shift) to
+   finish the multiply and the division by 2**24-1.  */
+GSYM($$divI_9)
+	.export		$$divI_9,millicode
+	comb,<,n	x2,0,LREF(neg9)
+	addi		1,x2,x2		/* can not overflow */
+	shd		0,x2,29,t1
+	shd		x2,0,29,t2
+	sub		t2,x2,x2
+	b		LREF(pos7)
+	subb		t1,0,x1
+
+LSYM(neg9)
+	subi		1,x2,x2		/* negate and add 1 */
+	shd		0,x2,29,t1
+	shd		x2,0,29,t2
+	sub		t2,x2,x2
+	b		LREF(neg7_shift)
+	subb		t1,0,x1
+
+GSYM($$divU_9)
+	.export		$$divU_9,millicode
+	addi		1,x2,x2		/* can carry */
+	addc		0,0,x1
+	shd		x1,x2,29,t1
+	shd		x2,0,29,t2
+	sub		t2,x2,x2
+	b		LREF(pos7)
+	subb		t1,x1,x1
+
+/* DIVISION BY 14 (shift to divide by 2 then divide by 7) */
+/* The negative path enters the 7-routine at LSYM(8), i.e. after its
+   own negate-and-add-1 step, since the SUBI here already did that.  */
+GSYM($$divI_14)
+	.export		$$divI_14,millicode
+	comb,<,n	x2,0,LREF(neg14)
+GSYM($$divU_14)
+	.export		$$divU_14,millicode
+	b		LREF(7)		/* go to 7 case */
+	extru		x2,30,31,x2	/* divide by 2  */
+
+LSYM(neg14)
+	subi		2,x2,x2		/* negate (and add 2) */
+	b		LREF(8)
+	extru		x2,30,31,x2	/* divide by 2	      */
+	.exit
+	.procend
+	.end
+#endif
+
+#ifdef L_mulI
+/* VERSION "@(#)$$mulI $ Revision: 12.4 $ $ Date: 94/03/17 17:18:51 $" */
+/******************************************************************************
+This routine is used on PA2.0 processors when gcc -mno-fpregs is used
+
+ROUTINE:	$$mulI
+
+
+DESCRIPTION:
+
+	$$mulI multiplies two single word integers, giving a single
+	word result.
+
+
+INPUT REGISTERS:
+
+	arg0 = Operand 1
+	arg1 = Operand 2
+	r31  == return pc
+	sr0  == return space when called externally
+
+
+OUTPUT REGISTERS:
+
+	arg0 = undefined
+	arg1 = undefined
+	ret1 = result
+
+OTHER REGISTERS AFFECTED:
+
+	r1   = undefined
+
+SIDE EFFECTS:
+
+	Causes a trap under the following conditions:  NONE
+	Changes memory at the following places:  NONE
+
+PERMISSIBLE CONTEXT:
+
+	Unwindable
+	Does not create a stack frame
+	Is usable for internal or external millicode
+
+DISCUSSION:
+
+	Calls other millicode routines via mrp:  NONE
+	Calls other millicode routines:  NONE
+
+***************************************************************************/
+
+
+/* Single-instruction shorthands for the $$mulI dispatch table.
+   Naming scheme: "t0__9a0" reads "t0 = 9*a0" (SH3ADD computes
+   8*a0 + a0), "r__r_8t0" reads "r = r + 8*t0", "a0__256a0" reads
+   "a0 = 256*a0" (ZDEP shift), and the "b_e_*" macros branch to
+   shared epilogue labels (e_shift, e_t0, ... — presumably defined
+   later in this file, past this chunk).  */
+#define	a0	%arg0
+#define	a1	%arg1
+#define	t0	%r1
+#define	r	%ret1
+
+#define	a0__128a0	zdep	a0,24,25,a0
+#define	a0__256a0	zdep	a0,23,24,a0
+#define	a1_ne_0_b_l0	comb,<>	a1,0,LREF(l0)
+#define	a1_ne_0_b_l1	comb,<>	a1,0,LREF(l1)
+#define	a1_ne_0_b_l2	comb,<>	a1,0,LREF(l2)
+#define	b_n_ret_t0	b,n	LREF(ret_t0)
+#define	b_e_shift	b	LREF(e_shift)
+#define	b_e_t0ma0	b	LREF(e_t0ma0)
+#define	b_e_t0		b	LREF(e_t0)
+#define	b_e_t0a0	b	LREF(e_t0a0)
+#define	b_e_t02a0	b	LREF(e_t02a0)
+#define	b_e_t04a0	b	LREF(e_t04a0)
+#define	b_e_2t0		b	LREF(e_2t0)
+#define	b_e_2t0a0	b	LREF(e_2t0a0)
+#define	b_e_2t04a0	b	LREF(e2t04a0)
+#define	b_e_3t0		b	LREF(e_3t0)
+#define	b_e_4t0		b	LREF(e_4t0)
+#define	b_e_4t0a0	b	LREF(e_4t0a0)
+#define	b_e_4t08a0	b	LREF(e4t08a0)
+#define	b_e_5t0		b	LREF(e_5t0)
+#define	b_e_8t0		b	LREF(e_8t0)
+#define	b_e_8t0a0	b	LREF(e_8t0a0)
+#define	r__r_a0		add	r,a0,r
+#define	r__r_2a0	sh1add	a0,r,r
+#define	r__r_4a0	sh2add	a0,r,r
+#define	r__r_8a0	sh3add	a0,r,r
+#define	r__r_t0		add	r,t0,r
+#define	r__r_2t0	sh1add	t0,r,r
+#define	r__r_4t0	sh2add	t0,r,r
+#define	r__r_8t0	sh3add	t0,r,r
+#define	t0__3a0		sh1add	a0,a0,t0
+#define	t0__4a0		sh2add	a0,0,t0
+#define	t0__5a0		sh2add	a0,a0,t0
+#define	t0__8a0		sh3add	a0,0,t0
+#define	t0__9a0		sh3add	a0,a0,t0
+#define	t0__16a0	zdep	a0,27,28,t0
+#define	t0__32a0	zdep	a0,26,27,t0
+#define	t0__64a0	zdep	a0,25,26,t0
+#define	t0__128a0	zdep	a0,24,25,t0
+#define	t0__t0ma0	sub	t0,a0,t0
+#define	t0__t0_a0	add	t0,a0,t0
+#define	t0__t0_2a0	sh1add	a0,t0,t0
+#define	t0__t0_4a0	sh2add	a0,t0,t0
+#define	t0__t0_8a0	sh3add	a0,t0,t0
+#define	t0__2t0_a0	sh1add	t0,a0,t0
+#define	t0__3t0		sh1add	t0,t0,t0
+#define	t0__4t0		sh2add	t0,0,t0
+#define	t0__4t0_a0	sh2add	t0,a0,t0
+#define	t0__5t0		sh2add	t0,t0,t0
+#define	t0__8t0		sh3add	t0,0,t0
+#define	t0__8t0_a0	sh3add	t0,a0,t0
+#define	t0__9t0		sh3add	t0,t0,t0
+#define	t0__16t0	zdep	t0,27,28,t0
+#define	t0__32t0	zdep	t0,26,27,t0
+#define	t0__256a0	zdep	a0,23,24,t0
+
+
+	SUBSPA_MILLI
+	ATTR_MILLI
+	.align 16
+	.proc
+	.callinfo millicode
+	.export $$mulI, millicode
+GSYM($$mulI)
+	combt,<<=	a1,a0,LREF(l4)	/* swap args if unsigned a1>a0 */
+	copy		0,r		/* zero out the result */
+	xor		a0,a1,a0	/* swap a0 & a1 using the */
+	xor		a0,a1,a1	/*  old xor trick */
+	xor		a0,a1,a0
+LSYM(l4)
+	combt,<=	0,a0,LREF(l3)		/* if a0>=0 then proceed like unsigned */
+	zdep		a1,30,8,t0	/* t0 = (a1&0xff)<<1 ********* */
+	sub,>		0,a1,t0		/* otherwise negate both and */
+	combt,<=,n	a0,t0,LREF(l2)	/*  swap back if |a0|<|a1| */
+	sub		0,a0,a1
+	movb,tr,n	t0,a0,LREF(l2)	/* 10th inst. */
+
+LSYM(l0)	r__r_t0				/* add in this partial product */
+LSYM(l1)	a0__256a0			/* a0 <<= 8 ****************** */
+LSYM(l2)	zdep		a1,30,8,t0	/* t0 = (a1&0xff)<<1 ********* */
+LSYM(l3)	blr		t0,0		/* case on these 8 bits ****** */
+		extru		a1,23,24,a1	/* a1 >>= 8 ****************** */
+
+/*16 insts before this. */
+/*			  a0 <<= 8 ************************** */
+LSYM(x0)	a1_ne_0_b_l2	! a0__256a0	! MILLIRETN	! nop
+LSYM(x1)	a1_ne_0_b_l1	! r__r_a0	! MILLIRETN	! nop
+LSYM(x2)	a1_ne_0_b_l1	! r__r_2a0	! MILLIRETN	! nop
+LSYM(x3)	a1_ne_0_b_l0	! t0__3a0	! MILLIRET	! r__r_t0
+LSYM(x4)	a1_ne_0_b_l1	! r__r_4a0	! MILLIRETN	! nop
+LSYM(x5)	a1_ne_0_b_l0	! t0__5a0	! MILLIRET	! r__r_t0
+LSYM(x6)	t0__3a0		! a1_ne_0_b_l1	! r__r_2t0	! MILLIRETN
+LSYM(x7)	t0__3a0		! a1_ne_0_b_l0	! r__r_4a0	! b_n_ret_t0
+LSYM(x8)	a1_ne_0_b_l1	! r__r_8a0	! MILLIRETN	! nop
+LSYM(x9)	a1_ne_0_b_l0	! t0__9a0	! MILLIRET	! r__r_t0
+LSYM(x10)	t0__5a0		! a1_ne_0_b_l1	! r__r_2t0	! MILLIRETN
+LSYM(x11)	t0__3a0		! a1_ne_0_b_l0	! r__r_8a0	! b_n_ret_t0
+LSYM(x12)	t0__3a0		! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
+LSYM(x13)	t0__5a0		! a1_ne_0_b_l0	! r__r_8a0	! b_n_ret_t0
+LSYM(x14)	t0__3a0		! t0__2t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x15)	t0__5a0		! a1_ne_0_b_l0	! t0__3t0	! b_n_ret_t0
+LSYM(x16)	t0__16a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
+LSYM(x17)	t0__9a0		! a1_ne_0_b_l0	! t0__t0_8a0	! b_n_ret_t0
+LSYM(x18)	t0__9a0		! a1_ne_0_b_l1	! r__r_2t0	! MILLIRETN
+LSYM(x19)	t0__9a0		! a1_ne_0_b_l0	! t0__2t0_a0	! b_n_ret_t0
+LSYM(x20)	t0__5a0		! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
+LSYM(x21)	t0__5a0		! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
+LSYM(x22)	t0__5a0		! t0__2t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x23)	t0__5a0		! t0__2t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x24)	t0__3a0		! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
+LSYM(x25)	t0__5a0		! a1_ne_0_b_l0	! t0__5t0	! b_n_ret_t0
+LSYM(x26)	t0__3a0		! t0__4t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x27)	t0__3a0		! a1_ne_0_b_l0	! t0__9t0	! b_n_ret_t0
+LSYM(x28)	t0__3a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x29)	t0__3a0		! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
+LSYM(x30)	t0__5a0		! t0__3t0	! b_e_shift	! r__r_2t0
+LSYM(x31)	t0__32a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
+LSYM(x32)	t0__32a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
+LSYM(x33)	t0__8a0		! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
+LSYM(x34)	t0__16a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x35)	t0__9a0		! t0__3t0	! b_e_t0	! t0__t0_8a0
+LSYM(x36)	t0__9a0		! a1_ne_0_b_l1	! r__r_4t0	! MILLIRETN
+LSYM(x37)	t0__9a0		! a1_ne_0_b_l0	! t0__4t0_a0	! b_n_ret_t0
+LSYM(x38)	t0__9a0		! t0__2t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x39)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x40)	t0__5a0		! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
+LSYM(x41)	t0__5a0		! a1_ne_0_b_l0	! t0__8t0_a0	! b_n_ret_t0
+LSYM(x42)	t0__5a0		! t0__4t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x43)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x44)	t0__5a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x45)	t0__9a0		! a1_ne_0_b_l0	! t0__5t0	! b_n_ret_t0
+LSYM(x46)	t0__9a0		! t0__5t0	! b_e_t0	! t0__t0_a0
+LSYM(x47)	t0__9a0		! t0__5t0	! b_e_t0	! t0__t0_2a0
+LSYM(x48)	t0__3a0		! a1_ne_0_b_l0	! t0__16t0	! b_n_ret_t0
+LSYM(x49)	t0__9a0		! t0__5t0	! b_e_t0	! t0__t0_4a0
+LSYM(x50)	t0__5a0		! t0__5t0	! b_e_shift	! r__r_2t0
+LSYM(x51)	t0__9a0		! t0__t0_8a0	! b_e_t0	! t0__3t0
+LSYM(x52)	t0__3a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x53)	t0__3a0		! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
+LSYM(x54)	t0__9a0		! t0__3t0	! b_e_shift	! r__r_2t0
+LSYM(x55)	t0__9a0		! t0__3t0	! b_e_t0	! t0__2t0_a0
+LSYM(x56)	t0__3a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
+LSYM(x57)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__3t0
+LSYM(x58)	t0__3a0		! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
+LSYM(x59)	t0__9a0		! t0__2t0_a0	! b_e_t02a0	! t0__3t0
+LSYM(x60)	t0__5a0		! t0__3t0	! b_e_shift	! r__r_4t0
+LSYM(x61)	t0__5a0		! t0__3t0	! b_e_t0	! t0__4t0_a0
+LSYM(x62)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
+LSYM(x63)	t0__64a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
+LSYM(x64)	t0__64a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
+LSYM(x65)	t0__8a0		! a1_ne_0_b_l0	! t0__8t0_a0	! b_n_ret_t0
+LSYM(x66)	t0__32a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x67)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x68)	t0__8a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x69)	t0__8a0		! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
+LSYM(x70)	t0__64a0	! t0__t0_4a0	! b_e_t0	! t0__t0_2a0
+LSYM(x71)	t0__9a0		! t0__8t0	! b_e_t0	! t0__t0ma0
+LSYM(x72)	t0__9a0		! a1_ne_0_b_l1	! r__r_8t0	! MILLIRETN
+LSYM(x73)	t0__9a0		! t0__8t0_a0	! b_e_shift	! r__r_t0
+LSYM(x74)	t0__9a0		! t0__4t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x75)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x76)	t0__9a0		! t0__2t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x77)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__4t0_a0
+LSYM(x78)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__2t0_a0
+LSYM(x79)	t0__16a0	! t0__5t0	! b_e_t0	! t0__t0ma0
+LSYM(x80)	t0__16a0	! t0__5t0	! b_e_shift	! r__r_t0
+LSYM(x81)	t0__9a0		! t0__9t0	! b_e_shift	! r__r_t0
+LSYM(x82)	t0__5a0		! t0__8t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x83)	t0__5a0		! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x84)	t0__5a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x85)	t0__8a0		! t0__2t0_a0	! b_e_t0	! t0__5t0
+LSYM(x86)	t0__5a0		! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
+LSYM(x87)	t0__9a0		! t0__9t0	! b_e_t02a0	! t0__t0_4a0
+LSYM(x88)	t0__5a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
+LSYM(x89)	t0__5a0		! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
+LSYM(x90)	t0__9a0		! t0__5t0	! b_e_shift	! r__r_2t0
+LSYM(x91)	t0__9a0		! t0__5t0	! b_e_t0	! t0__2t0_a0
+LSYM(x92)	t0__5a0		! t0__2t0_a0	! b_e_4t0	! t0__2t0_a0
+LSYM(x93)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__3t0
+LSYM(x94)	t0__9a0		! t0__5t0	! b_e_2t0	! t0__t0_2a0
+LSYM(x95)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__5t0
+LSYM(x96)	t0__8a0		! t0__3t0	! b_e_shift	! r__r_4t0
+LSYM(x97)	t0__8a0		! t0__3t0	! b_e_t0	! t0__4t0_a0
+LSYM(x98)	t0__32a0	! t0__3t0	! b_e_t0	! t0__t0_2a0
+LSYM(x99)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__3t0
+LSYM(x100)	t0__5a0		! t0__5t0	! b_e_shift	! r__r_4t0
+LSYM(x101)	t0__5a0		! t0__5t0	! b_e_t0	! t0__4t0_a0
+LSYM(x102)	t0__32a0	! t0__t0_2a0	! b_e_t0	! t0__3t0
+LSYM(x103)	t0__5a0		! t0__5t0	! b_e_t02a0	! t0__4t0_a0
+LSYM(x104)	t0__3a0		! t0__4t0_a0	! b_e_shift	! r__r_8t0
+LSYM(x105)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__5t0
+LSYM(x106)	t0__3a0		! t0__4t0_a0	! b_e_2t0	! t0__4t0_a0
+LSYM(x107)	t0__9a0		! t0__t0_4a0	! b_e_t02a0	! t0__8t0_a0
+LSYM(x108)	t0__9a0		! t0__3t0	! b_e_shift	! r__r_4t0
+LSYM(x109)	t0__9a0		! t0__3t0	! b_e_t0	! t0__4t0_a0
+LSYM(x110)	t0__9a0		! t0__3t0	! b_e_2t0	! t0__2t0_a0
+LSYM(x111)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__3t0
+LSYM(x112)	t0__3a0		! t0__2t0_a0	! b_e_t0	! t0__16t0
+LSYM(x113)	t0__9a0		! t0__4t0_a0	! b_e_t02a0	! t0__3t0
+LSYM(x114)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__3t0
+LSYM(x115)	t0__9a0		! t0__2t0_a0	! b_e_2t0a0	! t0__3t0
+LSYM(x116)	t0__3a0		! t0__2t0_a0	! b_e_4t0	! t0__4t0_a0
+LSYM(x117)	t0__3a0		! t0__4t0_a0	! b_e_t0	! t0__9t0
+LSYM(x118)	t0__3a0		! t0__4t0_a0	! b_e_t0a0	! t0__9t0
+LSYM(x119)	t0__3a0		! t0__4t0_a0	! b_e_t02a0	! t0__9t0
+LSYM(x120)	t0__5a0		! t0__3t0	! b_e_shift	! r__r_8t0
+LSYM(x121)	t0__5a0		! t0__3t0	! b_e_t0	! t0__8t0_a0
+LSYM(x122)	t0__5a0		! t0__3t0	! b_e_2t0	! t0__4t0_a0
+LSYM(x123)	t0__5a0		! t0__8t0_a0	! b_e_t0	! t0__3t0
+LSYM(x124)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_4t0
+LSYM(x125)	t0__5a0		! t0__5t0	! b_e_t0	! t0__5t0
+LSYM(x126)	t0__64a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
+LSYM(x127)	t0__128a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
+LSYM(x128)	t0__128a0	! a1_ne_0_b_l1	! r__r_t0	! MILLIRETN
+LSYM(x129)	t0__128a0	! a1_ne_0_b_l0	! t0__t0_a0	! b_n_ret_t0
+LSYM(x130)	t0__64a0	! t0__t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x131)	t0__8a0		! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x132)	t0__8a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x133)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
+LSYM(x134)	t0__8a0		! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
+LSYM(x135)	t0__9a0		! t0__5t0	! b_e_t0	! t0__3t0
+LSYM(x136)	t0__8a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
+LSYM(x137)	t0__8a0		! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
+LSYM(x138)	t0__8a0		! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
+LSYM(x139)	t0__8a0		! t0__2t0_a0	! b_e_2t0a0	! t0__4t0_a0
+LSYM(x140)	t0__3a0		! t0__2t0_a0	! b_e_4t0	! t0__5t0
+LSYM(x141)	t0__8a0		! t0__2t0_a0	! b_e_4t0a0	! t0__2t0_a0
+LSYM(x142)	t0__9a0		! t0__8t0	! b_e_2t0	! t0__t0ma0
+LSYM(x143)	t0__16a0	! t0__9t0	! b_e_t0	! t0__t0ma0
+LSYM(x144)	t0__9a0		! t0__8t0	! b_e_shift	! r__r_2t0
+LSYM(x145)	t0__9a0		! t0__8t0	! b_e_t0	! t0__2t0_a0
+LSYM(x146)	t0__9a0		! t0__8t0_a0	! b_e_shift	! r__r_2t0
+LSYM(x147)	t0__9a0		! t0__8t0_a0	! b_e_t0	! t0__2t0_a0
+LSYM(x148)	t0__9a0		! t0__4t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x149)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__4t0_a0
+LSYM(x150)	t0__9a0		! t0__4t0_a0	! b_e_2t0	! t0__2t0_a0
+LSYM(x151)	t0__9a0		! t0__4t0_a0	! b_e_2t0a0	! t0__2t0_a0
+LSYM(x152)	t0__9a0		! t0__2t0_a0	! b_e_shift	! r__r_8t0
+LSYM(x153)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__8t0_a0
+LSYM(x154)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__4t0_a0
+LSYM(x155)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__5t0
+LSYM(x156)	t0__9a0		! t0__2t0_a0	! b_e_4t0	! t0__2t0_a0
+LSYM(x157)	t0__32a0	! t0__t0ma0	! b_e_t02a0	! t0__5t0
+LSYM(x158)	t0__16a0	! t0__5t0	! b_e_2t0	! t0__t0ma0
+LSYM(x159)	t0__32a0	! t0__5t0	! b_e_t0	! t0__t0ma0
+LSYM(x160)	t0__5a0		! t0__4t0	! b_e_shift	! r__r_8t0
+LSYM(x161)	t0__8a0		! t0__5t0	! b_e_t0	! t0__4t0_a0
+LSYM(x162)	t0__9a0		! t0__9t0	! b_e_shift	! r__r_2t0
+LSYM(x163)	t0__9a0		! t0__9t0	! b_e_t0	! t0__2t0_a0
+LSYM(x164)	t0__5a0		! t0__8t0_a0	! b_e_shift	! r__r_4t0
+LSYM(x165)	t0__8a0		! t0__4t0_a0	! b_e_t0	! t0__5t0
+LSYM(x166)	t0__5a0		! t0__8t0_a0	! b_e_2t0	! t0__2t0_a0
+LSYM(x167)	t0__5a0		! t0__8t0_a0	! b_e_2t0a0	! t0__2t0_a0
+LSYM(x168)	t0__5a0		! t0__4t0_a0	! b_e_shift	! r__r_8t0
+LSYM(x169)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__8t0_a0
+LSYM(x170)	t0__32a0	! t0__t0_2a0	! b_e_t0	! t0__5t0
+LSYM(x171)	t0__9a0		! t0__2t0_a0	! b_e_t0	! t0__9t0
+LSYM(x172)	t0__5a0		! t0__4t0_a0	! b_e_4t0	! t0__2t0_a0
+LSYM(x173)	t0__9a0		! t0__2t0_a0	! b_e_t02a0	! t0__9t0
+LSYM(x174)	t0__32a0	! t0__t0_2a0	! b_e_t04a0	! t0__5t0
+LSYM(x175)	t0__8a0		! t0__2t0_a0	! b_e_5t0	! t0__2t0_a0
+LSYM(x176)	t0__5a0		! t0__4t0_a0	! b_e_8t0	! t0__t0_a0
+LSYM(x177)	t0__5a0		! t0__4t0_a0	! b_e_8t0a0	! t0__t0_a0
+LSYM(x178)	t0__5a0		! t0__2t0_a0	! b_e_2t0	! t0__8t0_a0
+LSYM(x179)	t0__5a0		! t0__2t0_a0	! b_e_2t0a0	! t0__8t0_a0
+LSYM(x180)	t0__9a0		! t0__5t0	! b_e_shift	! r__r_4t0
+LSYM(x181)	t0__9a0		! t0__5t0	! b_e_t0	! t0__4t0_a0
+LSYM(x182)	t0__9a0		! t0__5t0	! b_e_2t0	! t0__2t0_a0
+LSYM(x183)	t0__9a0		! t0__5t0	! b_e_2t0a0	! t0__2t0_a0
+LSYM(x184)	t0__5a0		! t0__9t0	! b_e_4t0	! t0__t0_a0
+LSYM(x185)	t0__9a0		! t0__4t0_a0	! b_e_t0	! t0__5t0
+LSYM(x186)	t0__32a0	! t0__t0ma0	! b_e_2t0	! t0__3t0
+LSYM(x187)	t0__9a0		! t0__4t0_a0	! b_e_t02a0	! t0__5t0
+LSYM(x188)	t0__9a0		! t0__5t0	! b_e_4t0	! t0__t0_2a0
+LSYM(x189)	t0__5a0		! t0__4t0_a0	! b_e_t0	! t0__9t0
+LSYM(x190)	t0__9a0		! t0__2t0_a0	! b_e_2t0	! t0__5t0
+LSYM(x191)	t0__64a0	! t0__3t0	! b_e_t0	! t0__t0ma0
+LSYM(x192)	t0__8a0		! t0__3t0	! b_e_shift	! r__r_8t0
+LSYM(x193)	t0__8a0		! t0__3t0	! b_e_t0	! t0__8t0_a0
+LSYM(x194)	t0__8a0		! t0__3t0	! b_e_2t0	! t0__4t0_a0
+LSYM(x195)	t0__8a0		! t0__8t0_a0	! b_e_t0	! t0__3t0
+LSYM(x196)	t0__8a0		! t0__3t0	! b_e_4t0	! t0__2t0_a0
+LSYM(x197)	t0__8a0		! t0__3t0	! b_e_4t0a0	! t0__2t0_a0
+LSYM(x198)	t0__64a0	! t0__t0_2a0	! b_e_t0	! t0__3t0
+LSYM(x199)	t0__8a0		! t0__4t0_a0	! b_e_2t0a0	! t0__3t0
+LSYM(x200)	t0__5a0		! t0__5t0	! b_e_shift	! r__r_8t0
+LSYM(x201)	t0__5a0		! t0__5t0	! b_e_t0	! t0__8t0_a0
+LSYM(x202)	t0__5a0		! t0__5t0	! b_e_2t0	! t0__4t0_a0
+LSYM(x203)	t0__5a0		! t0__5t0	! b_e_2t0a0	! t0__4t0_a0
+LSYM(x204)	t0__8a0		! t0__2t0_a0	! b_e_4t0	! t0__3t0
+LSYM(x205)	t0__5a0		! t0__8t0_a0	! b_e_t0	! t0__5t0
+LSYM(x206)	t0__64a0	! t0__t0_4a0	! b_e_t02a0	! t0__3t0
+LSYM(x207)	t0__8a0		! t0__2t0_a0	! b_e_3t0	! t0__4t0_a0
+LSYM(x208)	t0__5a0		! t0__5t0	! b_e_8t0	! t0__t0_a0
+LSYM(x209)	t0__5a0		! t0__5t0	! b_e_8t0a0	! t0__t0_a0
+LSYM(x210)	t0__5a0		! t0__4t0_a0	! b_e_2t0	! t0__5t0
+LSYM(x211)	t0__5a0		! t0__4t0_a0	! b_e_2t0a0	! t0__5t0
+LSYM(x212)	t0__3a0		! t0__4t0_a0	! b_e_4t0	! t0__4t0_a0
+LSYM(x213)	t0__3a0		! t0__4t0_a0	! b_e_4t0a0	! t0__4t0_a0
+LSYM(x214)	t0__9a0		! t0__t0_4a0	! b_e_2t04a0	! t0__8t0_a0
+LSYM(x215)	t0__5a0		! t0__4t0_a0	! b_e_5t0	! t0__2t0_a0
+LSYM(x216)	t0__9a0		! t0__3t0	! b_e_shift	! r__r_8t0
+LSYM(x217)	t0__9a0		! t0__3t0	! b_e_t0	! t0__8t0_a0
+LSYM(x218)	t0__9a0		! t0__3t0	! b_e_2t0	! t0__4t0_a0
+LSYM(x219)	t0__9a0		! t0__8t0_a0	! b_e_t0	! t0__3t0
+LSYM(x220)	t0__3a0		! t0__9t0	! b_e_4t0	! t0__2t0_a0
+LSYM(x221)	t0__3a0		! t0__9t0	! b_e_4t0a0	! t0__2t0_a0
+LSYM(x222)	t0__9a0		! t0__4t0_a0	! b_e_2t0	! t0__3t0
+LSYM(x223)	t0__9a0		! t0__4t0_a0	! b_e_2t0a0	! t0__3t0
+LSYM(x224)	t0__9a0		! t0__3t0	! b_e_8t0	! t0__t0_a0
+LSYM(x225)	t0__9a0		! t0__5t0	! b_e_t0	! t0__5t0
+LSYM(x226)	t0__3a0		! t0__2t0_a0	! b_e_t02a0	! t0__32t0
+LSYM(x227)	t0__9a0		! t0__5t0	! b_e_t02a0	! t0__5t0
+LSYM(x228)	t0__9a0		! t0__2t0_a0	! b_e_4t0	! t0__3t0
+LSYM(x229)	t0__9a0		! t0__2t0_a0	! b_e_4t0a0	! t0__3t0
+LSYM(x230)	t0__9a0		! t0__5t0	! b_e_5t0	! t0__t0_a0
+LSYM(x231)	t0__9a0		! t0__2t0_a0	! b_e_3t0	! t0__4t0_a0
+LSYM(x232)	t0__3a0		! t0__2t0_a0	! b_e_8t0	! t0__4t0_a0
+LSYM(x233)	t0__3a0		! t0__2t0_a0	! b_e_8t0a0	! t0__4t0_a0
+LSYM(x234)	t0__3a0		! t0__4t0_a0	! b_e_2t0	! t0__9t0
+LSYM(x235)	t0__3a0		! t0__4t0_a0	! b_e_2t0a0	! t0__9t0
+LSYM(x236)	t0__9a0		! t0__2t0_a0	! b_e_4t08a0	! t0__3t0
+LSYM(x237)	t0__16a0	! t0__5t0	! b_e_3t0	! t0__t0ma0
+LSYM(x238)	t0__3a0		! t0__4t0_a0	! b_e_2t04a0	! t0__9t0
+LSYM(x239)	t0__16a0	! t0__5t0	! b_e_t0ma0	! t0__3t0
+LSYM(x240)	t0__9a0		! t0__t0_a0	! b_e_8t0	! t0__3t0
+LSYM(x241)	t0__9a0		! t0__t0_a0	! b_e_8t0a0	! t0__3t0
+LSYM(x242)	t0__5a0		! t0__3t0	! b_e_2t0	! t0__8t0_a0
+LSYM(x243)	t0__9a0		! t0__9t0	! b_e_t0	! t0__3t0
+LSYM(x244)	t0__5a0		! t0__3t0	! b_e_4t0	! t0__4t0_a0
+LSYM(x245)	t0__8a0		! t0__3t0	! b_e_5t0	! t0__2t0_a0
+LSYM(x246)	t0__5a0		! t0__8t0_a0	! b_e_2t0	! t0__3t0
+LSYM(x247)	t0__5a0		! t0__8t0_a0	! b_e_2t0a0	! t0__3t0
+LSYM(x248)	t0__32a0	! t0__t0ma0	! b_e_shift	! r__r_8t0
+LSYM(x249)	t0__32a0	! t0__t0ma0	! b_e_t0	! t0__8t0_a0
+LSYM(x250)	t0__5a0		! t0__5t0	! b_e_2t0	! t0__5t0
+LSYM(x251)	t0__5a0		! t0__5t0	! b_e_2t0a0	! t0__5t0
+LSYM(x252)	t0__64a0	! t0__t0ma0	! b_e_shift	! r__r_4t0
+LSYM(x253)	t0__64a0	! t0__t0ma0	! b_e_t0	! t0__4t0_a0
+LSYM(x254)	t0__128a0	! t0__t0ma0	! b_e_shift	! r__r_2t0
+LSYM(x255)	t0__256a0	! a1_ne_0_b_l0	! t0__t0ma0	! b_n_ret_t0
+/*1040 insts before this. */
+LSYM(ret_t0)	MILLIRET
+LSYM(e_t0)	r__r_t0
+LSYM(e_shift)	a1_ne_0_b_l2
+	a0__256a0	/* a0 <<= 8 *********** */
+	MILLIRETN
+LSYM(e_t0ma0)	a1_ne_0_b_l0
+	t0__t0ma0
+	MILLIRET
+	r__r_t0
+LSYM(e_t0a0)	a1_ne_0_b_l0
+	t0__t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e_t02a0)	a1_ne_0_b_l0
+	t0__t0_2a0
+	MILLIRET
+	r__r_t0
+LSYM(e_t04a0)	a1_ne_0_b_l0
+	t0__t0_4a0
+	MILLIRET
+	r__r_t0
+LSYM(e_2t0)	a1_ne_0_b_l1
+	r__r_2t0
+	MILLIRETN
+LSYM(e_2t0a0)	a1_ne_0_b_l0
+	t0__2t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e2t04a0)	t0__t0_2a0
+	a1_ne_0_b_l1
+	r__r_2t0
+	MILLIRETN
+LSYM(e_3t0)	a1_ne_0_b_l0
+	t0__3t0
+	MILLIRET
+	r__r_t0
+LSYM(e_4t0)	a1_ne_0_b_l1
+	r__r_4t0
+	MILLIRETN
+LSYM(e_4t0a0)	a1_ne_0_b_l0
+	t0__4t0_a0
+	MILLIRET
+	r__r_t0
+LSYM(e4t08a0)	t0__t0_2a0
+	a1_ne_0_b_l1
+	r__r_4t0
+	MILLIRETN
+LSYM(e_5t0)	a1_ne_0_b_l0
+	t0__5t0
+	MILLIRET
+	r__r_t0
+LSYM(e_8t0)	a1_ne_0_b_l1
+	r__r_8t0
+	MILLIRETN
+LSYM(e_8t0a0)	a1_ne_0_b_l0
+	t0__8t0_a0
+	MILLIRET
+	r__r_t0
+
+	.procend
+	.end
+#endif
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa-64.h gcc/gcc/config/pa/pa-64.h
--- gnu_gcc/gcc/config/pa/pa-64.h	Sun Jan 28 20:08:16 2001
+++ gcc/gcc/config/pa/pa-64.h	Mon Apr 16 00:33:26 2001
@@ -19,43 +19,6 @@ along with GNU CC; see the file COPYING.
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */

-/* We can debug dynamically linked executables on hpux11; we also
-   want dereferencing of a NULL pointer to cause a SEGV.  */
-#undef LINK_SPEC
-#define LINK_SPEC \
-  "-E %{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{shared:-shared}"
-
-/* Like the default, except no -lg.  */
-#undef LIB_SPEC
-#define LIB_SPEC \
-  "%{!shared:\
-     %{!p:\
-       %{!pg:\
-         %{!threads:-lc}\
-         %{threads:-lcma -lc_r}}\
-       %{p: -L/lib/libp/ -lc}\
-       %{pg: -L/lib/libp/ -lc}}} /usr/lib/pa20_64/milli.a"
-
-/* Under hpux11, the normal location of the `ld' and `as' programs is the
-   /usr/ccs/bin directory.  */
-
-#ifndef CROSS_COMPILE
-#undef MD_EXEC_PREFIX
-#define MD_EXEC_PREFIX "/opt/langtools/bin"
-#endif
-
-/* Under hpux11 the normal location of the various *crt*.o files is the
-   /usr/ccs/lib directory.  */
-
-#ifndef CROSS_COMPILE
-#undef MD_STARTFILE_PREFIX
-#define MD_STARTFILE_PREFIX "/opt/langtools/lib/pa20_64/"
-#endif
-
-/* hpux11 has the new HP assembler.  It's still lousy, but it's a whole lot
-   better than the assembler shipped with older versions of hpux.  */
-#define NEW_HP_ASSEMBLER
-
 /* The default sizes for basic datatypes provided by GCC are not
    correct for the PA64 runtime architecture.

@@ -76,11 +39,17 @@ Boston, MA 02111-1307, USA.  */

   Make GCC agree with types.h.  */
 #undef SIZE_TYPE
-#undef PTRDIFF_TYPE
-
 #define SIZE_TYPE "long unsigned int"
+
+#undef PTRDIFF_TYPE
 #define PTRDIFF_TYPE "long int"

+#undef WCHAR_TYPE
+#define WCHAR_TYPE "unsigned int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
 /* If it is not listed here, then the default selected by GCC is OK.  */
 #define SHORT_TYPE_SIZE 16
 #define INT_TYPE_SIZE 32
@@ -95,23 +64,6 @@ Boston, MA 02111-1307, USA.  */
 #define LONG_DOUBLE_TYPE_SIZE 64
 #define MAX_WCHAR_TYPE_SIZE 32

-#undef ASM_FILE_START
-#define ASM_FILE_START(FILE) \
-do {  \
-     if (TARGET_64BIT) \
-       fputs("\t.LEVEL 2.0w\n", FILE); \
-     else if (TARGET_PA_11) \
-       fputs("\t.LEVEL 2.0\n", FILE); \
-     else if (TARGET_PA_11) \
-       fputs("\t.LEVEL 1.1\n", FILE); \
-     else \
-       fputs("\t.LEVEL 1.0\n", FILE); \
-     if (profile_flag)\
-       fprintf (FILE, "\t.IMPORT _mcount, CODE\n");\
-     if (write_symbols != NO_DEBUG) \
-       output_file_directive ((FILE), main_input_filename); \
-   } while (0)
-
 /* Temporary until we figure out what to do with those *(&@$ 32bit
    relocs which appear in stabs.  */
 #undef DBX_DEBUGGING_INFO
@@ -135,280 +87,19 @@ do {  \
 /* This is not needed for correct operation in 32bit mode, and since
    older versions of gas and the hpux assembler do not accept .dword
    we put this here instead of the more logical location, pa.h.  */
-#define ASM_OUTPUT_DOUBLE_INT(FILE,VALUE)  \
-{ fputs ("\t.dword ", FILE);                    \
-  if (function_label_operand (VALUE, VOIDmode)) \
-    fputs ("P%", FILE);                         \
-  output_addr_const (FILE, (VALUE));            \
-  fputs ("\n", FILE);}
-
-/* It looks like DWARF2 will be the easiest debug format to handle on this
-   platform.  */
-#define OBJECT_FORMAT_ELF
-#define DWARF2_DEBUGGING_INFO
-#define PREFERRED_DEBUGGING_FORMAT DWARF2_DEBUG
-/* This isn't quite ready yet.  I'm seeing it mess up some line
-   tables.  For example, we're getting lines starting/ending at
-   impossible addresses.  */
-#define DWARF2_ASM_LINE_DEBUG_INFO 1
-
+#define ASM_OUTPUT_DOUBLE_INT(FILE,VALUE) \
+  do								\
+    {								\
+      fputs ("\t.dword ", FILE);				\
+      if (function_label_operand (VALUE, VOIDmode))		\
+	fputs ("P%", FILE);					\
+      output_addr_const (FILE, (VALUE));			\
+      fputs ("\n", FILE);					\
+    }								\
+  while (0)

 /* Nonzero if we do not know how to pass TYPE solely in registers. */
-#define MUST_PASS_IN_STACK(MODE,TYPE)                   \
-  ((TYPE) != 0                                          \
-   && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST      \
+#define MUST_PASS_IN_STACK(MODE,TYPE) \
+  ((TYPE) != 0							\
+   && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST		\
        || TREE_ADDRESSABLE (TYPE)))
-
-/* The rest of this file is copied from the generic svr4.h.  One day we
-   would like to simply include svr4.h instead of copying all these
-   definitions.  */
-
-/* Support const sections and the ctors and dtors sections for g++.
-   Note that there appears to be two different ways to support const
-   sections at the moment.  You can either #define the symbol
-   READONLY_DATA_SECTION (giving it some code which switches to the
-   readonly data section) or else you can #define the symbols
-   EXTRA_SECTIONS, EXTRA_SECTION_FUNCTIONS, SELECT_SECTION, and
-   SELECT_RTX_SECTION.  We do both here just to be on the safe side.  */
-
-#define USE_CONST_SECTION	1
-
-#define CONST_SECTION_ASM_OP	"\t.section\t.rodata"
-
-/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
-
-   Note that we want to give these sections the SHF_WRITE attribute
-   because these sections will actually contain data (i.e. tables of
-   addresses of functions in the current root executable or shared library
-   file) and, in the case of a shared library, the relocatable addresses
-   will have to be properly resolved/relocated (and then written into) by
-   the dynamic linker when it actually attaches the given shared library
-   to the executing process.  (Note that on SVR4, you may wish to use the
-   `-z text' option to the ELF linker, when building a shared library, as
-   an additional check that you are doing everything right.  But if you do
-   use the `-z text' option when building a shared library, you will get
-   errors unless the .ctors and .dtors sections are marked as writable
-   via the SHF_WRITE attribute.)  */
-
-#define CTORS_SECTION_ASM_OP	"\t.section\t.ctors,\"aw\""
-#define DTORS_SECTION_ASM_OP	"\t.section\t.dtors,\"aw\""
-
-/* On svr4, we *do* have support for the .init and .fini sections, and we
-   can put stuff in there to be executed before and after `main'.  We let
-   crtstuff.c and other files know this by defining the following symbols.
-   The definitions say how to change sections to the .init and .fini
-   sections.  This is the same for all known svr4 assemblers.  */
-
-/* ??? For the time being, we aren't using init sections. */
-#if 0
-#define INIT_SECTION_ASM_OP	"\t.section\t.init"
-#define FINI_SECTION_ASM_OP	"\t.section\t.fini"
-#endif
-
-/* A default list of other sections which we might be "in" at any given
-   time.  For targets that use additional sections (e.g. .tdesc) you
-   should override this definition in the target-specific file which
-   includes this file.  */
-
-#undef EXTRA_SECTIONS
-#define EXTRA_SECTIONS in_const, in_ctors, in_dtors
-
-/* A default list of extra section function definitions.  For targets
-   that use additional sections (e.g. .tdesc) you should override this
-   definition in the target-specific file which includes this file.  */
-
-#undef EXTRA_SECTION_FUNCTIONS
-#define EXTRA_SECTION_FUNCTIONS						\
-  CONST_SECTION_FUNCTION						\
-  CTORS_SECTION_FUNCTION						\
-  DTORS_SECTION_FUNCTION
-
-#define READONLY_DATA_SECTION() const_section ()
-
-#define CONST_SECTION_FUNCTION						\
-void									\
-const_section ()							\
-{									\
-  if (!USE_CONST_SECTION)						\
-    text_section();							\
-  else if (in_section != in_const)					\
-    {									\
-      fprintf (asm_out_file, "%s\n", CONST_SECTION_ASM_OP);		\
-      in_section = in_const;						\
-    }									\
-}
-
-#define CTORS_SECTION_FUNCTION						\
-void									\
-ctors_section ()							\
-{									\
-  if (in_section != in_ctors)						\
-    {									\
-      fprintf (asm_out_file, "%s\n", CTORS_SECTION_ASM_OP);		\
-      in_section = in_ctors;						\
-    }									\
-}
-
-#define DTORS_SECTION_FUNCTION						\
-void									\
-dtors_section ()							\
-{									\
-  if (in_section != in_dtors)						\
-    {									\
-      fprintf (asm_out_file, "%s\n", DTORS_SECTION_ASM_OP);		\
-      in_section = in_dtors;						\
-    }									\
-}
-
-/* Switch into a generic section.
-
-   We make the section read-only and executable for a function decl,
-   read-only for a const data decl, and writable for a non-const data decl.
-
-   If the section has already been defined, we must not
-   emit the attributes here. The SVR4 assembler does not
-   recognize section redefinitions.
-   If DECL is NULL, no attributes are emitted.  */
-
-#define ASM_OUTPUT_SECTION_NAME(FILE, DECL, NAME, RELOC)		\
-  do									\
-    {									\
-      static htab_t htab;                                               \
-                                                                        \
-      struct section_info                                               \
-      {									\
-	enum sect_enum {SECT_RW, SECT_RO, SECT_EXEC} type;		\
-      };                                                                \
-                                                                        \
-      struct section_info *s;						\
-      const char *mode;							\
-      enum sect_enum type;                                              \
-      PTR* slot;                                                        \
-                                                                        \
-      /* The names we put in the hashtable will always be the unique    \
-	 versions gived to us by the stringtable, so we can just use    \
-	 their addresses as the keys.  */                               \
-      if (!htab)                                                        \
-	htab = htab_create (31,                                         \
-			    htab_hash_pointer,                          \
-			    htab_eq_pointer,                            \
-			    NULL);                                      \
-                                                                        \
-      if (DECL && TREE_CODE (DECL) == FUNCTION_DECL)			\
-	type = SECT_EXEC, mode = "ax";					\
-      else if (DECL && DECL_READONLY_SECTION (DECL, RELOC))		\
-	type = SECT_RO, mode = "a";					\
-      else								\
-	type = SECT_RW, mode = "aw";					\
-      									\
-                                                                        \
-      /* See if we already have an entry for this section.  */          \
-      slot = htab_find_slot (htab, NAME, INSERT);                       \
-      if (!*slot)                                                       \
-	{                                                               \
-	  s = (struct section_info *) xmalloc (sizeof (* s));		\
-	  s->type = type;						\
-	  *slot = s;							\
-	  fprintf (FILE, "\t.section\t%s,\"%s\",@progbits\n",		\
-		   NAME, mode);						\
-	}								\
-      else								\
-	{								\
-	  s = (struct section_info *) *slot;                            \
-	  if (DECL && s->type != type)					\
-	    error_with_decl (DECL,                                      \
-			     "%s causes a section type conflict");      \
-	  								\
-	  fprintf (FILE, "\t.section\t%s\n", NAME);			\
-	}								\
-    }									\
-  while (0)
-
-#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
-#define UNIQUE_SECTION_P(DECL) (DECL_ONE_ONLY (DECL))
-#define UNIQUE_SECTION(DECL,RELOC)				\
-do {								\
-  int len;							\
-  char *name, *string, *prefix;					\
-								\
-  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL));	\
-								\
-  if (! DECL_ONE_ONLY (DECL))					\
-    {								\
-      prefix = ".";                                             \
-      if (TREE_CODE (DECL) == FUNCTION_DECL)			\
-	prefix = ".text.";					\
-      else if (DECL_READONLY_SECTION (DECL, RELOC))		\
-	prefix = ".rodata.";					\
-      else							\
-	prefix = ".data.";					\
-    }								\
-  else if (TREE_CODE (DECL) == FUNCTION_DECL)			\
-    prefix = ".gnu.linkonce.t.";				\
-  else if (DECL_READONLY_SECTION (DECL, RELOC))			\
-    prefix = ".gnu.linkonce.r.";				\
-  else								\
-    prefix = ".gnu.linkonce.d.";				\
-								\
-  len = strlen (name) + strlen (prefix);			\
-  string = alloca (len + 1);					\
-  sprintf (string, "%s%s", prefix, name);			\
-								\
-  DECL_SECTION_NAME (DECL) = build_string (len, string);	\
-} while (0)
-
-#define INT_ASM_OP "\t.dword\t"
-/* A C statement (sans semicolon) to output an element in the table of
-   global constructors.  */
-#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME)				\
-  do {									\
-    ctors_section ();							\
-    fprintf (FILE, "%sP%%", INT_ASM_OP);				\
-    assemble_name (FILE, NAME);						\
-    fprintf (FILE, "\n");						\
-  } while (0)
-
-/* A C statement (sans semicolon) to output an element in the table of
-   global destructors.  */
-#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME)       				\
-  do {									\
-    dtors_section ();                   				\
-    fprintf (FILE, "%sP%%", INT_ASM_OP);				\
-    assemble_name (FILE, NAME);              				\
-    fprintf (FILE, "\n");						\
-  } while (0)
-
-/* ??? For the time being, we aren't using .ctors/.dtors sections. */
-#undef ASM_OUTPUT_DESTRUCTOR
-#undef ASM_OUTPUT_CONSTRUCTOR
-
-/* Define the strings used for the special svr4 .type and .size directives.
-   These strings generally do not vary from one system running svr4 to
-   another, but if a given system (e.g. m88k running svr) needs to use
-   different pseudo-op names for these, they may be overridden in the
-   file which includes this one.  */
-
-#define TYPE_ASM_OP	"\t.type\t"
-#define SIZE_ASM_OP	"\t.size\t"
-
-/* This is how we tell the assembler that a symbol is weak.  */
-
-#define ASM_WEAKEN_LABEL(FILE,NAME) \
-  do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
-       fputc ('\n', FILE); } while (0)
-
-/* The following macro defines the format used to output the second
-   operand of the .type assembler directive.  Different svr4 assemblers
-   expect various different forms for this operand.  The one given here
-   is just a default.  You may need to override it in your machine-
-   specific tm.h file (depending upon the particulars of your assembler).  */
-
-#define TYPE_OPERAND_FMT	"@%s"
-
-/* Write the extra assembler code needed to declare a function's result.
-   Most svr4 assemblers don't require any special declaration of the
-   result value, but there are exceptions.  */
-
-#ifndef ASM_DECLARE_RESULT
-#define ASM_DECLARE_RESULT(FILE, RESULT)
-#endif
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa-gas.h gcc/gcc/config/pa/pa-gas.h
--- gnu_gcc/gcc/config/pa/pa-gas.h	Sun Feb 13 18:31:03 2000
+++ gcc/gcc/config/pa/pa-gas.h	Wed Dec 31 17:00:00 1969
@@ -1,22 +0,0 @@
-/* Definitions of target machine for GNU compiler, for HP-UX using GNU as.
-   Copyright (C) 1996 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#undef TARGET_DEFAULT
-#define TARGET_DEFAULT (MASK_GAS | MASK_JUMP_IN_DELAY)
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa-hpux10.h gcc/gcc/config/pa/pa-hpux10.h
--- gnu_gcc/gcc/config/pa/pa-hpux10.h	Thu Jun 28 21:32:53 2001
+++ gcc/gcc/config/pa/pa-hpux10.h	Thu Jun 28 22:37:22 2001
@@ -63,4 +63,5 @@ Boston, MA 02111-1307, USA.  */

 /* hpux10 has the new HP assembler.  It's still lousy, but it's a whole lot
    better than the assembler shipped with older versions of hpux.  */
-#define NEW_HP_ASSEMBLER
+#undef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 1
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa-hpux11.h gcc/gcc/config/pa/pa-hpux11.h
--- gnu_gcc/gcc/config/pa/pa-hpux11.h	Fri Jul  7 17:59:13 2000
+++ gcc/gcc/config/pa/pa-hpux11.h	Mon Feb 19 06:54:41 2001
@@ -59,7 +59,8 @@ Boston, MA 02111-1307, USA.  */

 /* hpux11 has the new HP assembler.  It's still lousy, but it's a whole lot
    better than the assembler shipped with older versions of hpux.  */
-#define NEW_HP_ASSEMBLER
+#undef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 1

 /* Make GCC agree with types.h.  */
 #undef SIZE_TYPE
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa-linux.h gcc/gcc/config/pa/pa-linux.h
--- gnu_gcc/gcc/config/pa/pa-linux.h	Thu Jun 28 21:32:53 2001
+++ gcc/gcc/config/pa/pa-linux.h	Thu Jun 28 22:37:22 2001
@@ -1,5 +1,5 @@
 /* Definitions for PA_RISC with ELF format
-   Copyright (C) 1999 Free Software Foundation, Inc.
+   Copyright 1999, 2000, 2001 Free Software Foundation, Inc.

 This file is part of GNU CC.

@@ -18,38 +18,165 @@ along with GNU CC; see the file COPYING.
 the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */

-/* FIXME - this doesn't seem to be used anywhere */
-#define LINUX_DEFAULT_ELF
-
-#undef SIZE_TYPE
-#define SIZE_TYPE "unsigned int"
-
-#undef PTRDIFF_TYPE
-#define PTRDIFF_TYPE "int"
+#if 0 /* eventually... */
+/* Use DWARF2 debugging info and unwind.  */
+#undef PREFERRED_DEBUGGING_TYPE
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+#endif
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+#define DWARF2_UNWIND_INFO 1

 #undef CPP_PREDEFINES
 #define CPP_PREDEFINES "-D__ELF__ -Dunix -D__hppa__ -Dlinux -Asystem=unix -Asystem=posix -Acpu=hppa -Amachine=hppa -Amachine=bigendian"

-#undef CPP_SPEC
-#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}\
- %{msnake:-D_PA_RISC1_1}\
- %{mpa-risc-1-1:-D_PA_RISC1_1}"
+#undef CC1_SPEC
+#define CC1_SPEC "%{pg:} %{p:} %{!mspace-regs:-mno-space-regs}"

 #undef	LIB_SPEC
-#define LIB_SPEC "%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p} -lmilli"
-
-/* How to renumber registers for dbx and gdb.
-
-   It is entirely possible linux will use a different numbering scheme.
-   Until we know for sure, it's the same as hpux, osf & bsd, but we're
-   ready if it needs to be different.
-
-   Registers 0  - 31 remain unchanged.
-
-   Registers 32 - 87 are mapped to 72 - 127
-
-   Register 88 is mapped to 32.  */
+#define LIB_SPEC "%{!p:%{!pg:-lc}}%{p:-lc_p}%{pg:-lc_p}"

-#define DBX_REGISTER_NUMBER(REGNO) \
-  ((REGNO) <= 31 ? (REGNO) :						\
-   ((REGNO) > 31 && (REGNO) <= 87 ? (REGNO) + 40 : 32))
+#undef ASM_SPEC
+#define ASM_SPEC \
+  "%{v:-V} %{n} %{T} %{Ym,*} %{Yd,*} %{Wa,*:%*}"
+
+/* Define this for shared library support because it isn't in the main
+   linux.h file.  */
+
+#undef LINK_SPEC
+#define LINK_SPEC "\
+  %{shared:-shared} \
+  %{!shared: \
+    %{!static: \
+      %{rdynamic:-export-dynamic} \
+      %{!dynamic-linker:-dynamic-linker /lib/ld.so.1}} \
+      %{static:-static}}"
+
+#undef FUNCTION_OK_FOR_SIBCALL
+#define FUNCTION_OK_FOR_SIBCALL(DECL) 1
+
+/* glibc's profiling functions don't need gcc to allocate counters.  */
+#define NO_PROFILE_COUNTERS 1
+
+/* Put plabels into the data section so we can relocate them.  */
+#undef SELECT_RTX_SECTION
+#define SELECT_RTX_SECTION(MODE,RTX)	\
+  if (flag_pic && function_label_operand (RTX, MODE))	\
+    data_section ();					\
+  else							\
+    readonly_data_section ();
+
+/* A C expression whose value is RTL representing the location of the
+   incoming return address at the beginning of any function, before the
+   prologue.  */
+#define INCOMING_RETURN_ADDR_RTX  (gen_rtx_REG (word_mode, 2))
+#define DWARF_FRAME_RETURN_COLUMN (DWARF_FRAME_REGNUM (2))
+
+/* Define the strings used for the special svr4 .type and .size directives.
+   These strings generally do not vary from one system running svr4 to
+   another, but if a given system (e.g. m88k running svr) needs to use
+   different pseudo-op names for these, they may be overridden in the
+   file which includes this one.  */
+
+#undef STRING_ASM_OP
+#define STRING_ASM_OP   ".stringz"
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+#define BSS_SECTION_ASM_OP "\t.section\t.bss"
+
+/* Output at beginning of assembler file.  We override the definition
+   from <linux.h> so that we can get the proper .LEVEL directive. */
+#undef ASM_FILE_START
+#define ASM_FILE_START(FILE) \
+  do								\
+    {								\
+      if (write_symbols != NO_DEBUG)				\
+	{							\
+	  output_file_directive (FILE, main_input_filename);	\
+	  fputs ("\t.version\t\"01.01\"\n", FILE);		\
+	}							\
+      if (TARGET_64BIT)						\
+	fputs("\t.LEVEL 2.0w\n", FILE);				\
+      else if (TARGET_PA_20)					\
+	fputs("\t.LEVEL 2.0\n", FILE);				\
+      else if (TARGET_PA_11)					\
+	fputs("\t.LEVEL 1.1\n", FILE);				\
+      else							\
+	fputs("\t.LEVEL 1.0\n", FILE);				\
+      if (profile_flag)						\
+	fputs ("\t.IMPORT _mcount, CODE\n", FILE);		\
+    }								\
+   while (0)
+
+/* Output a definition */
+#define ASM_OUTPUT_DEF(FILE,LABEL1,LABEL2) \
+  do								\
+    {								\
+      fprintf ((FILE), "\t%s\t", SET_ASM_OP);			\
+      assemble_name (FILE, LABEL1);				\
+      fprintf (FILE, ",");					\
+      assemble_name (FILE, LABEL2);				\
+      fprintf (FILE, "\n");					\
+    }								\
+  while (0)
+
+/* Define these to generate the Linux/ELF/SysV style of internal
+   labels all the time - i.e. to be compatible with
+   ASM_GENERATE_INTERNAL_LABEL in <elfos.h>.  Compare these with the
+   ones in pa.h and note the lack of dollar signs in these.  FIXME:
+   shouldn't we fix pa.h to use ASM_GENERATE_INTERNAL_LABEL instead? */
+
+#undef ASM_OUTPUT_ADDR_VEC_ELT
+#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
+  if (TARGET_BIG_SWITCH)					\
+    fprintf (FILE, "\tstw %%r1,-16(%%r30)\n\tldil LR'.L%d,%%r1\n\tbe RR'.L%d(%%sr4,%%r1)\n\tldw -16(%%r30),%%r1\n", VALUE, VALUE);		\
+  else								\
+    fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE)
+
+#undef ASM_OUTPUT_ADDR_DIFF_ELT
+#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
+  if (TARGET_BIG_SWITCH)					\
+    fprintf (FILE, "\tstw %%r1,-16(%%r30)\n\tldw T'.L%d(%%r19),%%r1\n\tbv %%r0(%%r1)\n\tldw -16(%%r30),%%r1\n", VALUE);				\
+  else								\
+    fprintf (FILE, "\tb .L%d\n\tnop\n", VALUE)
+
+/* This is how to output the definition of a user-level label named NAME,
+   such as the label on a static function or variable NAME.  */
+
+#undef ASM_OUTPUT_LABEL
+#define ASM_OUTPUT_LABEL(FILE, NAME) \
+  do								\
+    {								\
+      assemble_name (FILE, NAME);				\
+      fputs (":\n", FILE);					\
+    }								\
+  while (0)
+
+/* NOTE: ASM_OUTPUT_INTERNAL_LABEL() is defined for us by elfos.h, and
+   does what we want (i.e. uses colons).  It must be compatible with
+   ASM_GENERATE_INTERNAL_LABEL(), so do not define it here.  */
+
+#undef ASM_GLOBALIZE_LABEL
+#define ASM_GLOBALIZE_LABEL(FILE, NAME) \
+  (fputs (".globl ", FILE), assemble_name (FILE, NAME), fputs ("\n", FILE))
+
+/* FIXME: Hacked from the <elfos.h> one so that we avoid multiple
+   labels in a function declaration (since pa.c seems determined to do
+   it differently)  */
+
+#undef ASM_DECLARE_FUNCTION_NAME
+#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)		\
+  do								\
+    {								\
+      fprintf (FILE, "\t%s\t ", TYPE_ASM_OP);			\
+      assemble_name (FILE, NAME);				\
+      putc (',', FILE);						\
+      fprintf (FILE, TYPE_OPERAND_FMT, "function");		\
+      putc ('\n', FILE);					\
+      ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL));		\
+    }								\
+  while (0)
+
+/* Linux always uses gas.  */
+#undef TARGET_GAS
+#define TARGET_GAS 1
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa.c gcc/gcc/config/pa/pa.c
--- gnu_gcc/gcc/config/pa/pa.c	Thu Jun 28 21:32:53 2001
+++ gcc/gcc/config/pa/pa.c	Thu Jun 28 22:37:22 2001
@@ -42,21 +42,30 @@ Boston, MA 02111-1307, USA.  */
 #include "recog.h"
 #include "tm_p.h"

+#ifndef DO_FRAME_NOTES
+#ifdef INCOMING_RETURN_ADDR_RTX
+#define DO_FRAME_NOTES 1
+#else
+#define DO_FRAME_NOTES 0
+#endif
+#endif
+
 static void pa_init_machine_status PARAMS ((struct function *));
 static void pa_mark_machine_status PARAMS ((struct function *));
 static void pa_free_machine_status PARAMS ((struct function *));
-static void pa_combine_instructions			PARAMS ((rtx));
-static int pa_can_combine_p	PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
-static int forward_branch_p				PARAMS ((rtx));
-static int shadd_constant_p				PARAMS ((int));
-static void pa_add_gc_roots                             PARAMS ((void));
-static void mark_deferred_plabels                       PARAMS ((void *));
-static void compute_zdepwi_operands			PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
-static int compute_movstrsi_length			PARAMS ((rtx));
-static void remove_useless_addtr_insns			PARAMS ((rtx, int));
-static void store_reg					PARAMS ((int, int, int));
-static void load_reg					PARAMS ((int, int, int));
-static void set_reg_plus_d				PARAMS ((int, int, int));
+static inline rtx force_mode PARAMS ((enum machine_mode, rtx));
+static void pa_combine_instructions PARAMS ((rtx));
+static int pa_can_combine_p PARAMS ((rtx, rtx, rtx, int, rtx, rtx, rtx));
+static int forward_branch_p PARAMS ((rtx));
+static int shadd_constant_p PARAMS ((int));
+static void pa_add_gc_roots PARAMS ((void));
+static void mark_deferred_plabels PARAMS ((void *));
+static void compute_zdepwi_operands PARAMS ((unsigned HOST_WIDE_INT, unsigned *));
+static int compute_movstrsi_length PARAMS ((rtx));
+static void remove_useless_addtr_insns PARAMS ((rtx, int));
+static void store_reg PARAMS ((int, int, int));
+static void load_reg PARAMS ((int, int, int));
+static rtx set_reg_plus_d PARAMS ((int, int, int));

 /* Save the operands last given to a compare for use when we
    generate a scc or bcc insn.  */
@@ -76,9 +85,6 @@ enum architecture_type pa_arch;
 /* String to hold which architecture we are generating code for.  */
 const char *pa_arch_string;

-/* Set by the FUNCTION_PROFILER macro. */
-int hp_profile_labelno;
-
 /* Counts for the number of callee-saved general and floating point
    registers which were saved by the current function's prologue.  */
 static int gr_saved, fr_saved;
@@ -165,16 +171,11 @@ override_options ()
       warning ("PIC code generation is not supported in the portable runtime model\n");
     }

-  if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
+  if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
    {
       warning ("PIC code generation is not compatible with fast indirect calls\n");
    }

-  if (flag_pic && profile_flag)
-    {
-      warning ("PIC code generation is not compatible with profiling\n");
-    }
-
   if (! TARGET_GAS && write_symbols != NO_DEBUG)
     {
       warning ("-g is only supported when using GAS on this processor,");
@@ -568,7 +569,7 @@ uint32_operand (op, mode)
 #if HOST_BITS_PER_WIDE_INT > 32
   /* All allowed constants will fit a CONST_INT.  */
   return (GET_CODE (op) == CONST_INT
-	  && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
+	  && (INTVAL (op) >= 0 && INTVAL (op) < (HOST_WIDE_INT) 1 << 32));
 #else
   return (GET_CODE (op) == CONST_INT
 	  || (GET_CODE (op) == CONST_DOUBLE
@@ -584,7 +585,9 @@ arith5_operand (op, mode)
   return register_operand (op, mode) || int5_operand (op, mode);
 }

-/* True iff zdepi can be used to generate this CONST_INT.  */
+/* True iff zdepi can be used to generate this CONST_INT.
+   zdepi first sign extends a 5 bit signed number to a given field
+   length, then places this field anywhere in a zero.  */
 int
 zdepi_cint_p (x)
      unsigned HOST_WIDE_INT x;
@@ -946,7 +949,7 @@ hppa_legitimize_address (x, oldx, mode)
 	reg2 = force_reg (Pmode, force_operand (reg2, 0));

       /* Figure out what the base and index are.  */
-
+
       if (GET_CODE (reg1) == REG
 	  && REG_POINTER (reg1))
 	{
@@ -1037,13 +1040,13 @@ hppa_legitimize_address (x, oldx, mode)

       /* Add the result to our base register and return.  */
       return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
-
+
     }

   /* Uh-oh.  We might have an address for x[n-100000].  This needs
      special handling to avoid creating an indexed memory address
      with x-100000 as the base.
-
+
      If the constant part is small enough, then it's still safe because
      there is a guard page at the beginning and end of the data segment.

@@ -1072,7 +1075,7 @@ hppa_legitimize_address (x, oldx, mode)
 		      (const (plus (symbol_ref) (const_int))))

 	     Where const_int is small.  In that case the const
-	     expression is a valid pointer for indexing.
+	     expression is a valid pointer for indexing.

 	     If const_int is big, but can be divided evenly by shadd_const
 	     and added to (reg).  This allows more scaled indexed addresses.  */
@@ -1171,11 +1174,27 @@ hppa_address_cost (X)
   return 4;
 }

+/* Ensure mode of ORIG, a REG rtx, is MODE.  Returns either ORIG or a
+   new rtx with the correct mode.  */
+static inline rtx
+force_mode (mode, orig)
+     enum machine_mode mode;
+     rtx orig;
+{
+  if (mode == GET_MODE (orig))
+    return orig;
+
+  if (REGNO (orig) >= FIRST_PSEUDO_REGISTER)
+    abort ();
+
+  return gen_rtx_REG (mode, REGNO (orig));
+}
+
 /* Emit insns to move operands[1] into operands[0].

    Return 1 if we have written out everything that needs to be done to
    do the move.  Otherwise, return 0 and the caller will emit the move
-   normally.
+   normally.

    Note SCRATCH_REG may not be in the proper mode depending on how it
    will be used.  This routine is resposible for creating a new copy
@@ -1235,7 +1254,7 @@ emit_move_sequence (operands, mode, scra
     operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);

   /* Handle secondary reloads for loads/stores of FP registers from
-     REG+D addresses where D does not fit in 5 bits, including
+     REG+D addresses where D does not fit in 5 bits, including
      (subreg (mem (addr))) cases.  */
   if (fp_reg_operand (operand0, mode)
       && ((GET_CODE (operand1) == MEM
@@ -1251,7 +1270,7 @@ emit_move_sequence (operands, mode, scra
       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
 	 it in WORD_MODE regardless of what mode it was originally given
 	 to us.  */
-      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
+      scratch_reg = force_mode (word_mode, scratch_reg);

       /* D might not fit in 14 bits either; for such cases load D into
 	 scratch reg.  */
@@ -1283,7 +1302,7 @@ emit_move_sequence (operands, mode, scra
       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
 	 it in WORD_MODE regardless of what mode it was originally given
 	 to us.  */
-      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
+      scratch_reg = force_mode (word_mode, scratch_reg);

       /* D might not fit in 14 bits either; for such cases load D into
 	 scratch reg.  */
@@ -1321,7 +1340,7 @@ emit_move_sequence (operands, mode, scra
       /* SCRATCH_REG will hold an address and maybe the actual data.  We want
 	 it in WORD_MODE regardless of what mode it was originally given
 	 to us.  */
-      scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
+      scratch_reg = force_mode (word_mode, scratch_reg);

       /* Force the constant into memory and put the address of the
 	 memory location into scratch_reg.  */
@@ -1352,9 +1371,9 @@ emit_move_sequence (operands, mode, scra
 	{
 	  /* We are reloading the address into the scratch register, so we
 	     want to make sure the scratch register is a full register.  */
-	  scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
+	  scratch_reg = force_mode (word_mode, scratch_reg);

-	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
+	  emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
 	  emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1,
 								        0)),
 						       Pmode,
@@ -1366,8 +1385,8 @@ emit_move_sequence (operands, mode, scra
 	     we want to load it in the same width as the original MEM,
 	     which must be the same as the width of the ultimate destination,
 	     OPERAND0.  */
-	  scratch_reg = gen_rtx_REG (GET_MODE (operand0), REGNO (scratch_reg));
-
+	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
+
 	  emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand0),
 						    scratch_reg));
 	}
@@ -1375,7 +1394,8 @@ emit_move_sequence (operands, mode, scra
 	{
 	  /* We want to load the scratch register using the same mode as
 	     the ultimate destination.  */
-	  scratch_reg = gen_rtx_REG (GET_MODE (operand0), REGNO (scratch_reg));
+	  scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
+
 	  emit_move_insn (scratch_reg, operand1);
 	}

@@ -1462,7 +1482,7 @@ emit_move_sequence (operands, mode, scra
 		  /* SCRATCH_REG will hold an address and maybe the actual
 		     data.  We want it in WORD_MODE regardless of what mode it
 		     was originally given to us.  */
-		  scratch_reg = gen_rtx_REG (word_mode, REGNO (scratch_reg));
+		  scratch_reg = force_mode (word_mode, scratch_reg);
 		}
 	      else if (flag_pic)
 		scratch_reg = gen_reg_rtx (Pmode);
@@ -1485,7 +1505,7 @@ emit_move_sequence (operands, mode, scra
 		  /* Force the function label into memory.  */
 		  temp = force_const_mem (mode, operand1);
 		}
-
+

 	      /* Get the address of the memory location.  PIC-ify it if
 		 necessary.  */
@@ -1520,7 +1540,7 @@ emit_move_sequence (operands, mode, scra
 		  /* TEMP will hold an address and maybe the actual
 		     data.  We want it in WORD_MODE regardless of what mode it
 		     was originally given to us.  */
-		  temp = gen_rtx_REG (word_mode, REGNO (temp));
+		  temp = force_mode (word_mode, temp);
 		}
 	      else
 		temp = gen_reg_rtx (Pmode);
@@ -1559,13 +1579,13 @@ emit_move_sequence (operands, mode, scra
 		  /* TEMP will hold an address and maybe the actual
 		     data.  We want it in WORD_MODE regardless of what mode it
 		     was originally given to us.  */
-		  temp = gen_rtx_REG (word_mode, REGNO (temp));
+		  temp = force_mode (word_mode, temp);
 		}
 	      else
 		temp = gen_reg_rtx (mode);

 	      /* Loading a SYMBOL_REF into a register makes that register
-		 safe to be used as the base in an indexed address.
+		 safe to be used as the base in an indexed address.

 		 Don't mark hard registers though.  That loses.  */
 	      if (GET_CODE (operand0) == REG
@@ -1595,16 +1615,18 @@ emit_move_sequence (operands, mode, scra
 	  int need_zero_extend = 0;

 	  if (TARGET_64BIT && GET_CODE (operand1) == CONST_INT
+	      && HOST_BITS_PER_WIDE_INT > 32
 	      && GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
 	    {
 	      HOST_WIDE_INT val = INTVAL (operand1);
-	      HOST_WIDE_INT nval = INTVAL (operand1);
+	      HOST_WIDE_INT nval;

 	      /* If the value is the same after a 32->64bit sign
 		 extension, then we can use it as-is.  Else we will
 		 need to sign extend the constant from 32->64bits
 		 then zero extend the result from 32->64bits.  */
-	      nval = ((val & 0xffffffff) ^ (~0x7fffffff)) + 0x80000000;
+	      nval = ((val & (((HOST_WIDE_INT) 2 << 31) - 1))
+		      ^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
 	      if (val != nval)
 		{
 		  need_zero_extend = 1;
@@ -1629,7 +1651,7 @@ emit_move_sequence (operands, mode, scra
 							       operands[0],
 							       0)));
 	    }
-
+
 	  return 1;
 	}
     }
@@ -1816,7 +1838,7 @@ compute_zdepdi_operands (imm, op)
       /* Find the width of the bitstring in IMM.  */
       for (len = 5; len < HOST_BITS_PER_WIDE_INT; len++)
 	{
-	  if ((imm & ((unsigned HOST_WIDE_INT)1 << len)) == 0)
+	  if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
 	    break;
 	}

@@ -1899,7 +1921,7 @@ output_move_double (operands)
 	      return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
 	    }
 	  else
-	    abort();
+	    abort ();
 	}
       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
 	{
@@ -1919,7 +1941,7 @@ output_move_double (operands)
 	      return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
 	    }
 	  else
-	    abort();
+	    abort ();
 	}
     }
   if (optype1 == MEMOP)
@@ -2011,7 +2033,6 @@ output_move_double (operands)
 			       xoperands);
 	      return "ldw 0(%R0),%0\n\tldw 4(%R0),%R0";
 	    }
-
 	}
     }

@@ -2057,10 +2078,10 @@ output_move_double (operands)
 	can create such insns.

 	mem in this case will be either register indirect or register
-	indirect plus a valid offset.
+	indirect plus a valid offset.

 	register -> register move where REGNO(dst) == REGNO(src + 1)
-	someone (Tim/Tege?) claimed this can happen for parameter loads.
+	someone (Tim/Tege?) claimed this can happen for parameter loads.

      Handle mem -> register case first.  */
   if (optype0 == REGOP
@@ -2177,7 +2198,7 @@ find_addr_reg (addr)
    OPERANDS[2] is a register for temporary storage.
    OPERANDS[4] is the size as a CONST_INT
    OPERANDS[3] is a register for temporary storage.
-   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
+   OPERANDS[5] is the alignment safe to use, as a CONST_INT.
    OPERANDS[6] is another temporary register.   */

 const char *
@@ -2286,7 +2307,7 @@ compute_movstrsi_length (insn)
      rtx insn;
 {
   rtx pat = PATTERN (insn);
-  int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
+  unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
   unsigned int n_insns = 0;

@@ -2335,7 +2356,7 @@ output_and (operands)
 	  break;

       if (ms0 != 32)
-	abort();
+	abort ();

       if (ls1 == 32)
 	{
@@ -2373,22 +2394,22 @@ output_64bit_and (operands)
   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
     {
       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
-      unsigned HOST_WIDE_INT ls0, ls1, ms0, p, len;
+      int ls0, ls1, ms0, p, len;

       for (ls0 = 0; ls0 < HOST_BITS_PER_WIDE_INT; ls0++)
-	if ((mask & ((unsigned HOST_WIDE_INT)1 << ls0)) == 0)
+	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0)
 	  break;

       for (ls1 = ls0; ls1 < HOST_BITS_PER_WIDE_INT; ls1++)
-	if ((mask & ((unsigned HOST_WIDE_INT)1 << ls1)) != 0)
+	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0)
 	  break;

       for (ms0 = ls1; ms0 < HOST_BITS_PER_WIDE_INT; ms0++)
-	if ((mask & ((unsigned HOST_WIDE_INT)1 << ms0)) == 0)
+	if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0)
 	  break;

       if (ms0 != HOST_BITS_PER_WIDE_INT)
-	abort();
+	abort ();

       if (ls1 == HOST_BITS_PER_WIDE_INT)
 	{
@@ -2436,7 +2457,7 @@ output_ior (operands)
       break;

   if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
-    abort();
+    abort ();

   p = 31 - bs0;
   len = bs1 - bs0;
@@ -2453,22 +2474,22 @@ output_64bit_ior (operands)
      rtx *operands;
 {
   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
-  unsigned HOST_WIDE_INT bs0, bs1, p, len;
+  int bs0, bs1, p, len;

   if (INTVAL (operands[2]) == 0)
     return "copy %1,%0";

   for (bs0 = 0; bs0 < HOST_BITS_PER_WIDE_INT; bs0++)
-    if ((mask & ((unsigned HOST_WIDE_INT)1 << bs0)) != 0)
+    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0)
       break;

   for (bs1 = bs0; bs1 < HOST_BITS_PER_WIDE_INT; bs1++)
-    if ((mask & ((unsigned HOST_WIDE_INT)1 << bs1)) == 0)
+    if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0)
       break;

   if (bs1 != HOST_BITS_PER_WIDE_INT
       && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
-    abort();
+    abort ();

   p = 63 - bs0;
   len = bs1 - bs0;
@@ -2587,7 +2608,7 @@ remove_useless_addtr_insns (insns, check
 	      fcmp_count++;
 	      continue;
 	    }
-
+
 	  tmp = PATTERN (insn);
 	  /* If this is an fbranch instruction, bump the fbranch counter.  */
 	  if (GET_CODE (tmp) == SET
@@ -2677,8 +2698,8 @@ remove_useless_addtr_insns (insns, check
 		  /* Reverse our condition.  */
 		  tmp = PATTERN (insn);
 		  PUT_CODE (XEXP (tmp, 1),
-		    reverse_condition_maybe_unordered (GET_CODE (XEXP (tmp,
-		      1))));
+			    (reverse_condition_maybe_unordered
+			     (GET_CODE (XEXP (tmp, 1)))));
 		}
 	    }
 	}
@@ -2688,8 +2709,8 @@ remove_useless_addtr_insns (insns, check

 }
 
-/* You may have trouble believing this, but this is the 32 bit HP-PA stack
-   layout.  Wow.
+/* You may have trouble believing this, but this is the 32 bit HP-PA
+   stack layout.  Wow.

    Offset		Contents

@@ -2751,6 +2772,12 @@ remove_useless_addtr_insns (insns, check

 */

+/* Global variables set by FUNCTION_PROLOGUE.  */
+/* Size of frame.  Need to know this to emit return insns from
+   leaf procedures.  */
+static int actual_fsize;
+static int local_fsize, save_fregs;
+
 /* Emit RTL to store REG at the memory location specified by BASE+DISP.
    Handle case where DISP > 8k by using the add_high_const patterns.

@@ -2761,21 +2788,36 @@ static void
 store_reg (reg, disp, base)
      int reg, disp, base;
 {
+  rtx i, dest, src, basereg;
+
+  src = gen_rtx_REG (word_mode, reg);
+  basereg = gen_rtx_REG (Pmode, base);
   if (VAL_14_BITS_P (disp))
-    emit_move_insn (gen_rtx_MEM (word_mode,
-				 plus_constant (gen_rtx_REG (Pmode, base),
-						disp)),
-		    gen_rtx_REG (word_mode, reg));
+    {
+      dest = gen_rtx_MEM (word_mode, plus_constant (basereg, disp));
+      i = emit_move_insn (dest, src);
+    }
   else
     {
-      emit_move_insn (gen_rtx_REG (Pmode, 1),
-		      gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
-				    gen_rtx_HIGH (Pmode, GEN_INT (disp))));
-      emit_move_insn (gen_rtx_MEM (word_mode,
-				   gen_rtx_LO_SUM (Pmode,
-						   gen_rtx_REG (Pmode, 1),
-						   GEN_INT (disp))),
-		      gen_rtx_REG (word_mode, reg));
+      rtx delta = GEN_INT (disp);
+      rtx high = gen_rtx_PLUS (Pmode, basereg, gen_rtx_HIGH (Pmode, delta));
+      rtx tmpreg = gen_rtx_REG (Pmode, 1);
+      emit_move_insn (tmpreg, high);
+      dest = gen_rtx_MEM (word_mode, gen_rtx_LO_SUM (Pmode, tmpreg, delta));
+      i = emit_move_insn (dest, src);
+    }
+  if (DO_FRAME_NOTES)
+    {
+      RTX_FRAME_RELATED_P (i) = 1;
+      if (base != STACK_POINTER_REGNUM)
+	{
+	  rtx addr = plus_constant (stack_pointer_rtx, disp - actual_fsize);
+	  dest = gen_rtx_MEM (word_mode, addr);
+	  REG_NOTES (i)
+	    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+				 gen_rtx_SET (VOIDmode, dest, src),
+				 REG_NOTES (i));
+	}
     }
 }

@@ -2790,10 +2832,12 @@ load_reg (reg, disp, base)
      int reg, disp, base;
 {
   if (VAL_14_BITS_P (disp))
-    emit_move_insn (gen_rtx_REG (word_mode, reg),
-		    gen_rtx_MEM (word_mode,
-				 plus_constant (gen_rtx_REG (Pmode, base),
-						disp)));
+    {
+      emit_move_insn (gen_rtx_REG (word_mode, reg),
+		      gen_rtx_MEM (word_mode,
+				   plus_constant (gen_rtx_REG (Pmode, base),
+						  disp)));
+    }
   else
     {
       emit_move_insn (gen_rtx_REG (Pmode, 1),
@@ -2813,31 +2857,30 @@ load_reg (reg, disp, base)
    Note in DISP > 8k case, we will leave the high part of the address
    in %r1.  There is code in expand_hppa_{prologue,epilogue} that knows this.*/

-static void
+static rtx
 set_reg_plus_d (reg, base, disp)
      int reg, base, disp;
 {
+  rtx i;
+
   if (VAL_14_BITS_P (disp))
-    emit_move_insn (gen_rtx_REG (Pmode, reg),
-		    plus_constant (gen_rtx_REG (Pmode, base), disp));
+    {
+      i = emit_move_insn (gen_rtx_REG (Pmode, reg),
+			  plus_constant (gen_rtx_REG (Pmode, base), disp));
+    }
   else
     {
+      rtx delta = GEN_INT (disp);
       emit_move_insn (gen_rtx_REG (Pmode, 1),
 		      gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, base),
-				    gen_rtx_HIGH (Pmode, GEN_INT (disp))));
-      emit_move_insn (gen_rtx_REG (Pmode, reg),
-		      gen_rtx_LO_SUM (Pmode,
-				      gen_rtx_REG (Pmode, 1),
-				       GEN_INT (disp)));
+				    gen_rtx_HIGH (Pmode, delta)));
+      i = emit_move_insn (gen_rtx_REG (Pmode, reg),
+			  gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 1),
+					  delta));
     }
+  return i;
 }

-/* Global variables set by FUNCTION_PROLOGUE.  */
-/* Size of frame.  Need to know this to emit return insns from
-   leaf procedures.  */
-static int actual_fsize;
-static int local_fsize, save_fregs;
-
 int
 compute_frame_size (size, fregs_live)
      int size;
@@ -2881,13 +2924,11 @@ compute_frame_size (size, fregs_live)
      allocated for any function that makes calls or otherwise allocates
      stack space.  */
   if (!current_function_is_leaf || fsize)
-    fsize += 32;
+    fsize += TARGET_64BIT ? 16 : 32;

   return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
 }

-rtx hp_profile_label_rtx;
-static char hp_profile_label_name[8];
 void
 output_function_prologue (file, size)
      FILE *file;
@@ -2903,7 +2944,7 @@ output_function_prologue (file, size)
      to output the assembler directives which denote the start
      of a function.  */
   fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
-  if (regs_ever_live[2] || profile_flag)
+  if (regs_ever_live[2])
     fputs (",CALLS,SAVE_RP", file);
   else
     fputs (",NO_CALLS", file);
@@ -2927,12 +2968,6 @@ output_function_prologue (file, size)

   fputs ("\n\t.ENTRY\n", file);

-  /* Horrid hack.  emit_function_prologue will modify this RTL in
-     place to get the expected results.  */
-  if (profile_flag)
-    ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
-				 hp_profile_labelno);
-
   /* If we're using GAS and not using the portable runtime model, then
      we don't need to accumulate the total number of code bytes.  */
   if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
@@ -2941,7 +2976,7 @@ output_function_prologue (file, size)
     {
       unsigned int old_total = total_code_bytes;

-      total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn()));
+      total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_insn ()));
       total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;

       /* Be prepared to handle overflows.  */
@@ -2954,13 +2989,14 @@ output_function_prologue (file, size)
 }

 void
-hppa_expand_prologue()
+hppa_expand_prologue ()
 {
   extern char call_used_regs[];
   int size = get_frame_size ();
   int merge_sp_adjust_with_store = 0;
   int i, offset;
   rtx tmpreg, size_rtx;
+  rtx insn;

   gr_saved = 0;
   fr_saved = 0;
@@ -2981,9 +3017,9 @@ hppa_expand_prologue()
   size_rtx = GEN_INT (actual_fsize);

   /* Save RP first.  The calling conventions manual states RP will
-     always be stored into the caller's frame at sp-20 or sp - 16
+     always be stored into the caller's frame at sp - 20 or sp - 16
      depending on which ABI is in use.  */
-  if (regs_ever_live[2] || profile_flag)
+  if (regs_ever_live[2])
     store_reg (2, TARGET_64BIT ? -16 : -20, STACK_POINTER_REGNUM);

   /* Allocate the local frame and set up the frame pointer if needed.  */
@@ -2998,9 +3034,31 @@ hppa_expand_prologue()
 	     handles small (<8k) frames.  The second handles large (>=8k)
 	     frames.  */
 	  emit_move_insn (tmpreg, frame_pointer_rtx);
-	  emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
+	  insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
 	  if (VAL_14_BITS_P (actual_fsize))
-	    emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, size_rtx));
+	    {
+	      insn = emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
+						size_rtx));
+	      if (DO_FRAME_NOTES)
+		{
+		  rtvec vec;
+		  RTX_FRAME_RELATED_P (insn) = 1;
+		  vec = gen_rtvec (2,
+				   gen_rtx_SET (VOIDmode,
+						gen_rtx_MEM (word_mode,
+							     stack_pointer_rtx),
+						frame_pointer_rtx),
+				   gen_rtx_SET (VOIDmode,
+						stack_pointer_rtx,
+						gen_rtx_PLUS (word_mode,
+							      stack_pointer_rtx,
+							      size_rtx)));
+		  REG_NOTES (insn)
+		    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+					 gen_rtx_SEQUENCE (VOIDmode, vec),
+					 REG_NOTES (insn));
+		}
+	    }
 	  else
 	    {
 	      /* It is incorrect to store the saved frame pointer at *sp,
@@ -3012,15 +3070,51 @@ hppa_expand_prologue()
 	      int adjust1 = 8192 - 64;
 	      int adjust2 = actual_fsize - adjust1;
 	      rtx delta = GEN_INT (adjust1);
-	      emit_insn (gen_post_store (stack_pointer_rtx, tmpreg, delta));
-	      set_reg_plus_d (STACK_POINTER_REGNUM,
-			      STACK_POINTER_REGNUM,
-			      adjust2);
+	      insn = emit_insn (gen_post_store (stack_pointer_rtx, tmpreg,
+						delta));
+	      if (DO_FRAME_NOTES)
+		{
+		  rtvec vec;
+		  RTX_FRAME_RELATED_P (insn) = 1;
+		  vec = gen_rtvec (2,
+				   gen_rtx_SET (VOIDmode,
+						gen_rtx_MEM (word_mode,
+							     stack_pointer_rtx),
+						frame_pointer_rtx),
+				   gen_rtx_SET (VOIDmode,
+						stack_pointer_rtx,
+						gen_rtx_PLUS (word_mode,
+							      stack_pointer_rtx,
+							      delta)));
+		  REG_NOTES (insn)
+		    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+					 gen_rtx_SEQUENCE (VOIDmode, vec),
+					 REG_NOTES (insn));
+		}
+
+	      insn = set_reg_plus_d (STACK_POINTER_REGNUM,
+				     STACK_POINTER_REGNUM,
+				     adjust2);
+	      if (DO_FRAME_NOTES)
+		{
+		  RTX_FRAME_RELATED_P (insn) = 1;
+		  if (! VAL_14_BITS_P (adjust2))
+		    {
+		      rtx addr = plus_constant (stack_pointer_rtx, adjust2);
+
+		      REG_NOTES (insn)
+			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+					     gen_rtx_SET (VOIDmode,
+							  stack_pointer_rtx,
+							  addr),
+					     REG_NOTES (insn));
+		    }
+		}
 	    }
 	  /* Prevent register spills from being scheduled before the
 	     stack pointer is raised.  Necessary as we will be storing
 	     registers using the frame pointer as a base register, and
-	     we happen to set fp before raising sp.  */
+	     we happen to set fp before raising sp.  */
 	  emit_insn (gen_blockage ());
 	}
       /* no frame pointer needed.  */
@@ -3030,89 +3124,32 @@ hppa_expand_prologue()
 	     and allocating the stack frame at the same time.   If so, just
 	     make a note of it and defer allocating the frame until saving
 	     the callee registers.  */
-	  if (VAL_14_BITS_P (actual_fsize)
-	      && local_fsize == 0
-	      && ! profile_flag
-	      && ! flag_pic)
+	  if (VAL_14_BITS_P (actual_fsize) && local_fsize == 0)
 	    merge_sp_adjust_with_store = 1;
 	  /* Can not optimize.  Adjust the stack frame by actual_fsize
 	     bytes.  */
 	  else
-	    set_reg_plus_d (STACK_POINTER_REGNUM,
-			    STACK_POINTER_REGNUM,
-			    actual_fsize);
-	}
-
-      /* The hppa calling conventions say that %r19, the pic offset
-	 register, is saved at sp - 32 (in this function's frame)
-	 when generating PIC code.  FIXME:  What is the correct thing
-	 to do for functions which make no calls and allocate no
-	 frame?  Do we need to allocate a frame, or can we just omit
-	 the save?   For now we'll just omit the save.  */
-      if (flag_pic && !TARGET_64BIT)
-	store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
-    }
-
-  /* Profiling code.
-
-     Instead of taking one argument, the counter label, as most normal
-     mcounts do, _mcount appears to behave differently on the HPPA.  It
-     takes the return address of the caller, the address of this routine,
-     and the address of the label.  Also, it isn't magic, so
-     argument registers have to be preserved.  */
-  if (profile_flag)
-    {
-      int pc_offset, i, arg_offset, basereg, offsetadj;
-
-      pc_offset = 4 + (frame_pointer_needed
-		       ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
-		       : (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
-
-      /* When the function has a frame pointer, use it as the base
-	 register for saving/restore registers.  Else use the stack
-	 pointer.  Adjust the offset according to the frame size if
-	 this function does not have a frame pointer.  */
-
-      basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
-				     : STACK_POINTER_REGNUM;
-      offsetadj = frame_pointer_needed ? 0 : actual_fsize;
-
-      /* Horrid hack.  emit_function_prologue will modify this RTL in
-	 place to get the expected results.   sprintf here is just to
-	 put something in the name.  */
-      sprintf(hp_profile_label_name, "LP$%04d", -1);
-      hp_profile_label_rtx = gen_rtx_SYMBOL_REF (Pmode,
-						 hp_profile_label_name);
-      if (current_function_returns_struct)
-	store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
-      if (current_function_needs_context)
-	store_reg (STATIC_CHAIN_REGNUM, - 16 - offsetadj, basereg);
-
-      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
-	if (regs_ever_live [i])
-	  {
-	    store_reg (i, arg_offset, basereg);
-	    /* Deal with arg_offset not fitting in 14 bits.  */
-	    pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
-	  }
-
-      emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
-      emit_move_insn (tmpreg, gen_rtx_HIGH (Pmode, hp_profile_label_rtx));
-      emit_move_insn (gen_rtx_REG (Pmode, 24),
-		      gen_rtx_LO_SUM (Pmode, tmpreg, hp_profile_label_rtx));
-      /* %r25 is set from within the output pattern.  */
-      emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
-
-      /* Restore argument registers.  */
-      for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
-	if (regs_ever_live [i])
-	  load_reg (i, arg_offset, basereg);
-
-      if (current_function_returns_struct)
-	load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
-
-      if (current_function_needs_context)
-	load_reg (STATIC_CHAIN_REGNUM, -16 - offsetadj, basereg);
+	    {
+	      insn = set_reg_plus_d (STACK_POINTER_REGNUM,
+				     STACK_POINTER_REGNUM,
+				     actual_fsize);
+	      if (DO_FRAME_NOTES)
+		{
+		  RTX_FRAME_RELATED_P (insn) = 1;
+		  if (! VAL_14_BITS_P (actual_fsize))
+		    {
+		      rtx addr;
+		      addr = plus_constant (stack_pointer_rtx, actual_fsize);
+		      REG_NOTES (insn)
+			= gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+					     gen_rtx_SET (VOIDmode,
+							  stack_pointer_rtx,
+							  addr),
+					     REG_NOTES (insn));
+		    }
+		}
+	    }
+	}
     }

   /* Normal register save.
@@ -3141,10 +3178,25 @@ hppa_expand_prologue()
 	       optimize the first GR save.  */
 	    if (merge_sp_adjust_with_store)
 	      {
+		rtx delta = GEN_INT (-offset);
 		merge_sp_adjust_with_store = 0;
-	        emit_insn (gen_post_store (stack_pointer_rtx,
-					   gen_rtx_REG (word_mode, i),
-					   GEN_INT (-offset)));
+	        insn = emit_insn (gen_post_store (stack_pointer_rtx,
+						  gen_rtx_REG (word_mode, i),
+						  delta));
+		if (DO_FRAME_NOTES)
+		  {
+		    rtx set;
+		    RTX_FRAME_RELATED_P (insn) = 1;
+		    set = gen_rtx_SET (VOIDmode,
+				       stack_pointer_rtx,
+				       gen_rtx_PLUS (word_mode,
+						     stack_pointer_rtx,
+						     delta));
+		    REG_NOTES (insn)
+		      = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+					   set,
+					   REG_NOTES (insn));
+		  }
 	      }
 	    else
 	      store_reg (i, offset, STACK_POINTER_REGNUM);
@@ -3155,11 +3207,24 @@ hppa_expand_prologue()
       /* If we wanted to merge the SP adjustment with a GR save, but we never
 	 did any GR saves, then just emit the adjustment here.  */
       if (merge_sp_adjust_with_store)
-	set_reg_plus_d (STACK_POINTER_REGNUM,
-			STACK_POINTER_REGNUM,
-			actual_fsize);
+	{
+	  insn = set_reg_plus_d (STACK_POINTER_REGNUM,
+				 STACK_POINTER_REGNUM,
+				 actual_fsize);
+	  if (DO_FRAME_NOTES)
+	    RTX_FRAME_RELATED_P (insn) = 1;
+	}
     }

+  /* The hppa calling conventions say that %r19, the pic offset
+     register, is saved at sp - 32 (in this function's frame)
+     when generating PIC code.  FIXME:  What is the correct thing
+     to do for functions which make no calls and allocate no
+     frame?  Do we need to allocate a frame, or can we just omit
+     the save?   For now we'll just omit the save.  */
+  if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
+    store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
+
   /* Align pointer properly (doubleword boundary).  */
   offset = (offset + 7) & ~7;

@@ -3179,16 +3244,32 @@ hppa_expand_prologue()
 	  if (regs_ever_live[i]
 	      || (! TARGET_64BIT && regs_ever_live[i + 1]))
 	    {
-	      emit_move_insn (gen_rtx_MEM (DFmode,
-				           gen_rtx_POST_INC (DFmode, tmpreg)),
-			      gen_rtx_REG (DFmode, i));
+	      rtx addr, reg;
+	      addr = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
+	      reg = gen_rtx_REG (DFmode, i);
+	      insn = emit_move_insn (addr, reg);
+	      if (DO_FRAME_NOTES)
+		{
+		  rtx stackoff;
+		  RTX_FRAME_RELATED_P (insn) = 1;
+		  stackoff = GEN_INT (offset + (frame_pointer_needed
+						? -actual_fsize : 0));
+		  addr = gen_rtx_MEM (DFmode,
+				      gen_rtx_PLUS (DFmode,
+						    stack_pointer_rtx,
+						    stackoff));
+		  REG_NOTES (insn)
+		    = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
+					 gen_rtx_SET (VOIDmode, addr, reg),
+					 REG_NOTES (insn));
+		  offset += 8;
+		}
 	      fr_saved++;
 	    }
 	}
     }
 }

-
 void
 output_function_epilogue (file, size)
      FILE *file;
@@ -3235,7 +3316,7 @@ hppa_expand_epilogue ()
   /* Try to restore RP early to avoid load/use interlocks when
      RP gets used in the return (bv) instruction.  This appears to still
      be necessary even when we schedule the prologue and epilogue. */
-  if (regs_ever_live [2] || profile_flag)
+  if (regs_ever_live [2])
     {
       ret_off = TARGET_64BIT ? -16 : -20;
       if (frame_pointer_needed)
@@ -3278,7 +3359,7 @@ hppa_expand_epilogue ()
 		  && VAL_14_BITS_P (-actual_fsize))
 	        merge_sp_adjust_with_load = i;
 	      else
-	        load_reg (i, offset, STACK_POINTER_REGNUM);
+		load_reg (i, offset, STACK_POINTER_REGNUM);
 	      offset += UNITS_PER_WORD;
 	    }
 	}
@@ -3298,15 +3379,13 @@ hppa_expand_epilogue ()

       /* Actually do the restores now.  */
       for (i = FP_SAVED_REG_LAST; i >= FP_SAVED_REG_FIRST; i -= FP_REG_STEP)
-	{
-	  if (regs_ever_live[i]
-	      || (! TARGET_64BIT && regs_ever_live[i + 1]))
-	    {
-	      emit_move_insn (gen_rtx_REG (DFmode, i),
-			      gen_rtx_MEM (DFmode,
-				           gen_rtx_POST_INC (DFmode, tmpreg)));
-	    }
-	}
+	if (regs_ever_live[i]
+	    || (! TARGET_64BIT && regs_ever_live[i + 1]))
+	  {
+	    rtx src = gen_rtx_MEM (DFmode, gen_rtx_POST_INC (DFmode, tmpreg));
+	    rtx dest = gen_rtx_REG (DFmode, i);
+	    emit_move_insn (dest, src);
+	  }
     }

   /* Emit a blockage insn here to keep these insns from being moved to
@@ -3316,30 +3395,27 @@ hppa_expand_epilogue ()
      restores are finished.  */
   emit_insn (gen_blockage ());

-  /* Reset stack pointer (and possibly frame pointer).  The stack
+  /* Reset stack pointer (and possibly frame pointer).  The stack
      pointer is initially set to fp + 64 to avoid a race condition.  */
   if (frame_pointer_needed)
     {
+      rtx delta = GEN_INT (-64);
       set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64);
-      emit_insn (gen_pre_load (frame_pointer_rtx,
+      emit_insn (gen_pre_load (frame_pointer_rtx,
 			       stack_pointer_rtx,
-			       GEN_INT (-64)));
+			       delta));
     }
   /* If we were deferring a callee register restore, do it now.  */
   else if (merge_sp_adjust_with_load)
     {
       rtx delta = GEN_INT (-actual_fsize);
-      emit_insn (gen_pre_load (gen_rtx_REG (word_mode,
-					    merge_sp_adjust_with_load),
-			       stack_pointer_rtx,
-			       delta));
+      rtx dest = gen_rtx_REG (word_mode, merge_sp_adjust_with_load);
+      emit_insn (gen_pre_load (dest, stack_pointer_rtx, delta));
     }
   else if (actual_fsize != 0)
-    {
-      set_reg_plus_d (STACK_POINTER_REGNUM,
-		      STACK_POINTER_REGNUM,
-		      - actual_fsize);
-    }
+    set_reg_plus_d (STACK_POINTER_REGNUM,
+		    STACK_POINTER_REGNUM,
+		    -actual_fsize);

   /* If we haven't restored %r2 yet (no frame pointer, and a stack
      frame greater than 8k), do so now.  */
@@ -3348,18 +3424,108 @@ hppa_expand_epilogue ()
 }

 /* Set up a callee saved register for the pic offset table register.  */
-void hppa_init_pic_save ()
+void
+hppa_init_pic_save ()
 {
-  rtx insn, picreg;
+  rtx insn, picreg, where;

   picreg = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
   PIC_OFFSET_TABLE_SAVE_RTX = gen_reg_rtx (Pmode);
+  RTX_UNCHANGING_P (PIC_OFFSET_TABLE_SAVE_RTX) = 1;
   insn = gen_rtx_SET (VOIDmode, PIC_OFFSET_TABLE_SAVE_RTX, picreg);

   /* Emit the insn at the beginning of the function after the prologue.  */
-  push_topmost_sequence ();
-  emit_insn_after (insn, last_parm_insn ? last_parm_insn : get_insns ());
-  pop_topmost_sequence ();
+  where = last_parm_insn;
+  if (!where)
+    {
+      push_topmost_sequence ();
+      where = get_insns ();
+      pop_topmost_sequence ();
+    }
+  emit_insn_after (insn, where);
+}
+
+void
+hppa_profile_hook (label_no)
+     int label_no ATTRIBUTE_UNUSED;
+{
+  rtx call_insn;
+
+  /* No profiling for inline functions.  We don't want extra calls to
+     _mcount when the inline function is expanded.  Even if that made
+     sense, it wouldn't work here as there is no function label for
+     the inline expansion.  */
+  if (DECL_INLINE (cfun->decl))
+    return;
+
+  if (TARGET_64BIT)
+    emit_move_insn (arg_pointer_rtx,
+		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+				  GEN_INT (64)));
+
+  if (flag_pic && PIC_OFFSET_TABLE_SAVE_RTX == NULL_RTX)
+    hppa_init_pic_save ();
+
+  emit_move_insn (gen_rtx_REG (word_mode, 26), gen_rtx_REG (word_mode, 2));
+
+#ifndef NO_PROFILE_COUNTERS
+  {
+    rtx count_label_rtx, addr, r24;
+    char label_name[16];
+
+    ASM_GENERATE_INTERNAL_LABEL (label_name, "LP", label_no);
+    count_label_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (label_name));
+
+    if (flag_pic)
+      {
+	rtx tmpreg;
+
+	current_function_uses_pic_offset_table = 1;
+	tmpreg = gen_rtx_REG (Pmode, 1);
+	emit_move_insn (tmpreg,
+			gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
+				      gen_rtx_HIGH (Pmode, count_label_rtx)));
+	addr = gen_rtx_MEM (Pmode,
+			    gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx));
+      }
+    else
+      {
+	rtx tmpreg = gen_rtx_REG (Pmode, 1);
+	emit_move_insn (tmpreg, gen_rtx_HIGH (Pmode, count_label_rtx));
+	addr = gen_rtx_LO_SUM (Pmode, tmpreg, count_label_rtx);
+      }
+    r24 = gen_rtx_REG (Pmode, 24);
+    emit_move_insn (r24, addr);
+
+    /* %r25 is set from within the output pattern.  */
+    call_insn =
+      emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
+					 GEN_INT (TARGET_64BIT ? 24 : 12),
+					 XEXP (DECL_RTL (cfun->decl), 0)));
+
+    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), r24);
+  }
+#else
+  /* %r25 is set from within the output pattern.  */
+  call_insn =
+    emit_call_insn (gen_call_profiler (gen_rtx_SYMBOL_REF (Pmode, "_mcount"),
+				       GEN_INT (TARGET_64BIT ? 16 : 8),
+				       XEXP (DECL_RTL (cfun->decl), 0)));
+#endif
+
+  /* Indicate the _mcount call cannot throw, nor will it execute a
+     non-local goto.  */
+  REG_NOTES (call_insn)
+    = gen_rtx_EXPR_LIST (REG_EH_REGION, constm1_rtx, REG_NOTES (call_insn));
+
+  if (flag_pic)
+    {
+      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+      if (TARGET_64BIT)
+	use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
+
+      emit_move_insn (pic_offset_table_rtx, PIC_OFFSET_TABLE_SAVE_RTX);
+    }
 }

 /* Fetch the return address for the frame COUNT steps up from
@@ -3398,15 +3564,17 @@ return_addr_rtx (count, frameaddr)
   rtx saved_rp;
   rtx ins;

-  saved_rp = gen_reg_rtx (Pmode);
+  if (TARGET_64BIT)
+    return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -16));
+
+  if (TARGET_NO_SPACE_REGS)
+    return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -20));

   /* First, we start off with the normal return address pointer from
      -20[frameaddr].  */

-  if (TARGET_64BIT)
-    return gen_rtx_MEM (Pmode, plus_constant (frameaddr, -16));
-  else
-    emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD));
+  saved_rp = gen_reg_rtx (Pmode);
+  emit_move_insn (saved_rp, plus_constant (frameaddr, -20));

   /* Get pointer to the instruction stream.  We have to mask out the
      privilege level from the two low order bits of the return address
@@ -3457,7 +3625,7 @@ return_addr_rtx (count, frameaddr)
      but rather the return address that leads back into user code.
      That return address is stored at -24[frameaddr].  */

-  emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD));
+  emit_move_insn (saved_rp, plus_constant (frameaddr, -24));

   emit_label (label);
   return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp));
@@ -3475,7 +3643,6 @@ hppa_can_use_return_insn_p ()
 {
   return (reload_completed
 	  && (compute_frame_size (get_frame_size (), 0) ? 0 : 1)
-	  && ! profile_flag
 	  && ! regs_ever_live[2]
 	  && ! frame_pointer_needed);
 }
@@ -3776,7 +3943,7 @@ pa_adjust_insn_length (insn, length)
       else
 	return 0;
     }
-  /* Jumps inside switch tables which have unfilled delay slots
+  /* Jumps inside switch tables which have unfilled delay slots
      also need adjustment.  */
   else if (GET_CODE (insn) == JUMP_INSN
 	   && simplejump_p (insn)
@@ -3854,7 +4021,7 @@ print_operand (file, x, code)
     case 'R':
       /* Print out the second register name of a register pair.
 	 I.e., R (6) => 7.  */
-      fputs (reg_names[REGNO (x)+1], file);
+      fputs (reg_names[REGNO (x) + 1], file);
       return;
     case 'r':
       /* A register or zero. */
@@ -3944,8 +4111,8 @@ print_operand (file, x, code)
 	  abort ();
 	}
       return;
-    /* For floating point comparisons. Note that the output predicates are the
-       complement of the desired mode. */
+    /* For floating point comparisons.  Note that the output
+       predicates are the complement of the desired mode.  */
     case 'Y':
       switch (GET_CODE (x))
 	{
@@ -4041,42 +4208,42 @@ print_operand (file, x, code)
 	  fprintf (file, "%d", ~INTVAL (x));
 	  return;
 	}
-      abort();
+      abort ();
     case 'Q':
       if (GET_CODE (x) == CONST_INT)
 	{
 	  fprintf (file, "%d", 64 - (INTVAL (x) & 63));
 	  return;
 	}
-      abort();
+      abort ();
     case 'L':
       if (GET_CODE (x) == CONST_INT)
 	{
 	  fprintf (file, "%d", 32 - (INTVAL (x) & 31));
 	  return;
 	}
-      abort();
+      abort ();
     case 'O':
       if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0)
 	{
 	  fprintf (file, "%d", exact_log2 (INTVAL (x)));
 	  return;
 	}
-      abort();
+      abort ();
     case 'p':
       if (GET_CODE (x) == CONST_INT)
 	{
 	  fprintf (file, "%d", 63 - (INTVAL (x) & 63));
 	  return;
 	}
-      abort();
+      abort ();
     case 'P':
       if (GET_CODE (x) == CONST_INT)
 	{
 	  fprintf (file, "%d", 31 - (INTVAL (x) & 31));
 	  return;
 	}
-      abort();
+      abort ();
     case 'I':
       if (GET_CODE (x) == CONST_INT)
 	fputs ("i", file);
@@ -4237,7 +4404,7 @@ output_global_address (file, x, round_co
 	  output_addr_const (file, base);
 	}
       else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
-	offset = INTVAL (XEXP (XEXP (x, 0),1));
+	offset = INTVAL (XEXP (XEXP (x, 0), 1));
       else abort ();

       /* How bogus.  The compiler is apparently responsible for
@@ -4270,7 +4437,7 @@ output_global_address (file, x, round_co
       if (!read_only_operand (base, VOIDmode) && !flag_pic)
 	fputs ("-$global$", file);
       if (offset)
-	fprintf (file,"%s%d", sep, offset);
+	fprintf (file, "%s%d", sep, offset);
     }
   else
     output_addr_const (file, x);
@@ -4304,7 +4471,7 @@ output_deferred_plabels (file)

 enum millicodes { remI, remU, divI, divU, mulI, mulU, end1000 };
 static void import_milli			PARAMS ((enum millicodes));
-static char imported[(int)end1000];
+static char imported[(int) end1000];
 static const char * const milli_names[] = {"remI", "remU", "divI", "divU", "mulI", "mulU"};
 static char import_string[] = ".IMPORT $$....,MILLICODE";
 #define MILLI_START 10
@@ -4315,11 +4482,11 @@ import_milli (code)
 {
   char str[sizeof (import_string)];

-  if (!imported[(int)code])
+  if (!imported[(int) code])
     {
-      imported[(int)code] = 1;
+      imported[(int) code] = 1;
       strcpy (str, import_string);
-      strncpy (str + MILLI_START, milli_names[(int)code], 4);
+      strncpy (str + MILLI_START, milli_names[(int) code], 4);
       output_asm_insn (str, 0);
     }
 }
@@ -4369,6 +4536,8 @@ emit_hpdiv_const (operands, unsignedp)
       && INTVAL (operands[2]) < 16
       && magic_milli[INTVAL (operands[2])])
     {
+      rtx ret = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
+
       emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]);
       emit
 	(gen_rtx
@@ -4382,7 +4551,7 @@ emit_hpdiv_const (operands, unsignedp)
 		     gen_rtx_CLOBBER (VOIDmode, operands[3]),
 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)),
 		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)),
-		     gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31)))));
+		     gen_rtx_CLOBBER (VOIDmode, ret))));
       emit_move_insn (operands[0], gen_rtx_REG (SImode, 29));
       return 1;
     }
@@ -4474,8 +4643,8 @@ output_arg_descriptor (call_insn)
   int regno;

   /* We neither need nor want argument location descriptors for the
-     64bit runtime environment.  */
-  if (TARGET_64BIT)
+     64bit runtime environment or the ELF32 environment.  */
+  if (TARGET_64BIT || TARGET_ELF32)
     return;

   for (i = 0; i < 4; i++)
@@ -4627,7 +4796,7 @@ secondary_reload_class (class, mode, in)
         is_symbolic = 0;
         break;
     }
-
+
   if (!flag_pic
       && is_symbolic
       && read_only_operand (in, VOIDmode))
@@ -4687,7 +4856,7 @@ hppa_builtin_saveregs ()
   if (TARGET_64BIT)
     {
       int i, off;
-
+
       /* Adjust for varargs/stdarg differences.  */
       if (argadj)
 	offset = plus_constant (current_function_arg_offset_rtx, -argadj);
@@ -4771,7 +4940,7 @@ hppa_va_arg (valist, type)
         {
           t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
                      build_int_2 (2 * UNITS_PER_WORD - 1, 0));
-          t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
+          t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
                      build_int_2 (-2 * UNITS_PER_WORD, -1));
           t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
           TREE_SIDE_EFFECTS (t) = 1;
@@ -4789,7 +4958,7 @@ hppa_va_arg (valist, type)
   /* "Large" types are passed by reference.  */
   if (size > 8)
     {
-      t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
+      t = build (PREDECREMENT_EXPR, TREE_TYPE (valist), valist,
 		 build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
       TREE_SIDE_EFFECTS (t) = 1;

@@ -4812,7 +4981,7 @@ hppa_va_arg (valist, type)

       t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
       TREE_SIDE_EFFECTS (t) = 1;
-
+
       ofs = (8 - size) % 4;
       if (ofs)
 	{
@@ -5028,9 +5197,9 @@ output_cbranch (operands, nullify, lengt
 	/* Now restore the value of %r1 in the delay slot.  We're not
 	   optimizing so we know nothing else can be in the delay slot.  */
 	return "ldw -16(%%r30),%%r1";
-
+
       default:
-	abort();
+	abort ();
     }
   return buf;
 }
@@ -5177,7 +5346,7 @@ output_bb (operands, nullify, length, ne
 	break;

       default:
-	abort();
+	abort ();
     }
   return buf;
 }
@@ -5325,7 +5494,7 @@ output_bvb (operands, nullify, length, n
 	break;

       default:
-	abort();
+	abort ();
     }
   return buf;
 }
@@ -5350,8 +5519,8 @@ output_dbra (operands, insn, which_alter
 	return "ldo %1(%0),%0";
       else if (which_alternative == 1)
 	{
-	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)",operands);
-	  output_asm_insn ("ldw -16(%%r30),%4",operands);
+	  output_asm_insn ("{fstws|fstw} %0,-16(%%r30)", operands);
+	  output_asm_insn ("ldw -16(%%r30),%4", operands);
 	  output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
 	  return "{fldws|fldw} -16(%%r30),%0";
 	}
@@ -5407,7 +5576,7 @@ output_dbra (operands, insn, which_alter
 	    return "addi,%N2 %1,%0,%0\n\tb %3";
 	}
       else
-	abort();
+	abort ();
     }
   /* Deal with gross reload from FP register case.  */
   else if (which_alternative == 1)
@@ -5415,7 +5584,8 @@ output_dbra (operands, insn, which_alter
       /* Move loop counter from FP register to MEM then into a GR,
 	 increment the GR, store the GR into MEM, and finally reload
 	 the FP register from MEM from within the branch's delay slot.  */
-      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",operands);
+      output_asm_insn ("{fstws|fstw} %0,-16(%%r30)\n\tldw -16(%%r30),%4",
+		       operands);
       output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(%%r30)", operands);
       if (get_attr_length (insn) == 24)
 	return "{comb|cmpb},%S2 %%r0,%4,%3\n\t{fldws|fldw} -16(%%r30),%0";
@@ -5456,7 +5626,7 @@ output_movb (operands, insn, which_alter
 	return "copy %1,%0";
       else if (which_alternative == 1)
 	{
-	  output_asm_insn ("stw %1,-16(%%r30)",operands);
+	  output_asm_insn ("stw %1,-16(%%r30)", operands);
 	  return "{fldws|fldw} -16(%%r30),%0";
 	}
       else if (which_alternative == 2)
@@ -5514,7 +5684,7 @@ output_movb (operands, insn, which_alter
 	    return "or,%N2 %1,%%r0,%0\n\tb %3";
 	}
       else
-	abort();
+	abort ();
     }
   /* Deal with gross reload from FP register case.  */
   else if (which_alternative == 1)
@@ -5522,7 +5692,7 @@ output_movb (operands, insn, which_alter
       /* Move loop counter from FP register to MEM then into a GR,
 	 increment the GR, store the GR into MEM, and finally reload
 	 the FP register from MEM from within the branch's delay slot.  */
-      output_asm_insn ("stw %1,-16(%%r30)",operands);
+      output_asm_insn ("stw %1,-16(%%r30)", operands);
       if (get_attr_length (insn) == 12)
 	return "{comb|cmpb},%S2 %%r0,%1,%3\n\t{fldws|fldw} -16(%%r30),%0";
       else
@@ -5563,9 +5733,7 @@ output_millicode_call (insn, call_dest)
   rtx xoperands[4];
   rtx seq_insn;

-  xoperands[3] = gen_rtx_REG (SImode, 31);
-  if (TARGET_64BIT)
-    xoperands[3] = gen_rtx_REG (SImode, 2);
+  xoperands[3] = gen_rtx_REG (Pmode, TARGET_64BIT ? 2 : 31);

   /* Handle common case -- empty delay slot or no jump in the delay slot,
      and we're sure that the branch will reach the beginning of the $CODE$
@@ -5601,18 +5769,32 @@ output_millicode_call (insn, call_dest)
 	  delay_insn_deleted = 1;
 	}

-      /* If we're allowed to use be/ble instructions, then this is the
-	 best sequence to use for a long millicode call.  */
-      if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS
-	  || ! (flag_pic  || TARGET_PORTABLE_RUNTIME))
+      /* PIC long millicode call sequence.  */
+      if (flag_pic)
 	{
 	  xoperands[0] = call_dest;
-	  output_asm_insn ("ldil L%%%0,%3", xoperands);
-	  output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
+	  xoperands[1] = gen_label_rtx ();
+	  /* Get our address + 8 into %r1.  */
+	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+
+	  /* Add %r1 to the offset of our target from the next insn.  */
+	  output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
+	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+				     CODE_LABEL_NUMBER (xoperands[1]));
+	  output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
+
+	  /* Get the return address into %r31.  */
+	  output_asm_insn ("blr 0,%3", xoperands);
+
+	  /* Branch to our target which is in %r1.  */
+	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
+
+	  /* Empty delay slot.  Note this insn gets fetched twice and
+	     executed once.  To be safe we use a nop.  */
 	  output_asm_insn ("nop", xoperands);
 	}
       /* Pure portable runtime doesn't allow be/ble; we also don't have
-	 PIC support int he assembler/linker, so this sequence is needed.  */
+	 PIC support in the assembler/linker, so this sequence is needed.  */
       else if (TARGET_PORTABLE_RUNTIME)
 	{
 	  xoperands[0] = call_dest;
@@ -5629,30 +5811,14 @@ output_millicode_call (insn, call_dest)
 	  /* Empty delay slot.  Note this insn gets fetched twice and
 	     executed once.  To be safe we use a nop.  */
 	  output_asm_insn ("nop", xoperands);
-	  return "";
 	}
-      /* PIC long millicode call sequence.  */
+      /* If we're allowed to use be/ble instructions, then this is the
+	 best sequence to use for a long millicode call.  */
       else
 	{
 	  xoperands[0] = call_dest;
-	  xoperands[1] = gen_label_rtx ();
-	  /* Get our address + 8 into %r1.  */
-	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
-
-	  /* Add %r1 to the offset of our target from the next insn.  */
-	  output_asm_insn ("addil L%%%0-%1,%%r1", xoperands);
-	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
-				     CODE_LABEL_NUMBER (xoperands[1]));
-	  output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands);
-
-	  /* Get the return address into %r31.  */
-	  output_asm_insn ("blr 0,%3", xoperands);
-
-	  /* Branch to our target which is in %r1.  */
-	  output_asm_insn ("bv,n %%r0(%%r1)", xoperands);
-
-	  /* Empty delay slot.  Note this insn gets fetched twice and
-	     executed once.  To be safe we use a nop.  */
+	  output_asm_insn ("ldil L%%%0,%3", xoperands);
+	  output_asm_insn ("{ble|be,l} R%%%0(%%sr4,%3)", xoperands);
 	  output_asm_insn ("nop", xoperands);
 	}

@@ -5987,7 +6153,7 @@ hppa_encode_label (sym)
   *p++ = '@';
   strcpy (p, str);

-  XSTR (sym,0) = ggc_alloc_string (newstr, len);
+  XSTR (sym, 0) = ggc_alloc_string (newstr, len);
 }

 int
@@ -6331,7 +6497,7 @@ output_parallel_addb (operands, length)
    It is also used to avoid filling the delay slot of a jump which
    immediately follows a call since the jump can usually be eliminated
    completely by modifying RP in the delay slot of the call.  */
-
+
 int
 following_call (insn)
      rtx insn;
@@ -6381,10 +6547,10 @@ following_call (insn)
    Reorg and the final jump pass can then optimize these branches and
    fill their delay slots.  We end up with smaller, more efficient code.

-   The jump instructions within the table are special; we must be able
+   The jump instructions within the table are special; we must be able
    to identify them during assembly output (if the jumps don't get filled
    we need to emit a nop rather than nullifying the delay slot)).  We
-   identify jumps in switch tables by marking the SET with DImode.
+   identify jumps in switch tables by marking the SET with DImode.

    We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB
    insns.  This serves two purposes, first it prevents jump.c from
@@ -6680,7 +6846,7 @@ pa_combine_instructions (insns)
 		      || (GET_CODE (floater) == INSN
 			  && (GET_CODE (PATTERN (floater)) == USE
 			      || GET_CODE (PATTERN (floater)) == CLOBBER)))
-
+
 		    continue;

 		  /* Anything except a regular INSN will stop our search.  */
@@ -6704,8 +6870,10 @@ pa_combine_instructions (insns)
 			 done with this pass.  */
 		      if (pa_can_combine_p (new, anchor, floater, 1,
 					    SET_DEST (PATTERN (floater)),
-					    XEXP (SET_SRC (PATTERN(floater)),0),
-					    XEXP(SET_SRC(PATTERN(floater)),1)))
+					    XEXP (SET_SRC (PATTERN (floater)),
+						  0),
+					    XEXP (SET_SRC (PATTERN (floater)),
+						  1)))
 			break;
 		    }
 		}
@@ -6824,12 +6992,18 @@ pa_can_combine_p (new, anchor, floater,

    Millicode calls always expect their arguments in the integer argument
    registers, and always return their result in %r29 (ret1).  They
-   are expected to clobber their arguments, %r1, %r29, and %r31 and
-   nothing else.
+   are expected to clobber their arguments, %r1, %r29, and the return
+   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

-   By considering this effects delayed reorg reorg can put insns
-   which set the argument registers into the delay slot of the millicode
-   call -- thus they act more like traditional CALL_INSNs.
+   This function tells reorg that the references to arguments and
+   millicode calls do not appear to happen until after the millicode call.
+   This allows reorg to put insns which set the argument registers into the
+   delay slot of the millicode call -- thus they act more like traditional
+   CALL_INSNs.
+
+   Note we can not consider side effects of the insn to be delayed because
+   the branch and link insn will clobber the return pointer.  If we happened
+   to use the return pointer in the delay slot of the call, then we lose.

    get_attr_type will try to recognize the given insn, so make sure to
    filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns
@@ -6838,7 +7012,7 @@ int
 insn_refs_are_delayed (insn)
      rtx insn;
 {
-  return ((GET_CODE (insn) == INSN
+  return ((GET_CODE (insn) == INSN
 	   && GET_CODE (PATTERN (insn)) != SEQUENCE
 	   && GET_CODE (PATTERN (insn)) != USE
 	   && GET_CODE (PATTERN (insn)) != CLOBBER
@@ -6849,7 +7023,7 @@ insn_refs_are_delayed (insn)
    if the parameter has any component that is passed in memory.

    This is new code and will be pushed to into the net sources after
-   further testing.
+   further testing.

    ??? We might want to restructure this so that it looks more like other
    ports.  */
@@ -6890,7 +7064,7 @@ function_arg (cum, mode, type, named, in
      particularly in their handling of FP registers.  We might
      be able to cleverly share code between them, but I'm not
      going to bother in the hope that splitting them up results
-     in code that is more easily understood.
+     in code that is more easily understood.

      The 64bit code probably is very wrong for structure passing.  */
   if (TARGET_64BIT)
@@ -6903,7 +7077,7 @@ function_arg (cum, mode, type, named, in
 	 varies based on the size of the target word.  */
       gpr_reg_base = 26 - cum->words;
       fpr_reg_base = 32 + cum->words;
-
+
       /* If the argument is more than a word long, then we need to align
 	 the base registers.  Same caveats as above.  */
       if (FUNCTION_ARG_SIZE (mode, type) > 1)
@@ -6927,16 +7101,16 @@ function_arg (cum, mode, type, named, in
 	    {
 	      rtx loc[8];
 	      int i, offset = 0, ub;
-              ub = FUNCTION_ARG_SIZE (mode, type);
-	      ub = MIN(ub,
-                       MAX(0, max_arg_words - cum->words - (cum->words & 1)));
+              ub = FUNCTION_ARG_SIZE (mode, type);
+	      ub = MIN (ub,
+			MAX (0, max_arg_words - cum->words - (cum->words & 1)));
 	      gpr_reg_base -= (cum->words & 1);
 	      for (i = 0; i < ub; i++)
 		{
 		  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
 					      gen_rtx_REG (DImode,
 							   gpr_reg_base),
-					      GEN_INT(offset));
+					      GEN_INT (offset));
 		  gpr_reg_base -= 1;
 		  offset += 8;
 		}
@@ -6945,7 +7119,7 @@ function_arg (cum, mode, type, named, in
 	      else if (ub == 1)
 		return XEXP (loc[0], 0);
 	      else
-		return gen_rtx_PARALLEL(mode, gen_rtvec_v(ub, loc));
+		return gen_rtx_PARALLEL (mode, gen_rtvec_v (ub, loc));
 	    }
 	}
     }
@@ -6990,7 +7164,7 @@ function_arg (cum, mode, type, named, in
     }
   /* Determine if the register needs to be passed in both general and
      floating point registers.  */
-  if ((TARGET_PORTABLE_RUNTIME || TARGET_64BIT)
+  if ((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
       /* If we are doing soft-float with portable runtime, then there
 	 is no need to worry about FP regs.  */
       && ! TARGET_SOFT_FLOAT
@@ -7046,22 +7220,21 @@ function_arg_partial_nregs (cum, mode, t
      tree type;
      int named ATTRIBUTE_UNUSED;
 {
-  int max_arg_words = 8;
-  int offset = 0;
+  unsigned int max_arg_words = 8;
+  unsigned int offset = 0;

-  if (FUNCTION_ARG_SIZE(mode, type) > 1 && (cum->words & 1))
+  if (FUNCTION_ARG_SIZE (mode, type) > 1 && (cum->words & 1))
     offset = 1;

-  if (cum->words + offset + FUNCTION_ARG_SIZE(mode, type) <= max_arg_words)
+  if (cum->words + offset + FUNCTION_ARG_SIZE (mode, type) <= max_arg_words)
     /* Arg fits fully into registers. */
     return 0;
-  else if (cum->words + offset >= max_arg_words)
+  else if (cum->words + offset >= max_arg_words)
     /* Arg fully on the stack. */
     return 0;
   else
     /* Arg is split. */
     return max_arg_words - cum->words - offset;
-
 }


@@ -7100,12 +7273,11 @@ mark_deferred_plabels (arg)
 /* Called to register all of our global variables with the garbage
    collector.  */

-static void
+static void
 pa_add_gc_roots ()
 {
   ggc_add_rtx_root (&hppa_compare_op0, 1);
   ggc_add_rtx_root (&hppa_compare_op1, 1);
-  ggc_add_rtx_root (&hp_profile_label_rtx, 1);
   ggc_add_root (&deferred_plabels, 1, sizeof (&deferred_plabels),
 		&mark_deferred_plabels);
 }
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa.h gcc/gcc/config/pa/pa.h
--- gnu_gcc/gcc/config/pa/pa.h	Tue May  1 05:56:06 2001
+++ gcc/gcc/config/pa/pa.h	Thu Jun 28 22:37:22 2001
@@ -106,13 +106,9 @@ extern int target_flags;
 #define MASK_DISABLE_FPREGS 2
 #define TARGET_DISABLE_FPREGS (target_flags & MASK_DISABLE_FPREGS)

-/* Generate code which assumes that calls through function pointers will
-   never cross a space boundary.  Such assumptions are generally safe for
-   building kernels and statically linked executables.  Code compiled with
-   this option will fail miserably if the executable is dynamically linked
-   or uses nested functions!
-
-   This is also used to trigger aggressive unscaled index addressing.  */
+/* Generate code which assumes that all space register are equivalent.
+   Triggers aggressive unscaled index addressing and faster
+   builtin_return_address.  */
 #define MASK_NO_SPACE_REGS 4
 #define TARGET_NO_SPACE_REGS (target_flags & MASK_NO_SPACE_REGS)

@@ -148,7 +144,12 @@ extern int target_flags;
 #define MASK_LONG_LOAD_STORE 512
 #define TARGET_LONG_LOAD_STORE (target_flags & MASK_LONG_LOAD_STORE)

-/* Use a faster sequence for indirect calls.  */
+/* Use a faster sequence for indirect calls.  This assumes that calls
+   through function pointers will never cross a space boundary, and
+   that the executable is not dynamically linked.  Such assumptions
+   are generally safe for building kernels and statically linked
+   executables.  Code compiled with this option will fail miserably if
+   the executable is dynamically linked or uses nested functions!  */
 #define MASK_FAST_INDIRECT_CALLS 1024
 #define TARGET_FAST_INDIRECT_CALLS (target_flags & MASK_FAST_INDIRECT_CALLS)

@@ -170,6 +171,11 @@ extern int target_flags;
 #define TARGET_64BIT 0
 #endif

+/* Generate code for ELF32 ABI. */
+#ifndef TARGET_ELF32
+#define TARGET_ELF32 0
+#endif
+
 /* Macro to define tables used to set the flags.
    This is a list in braces of pairs in braces,
    each pair being { "NAME", VALUE }
@@ -526,6 +532,12 @@ extern void hppa_init_pic_save PARAMS ((
 /* Register in which address to store a structure value
    is passed to a function.  */
 #define STRUCT_VALUE_REGNUM 28
+
+/* Describe how we implement __builtin_eh_return.  */
+#define EH_RETURN_DATA_REGNO(N)	\
+  ((N) < 3 ? (N) + 20 : (N) == 3 ? 31 : INVALID_REGNUM)
+#define EH_RETURN_STACKADJ_RTX	gen_rtx_REG (Pmode, 29)
+#define EH_RETURN_HANDLER_RTX	gen_rtx_REG (Pmode, 2)
 
 /* The letters I, J, K, L and M in a register constraint string
    can be used to stand for particular ranges of immediate operands.
@@ -545,16 +557,16 @@ extern void hppa_init_pic_save PARAMS ((
    */

 #define CONST_OK_FOR_LETTER_P(VALUE, C)  \
-  ((C) == 'I' ? VAL_11_BITS_P (VALUE)				\
-   : (C) == 'J' ? VAL_14_BITS_P (VALUE)				\
-   : (C) == 'K' ? zdepi_cint_p (VALUE)				\
-   : (C) == 'L' ? VAL_5_BITS_P (VALUE)				\
-   : (C) == 'M' ? (VALUE) == 0					\
-   : (C) == 'N' ? (((VALUE) & (unsigned long)0x7ff) == 0	\
-		   && (VALUE) == ((((VALUE) & 0xffffffff) ^ (~0x7fffffff)) \
-				  + 0x80000000))		\
-   : (C) == 'O' ? (((VALUE) & ((VALUE) + (long)1)) == 0)	\
-   : (C) == 'P' ? and_mask_p (VALUE)				\
+  ((C) == 'I' ? VAL_11_BITS_P (VALUE)					\
+   : (C) == 'J' ? VAL_14_BITS_P (VALUE)					\
+   : (C) == 'K' ? zdepi_cint_p (VALUE)					\
+   : (C) == 'L' ? VAL_5_BITS_P (VALUE)					\
+   : (C) == 'M' ? (VALUE) == 0						\
+   : (C) == 'N' ? (((VALUE) & (((HOST_WIDE_INT) -1 << 31) | 0x7ff)) == 0 \
+		   || (((VALUE) & (((HOST_WIDE_INT) -1 << 31) | 0x7ff))	\
+		       == (HOST_WIDE_INT) -1 << 31))			\
+   : (C) == 'O' ? (((VALUE) & ((VALUE) + 1)) == 0)			\
+   : (C) == 'P' ? and_mask_p (VALUE)					\
    : 0)

 /* Similar, but for floating or large integer constants, and defining letters
@@ -894,16 +906,13 @@ extern enum cmp_type hppa_branch_type;
 #define FUNCTION_PROLOGUE(FILE, SIZE) \
   output_function_prologue (FILE, SIZE)

-/* Output assembler code to FILE to increment profiler label # LABELNO
-   for profiling a function entry.
+/* On HPPA, we emit profiling code as rtl via PROFILE_HOOK rather than
+   as assembly via FUNCTION_PROFILER.  */
+
+#define FUNCTION_PROFILER(FILE, LABEL) /* nothing */

-   Because HPUX _mcount is so different, we actually emit the
-   profiling code in function_prologue. This just stores LABELNO for
-   that. */
-
-#define PROFILE_BEFORE_PROLOGUE
-#define FUNCTION_PROFILER(FILE, LABELNO) \
-{ extern int hp_profile_labelno; hp_profile_labelno = (LABELNO);}
+#define PROFILE_HOOK(label_no) hppa_profile_hook (label_no)
+void hppa_profile_hook PARAMS ((int label_no));

 /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
    the stack pointer does not matter.  The value is tested only in
@@ -1118,28 +1127,20 @@ extern int may_call_alloca;

    ?!? For now also reject CONST_DOUBLES in 64bit mode.  This will need
    further work.  */
-#ifdef NEW_HP_ASSEMBLER
-#define LEGITIMATE_CONSTANT_P(X)  		\
-  ((GET_MODE_CLASS (GET_MODE (X)) != MODE_FLOAT	\
-    || (X) == CONST0_RTX (GET_MODE (X)))	\
-   && !(TARGET_64BIT && GET_CODE (X) == CONST_DOUBLE) \
-   && !(TARGET_64BIT && GET_CODE (X) == CONST_INT \
-	&& !(cint_ok_for_move (INTVAL (X))	\
-	     || ((INTVAL (X) & 0xffffffff80000000L) == 0xffffffff80000000L) \
-	     || ((INTVAL (X) & 0xffffffff00000000L) == 0x0000000000000000L))) \
-   && !function_label_operand (X, VOIDmode))
-#else
-#define LEGITIMATE_CONSTANT_P(X)  		\
-  ((GET_MODE_CLASS (GET_MODE (X)) != MODE_FLOAT	\
-    || (X) == CONST0_RTX (GET_MODE (X)))	\
-   && (GET_CODE (X) != LABEL_REF || TARGET_GAS)\
-   && !(TARGET_64BIT && GET_CODE (X) == CONST_DOUBLE) \
-   && !(TARGET_64BIT && GET_CODE (X) == CONST_INT \
-	&& !(cint_ok_for_move (INTVAL (X))	\
-	     || ((INTVAL (X) & 0xffffffff80000000L) == 0xffffffff80000000L) \
-	     || ((INTVAL (X) & 0xffffffff00000000L) == 0x0000000000000000L))) \
-   && !function_label_operand (X, VOIDmode))
+#ifndef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 0
 #endif
+#define LEGITIMATE_CONSTANT_P(X)				\
+  ((GET_MODE_CLASS (GET_MODE (X)) != MODE_FLOAT			\
+    || (X) == CONST0_RTX (GET_MODE (X)))			\
+   && (NEW_HP_ASSEMBLER || TARGET_GAS || GET_CODE (X) != LABEL_REF)	\
+   && !(TARGET_64BIT && GET_CODE (X) == CONST_DOUBLE)		\
+   && !(TARGET_64BIT && GET_CODE (X) == CONST_INT		\
+	&& !(HOST_BITS_PER_WIDE_INT <= 32			\
+	     || (INTVAL (X) >= (HOST_WIDE_INT) -1 << 31		\
+		 && INTVAL (X) < (HOST_WIDE_INT) 1 << 32)	\
+	     || cint_ok_for_move (INTVAL (X))))			\
+   && !function_label_operand (X, VOIDmode))

 /* Subroutine for EXTRA_CONSTRAINT.

@@ -1730,8 +1731,8 @@ while (0)

    Millicode calls always expect their arguments in the integer argument
    registers, and always return their result in %r29 (ret1).  They
-   are expected to clobber their arguments, %r1, %r29, and %r31 and
-   nothing else.
+   are expected to clobber their arguments, %r1, %r29, and the return
+   pointer which is %r31 on 32-bit and %r2 on 64-bit, and nothing else.

    This macro tells reorg that the references to arguments and
    millicode calls do not appear to happen until after the millicode call.
@@ -1849,8 +1850,6 @@ while (0)
 #define ASM_OUTPUT_ASCII(FILE, P, SIZE)  \
   output_ascii ((FILE), (P), (SIZE))

-#define ASM_OUTPUT_REG_PUSH(FILE,REGNO)
-#define ASM_OUTPUT_REG_POP(FILE,REGNO)
 /* This is how to output an element of a case-vector that is absolute.
    Note that this method makes filling these branch delay slots
    impossible.  */
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa.md gcc/gcc/config/pa/pa.md
--- gnu_gcc/gcc/config/pa/pa.md	Thu Jun 28 21:32:53 2001
+++ gcc/gcc/config/pa/pa.md	Thu Jun 28 22:37:22 2001
@@ -3869,11 +3869,12 @@
 	      (clobber (match_dup 3))
 	      (clobber (reg:SI 26))
 	      (clobber (reg:SI 25))
-	      (clobber (reg:SI 31))])
+	      (clobber (match_dup 4))])
    (set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
   ""
   "
 {
+  operands[4] = gen_rtx_REG (SImode, TARGET_64BIT ? 2 : 31);
   if (TARGET_PA_11 && ! TARGET_DISABLE_FPREGS && ! TARGET_SOFT_FLOAT)
     {
       rtx scratch = gen_reg_rtx (DImode);
@@ -3930,7 +3931,7 @@
    (clobber (reg:SI 26))
    (clobber (reg:SI 25))
    (clobber (reg:SI 31))]
-  ""
+  "!TARGET_64BIT"
   "* return output_mul_insn (0, insn);"
   [(set_attr "type" "milli")
    (set (attr "length")
@@ -3942,21 +3943,29 @@
                      (const_int 0)))
             (const_int 4)

-;; NO_SPACE_REGS
-            (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+            (ne (symbol_ref "flag_pic")
                 (const_int 0))
-            (const_int 8)
+            (const_int 24)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
-            (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-                     (const_int 0))
-                 (eq (symbol_ref "flag_pic")
-                     (const_int 0)))
-            (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+            (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+                (const_int 0))
+            (const_int 20)]
+
+;; Out of reach, can use ble
+          (const_int 12)))])

-;; Out of range and either PIC or PORTABLE_RUNTIME
-	  (const_int 24)))])
+(define_insn ""
+  [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25)))
+   (clobber (match_operand:SI 0 "register_operand" "=a"))
+   (clobber (reg:SI 26))
+   (clobber (reg:SI 25))
+   (clobber (reg:SI 2))]
+  "TARGET_64BIT"
+  "* return output_mul_insn (0, insn);"
+  [(set_attr "type" "milli")
+   (set (attr "length") (const_int 4))])

 (define_expand "muldi3"
   [(set (match_operand:DI 0 "register_operand" "")
@@ -4015,15 +4024,22 @@
 	      (clobber (match_dup 4))
 	      (clobber (reg:SI 26))
 	      (clobber (reg:SI 25))
-	      (clobber (reg:SI 31))])
+	      (clobber (match_dup 5))])
    (set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
   ""
   "
 {
   operands[3] = gen_reg_rtx (SImode);
-  operands[4] = gen_reg_rtx (SImode);
   if (TARGET_64BIT)
-    operands[4] = gen_rtx_REG (SImode, 2);
+    {
+      operands[5] = gen_rtx_REG (SImode, 2);
+      operands[4] = operands[5];
+    }
+  else
+    {
+      operands[5] = gen_rtx_REG (SImode, 31);
+      operands[4] = gen_reg_rtx (SImode);
+    }
   if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 0))
     DONE;
 }")
@@ -4036,7 +4052,7 @@
    (clobber (reg:SI 26))
    (clobber (reg:SI 25))
    (clobber (reg:SI 31))]
-  ""
+  "!TARGET_64BIT"
   "*
    return output_div_insn (operands, 0, insn);"
   [(set_attr "type" "milli")
@@ -4049,21 +4065,32 @@
                      (const_int 0)))
             (const_int 4)

-;; NO_SPACE_REGS
-            (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+            (ne (symbol_ref "flag_pic")
                 (const_int 0))
-            (const_int 8)
+            (const_int 24)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
-            (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-                     (const_int 0))
-                 (eq (symbol_ref "flag_pic")
-                     (const_int 0)))
-            (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+            (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+                (const_int 0))
+            (const_int 20)]

-;; Out of range and either PIC or PORTABLE_RUNTIME
-	  (const_int 24)))])
+;; Out of reach, can use ble
+          (const_int 12)))])
+
+(define_insn ""
+  [(set (reg:SI 29)
+	(div:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+   (clobber (match_operand:SI 1 "register_operand" "=a"))
+   (clobber (match_operand:SI 2 "register_operand" "=&r"))
+   (clobber (reg:SI 26))
+   (clobber (reg:SI 25))
+   (clobber (reg:SI 2))]
+  "TARGET_64BIT"
+  "*
+   return output_div_insn (operands, 0, insn);"
+  [(set_attr "type" "milli")
+   (set (attr "length") (const_int 4))])

 (define_expand "udivsi3"
   [(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -4073,15 +4100,22 @@
 	      (clobber (match_dup 4))
 	      (clobber (reg:SI 26))
 	      (clobber (reg:SI 25))
-	      (clobber (reg:SI 31))])
+	      (clobber (match_dup 5))])
    (set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
   ""
   "
 {
   operands[3] = gen_reg_rtx (SImode);
-  operands[4] = gen_reg_rtx (SImode);
   if (TARGET_64BIT)
-    operands[4] = gen_rtx_REG (SImode, 2);
+    {
+      operands[5] = gen_rtx_REG (SImode, 2);
+      operands[4] = operands[5];
+    }
+  else
+    {
+      operands[5] = gen_rtx_REG (SImode, 31);
+      operands[4] = gen_reg_rtx (SImode);
+    }
   if (GET_CODE (operands[2]) == CONST_INT && emit_hpdiv_const (operands, 1))
     DONE;
 }")
@@ -4094,7 +4128,7 @@
    (clobber (reg:SI 26))
    (clobber (reg:SI 25))
    (clobber (reg:SI 31))]
-  ""
+  "!TARGET_64BIT"
   "*
    return output_div_insn (operands, 1, insn);"
   [(set_attr "type" "milli")
@@ -4107,21 +4141,32 @@
                      (const_int 0)))
             (const_int 4)

-;; NO_SPACE_REGS
-            (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+            (ne (symbol_ref "flag_pic")
                 (const_int 0))
-            (const_int 8)
+            (const_int 24)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
-            (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-                     (const_int 0))
-                 (eq (symbol_ref "flag_pic")
-                     (const_int 0)))
-            (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+            (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+                (const_int 0))
+            (const_int 20)]

-;; Out of range and either PIC or PORTABLE_RUNTIME
-	  (const_int 24)))])
+;; Out of reach, can use ble
+          (const_int 12)))])
+
+(define_insn ""
+  [(set (reg:SI 29)
+	(udiv:SI (reg:SI 26) (match_operand:SI 0 "div_operand" "")))
+   (clobber (match_operand:SI 1 "register_operand" "=a"))
+   (clobber (match_operand:SI 2 "register_operand" "=&r"))
+   (clobber (reg:SI 26))
+   (clobber (reg:SI 25))
+   (clobber (reg:SI 2))]
+  "TARGET_64BIT"
+  "*
+   return output_div_insn (operands, 1, insn);"
+  [(set_attr "type" "milli")
+   (set (attr "length") (const_int 4))])

 (define_expand "modsi3"
   [(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -4131,14 +4176,21 @@
 	      (clobber (match_dup 4))
 	      (clobber (reg:SI 26))
 	      (clobber (reg:SI 25))
-	      (clobber (reg:SI 31))])
+	      (clobber (match_dup 5))])
    (set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
   ""
   "
 {
-  operands[4] = gen_reg_rtx (SImode);
   if (TARGET_64BIT)
-    operands[4] = gen_rtx_REG (SImode, 2);
+    {
+      operands[5] = gen_rtx_REG (SImode, 2);
+      operands[4] = operands[5];
+    }
+  else
+    {
+      operands[5] = gen_rtx_REG (SImode, 31);
+      operands[4] = gen_reg_rtx (SImode);
+    }
   operands[3] = gen_reg_rtx (SImode);
 }")

@@ -4149,7 +4201,7 @@
    (clobber (reg:SI 26))
    (clobber (reg:SI 25))
    (clobber (reg:SI 31))]
-  ""
+  "!TARGET_64BIT"
   "*
   return output_mod_insn (0, insn);"
   [(set_attr "type" "milli")
@@ -4162,21 +4214,31 @@
                      (const_int 0)))
             (const_int 4)

-;; NO_SPACE_REGS
-            (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+            (ne (symbol_ref "flag_pic")
                 (const_int 0))
-            (const_int 8)
+            (const_int 24)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
-            (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-                     (const_int 0))
-                 (eq (symbol_ref "flag_pic")
-                     (const_int 0)))
-            (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+            (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+                (const_int 0))
+            (const_int 20)]

-;; Out of range and either PIC or PORTABLE_RUNTIME
-	  (const_int 24)))])
+;; Out of reach, can use ble
+          (const_int 12)))])
+
+(define_insn ""
+  [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25)))
+   (clobber (match_operand:SI 0 "register_operand" "=a"))
+   (clobber (match_operand:SI 1 "register_operand" "=&r"))
+   (clobber (reg:SI 26))
+   (clobber (reg:SI 25))
+   (clobber (reg:SI 2))]
+  "TARGET_64BIT"
+  "*
+  return output_mod_insn (0, insn);"
+  [(set_attr "type" "milli")
+   (set (attr "length") (const_int 4))])

 (define_expand "umodsi3"
   [(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -4186,14 +4248,21 @@
 	      (clobber (match_dup 4))
 	      (clobber (reg:SI 26))
 	      (clobber (reg:SI 25))
-	      (clobber (reg:SI 31))])
+	      (clobber (match_dup 5))])
    (set (match_operand:SI 0 "general_operand" "") (reg:SI 29))]
   ""
   "
 {
-  operands[4] = gen_reg_rtx (SImode);
   if (TARGET_64BIT)
-    operands[4] = gen_rtx_REG (SImode, 2);
+    {
+      operands[5] = gen_rtx_REG (SImode, 2);
+      operands[4] = operands[5];
+    }
+  else
+    {
+      operands[5] = gen_rtx_REG (SImode, 31);
+      operands[4] = gen_reg_rtx (SImode);
+    }
   operands[3] = gen_reg_rtx (SImode);
 }")

@@ -4204,7 +4273,7 @@
    (clobber (reg:SI 26))
    (clobber (reg:SI 25))
    (clobber (reg:SI 31))]
-  ""
+  "!TARGET_64BIT"
   "*
   return output_mod_insn (1, insn);"
   [(set_attr "type" "milli")
@@ -4217,21 +4286,31 @@
                      (const_int 0)))
             (const_int 4)

-;; NO_SPACE_REGS
-            (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; Out of reach PIC
+            (ne (symbol_ref "flag_pic")
                 (const_int 0))
-            (const_int 8)
+            (const_int 24)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
-            (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-                     (const_int 0))
-                 (eq (symbol_ref "flag_pic")
-                     (const_int 0)))
-            (const_int 8)]
+;; Out of reach PORTABLE_RUNTIME
+            (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
+                (const_int 0))
+            (const_int 20)]

-;; Out of range and either PIC or PORTABLE_RUNTIME
-	  (const_int 24)))])
+;; Out of reach, can use ble
+          (const_int 12)))])
+
+(define_insn ""
+  [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25)))
+   (clobber (match_operand:SI 0 "register_operand" "=a"))
+   (clobber (match_operand:SI 1 "register_operand" "=&r"))
+   (clobber (reg:SI 26))
+   (clobber (reg:SI 25))
+   (clobber (reg:SI 2))]
+  "TARGET_64BIT"
+  "*
+  return output_mod_insn (1, insn);"
+  [(set_attr "type" "milli")
+   (set (attr "length") (const_int 4))])

 ;;- and instructions
 ;; We define DImode `and` so with DImode `not` we can get
@@ -5513,10 +5592,28 @@
 ;; from within its delay slot to set the value for the 2nd parameter to
 ;; the call.
 (define_insn "call_profiler"
-  [(unspec_volatile [(const_int 0)] 0)
-   (use (match_operand:SI 0 "const_int_operand" ""))]
+  [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+	 (match_operand 1 "" ""))
+   (use (match_operand 2 "" ""))
+   (use (reg:SI 25))
+   (use (reg:SI 26))
+   (clobber (reg:SI 2))]
   ""
-  "{bl|b,l} _mcount,%%r2\;ldo %0(%%r2),%%r25"
+  "*
+{
+  rtx xoperands[3];
+
+  output_arg_descriptor (insn);
+
+  xoperands[0] = operands[0];
+  xoperands[1] = operands[2];
+  xoperands[2] = gen_label_rtx ();
+  output_asm_insn (\"{bl|b,l} %0,%%r2\;ldo %1-%2(%%r2),%%r25\", xoperands);
+
+  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
+			     CODE_LABEL_NUMBER (xoperands[2]));
+  return \"\";
+}"
   [(set_attr "type" "multi")
    (set_attr "length" "8")])

@@ -5785,7 +5882,7 @@
   rtx xoperands[2];

   /* First the special case for kernels, level 0 systems, etc.  */
-  if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)
+  if (TARGET_FAST_INDIRECT_CALLS)
     return \"ble 0(%%sr4,%%r22)\;copy %%r31,%%r2\";

   /* Now the normal case -- we can reach $$dyncall directly or
@@ -5820,8 +5917,8 @@
   [(set_attr "type" "dyncall")
    (set (attr "length")
      (cond [
-;; First NO_SPACE_REGS
-	    (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; First FAST_INDIRECT_CALLS
+	    (ne (symbol_ref "TARGET_FAST_INDIRECT_CALLS")
 		(const_int 0))
 	    (const_int 8)

@@ -5832,19 +5929,18 @@
 		     (const_int 0)))
 	    (const_int 8)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-	    (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-		     (const_int 0))
-		 (eq (symbol_ref "flag_pic")
-		     (const_int 0)))
-	    (const_int 12)
+;; Out of reach PIC
+	    (ne (symbol_ref "flag_pic")
+		(const_int 0))
+	    (const_int 24)

+;; Out of reach PORTABLE_RUNTIME
 	    (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
 		(const_int 0))
 	    (const_int 20)]

-;; Out of range PIC case
-	  (const_int 24)))])
+;; Out of reach, can use ble
+	  (const_int 12)))])

 (define_expand "call_value"
   [(parallel [(set (match_operand 0 "" "")
@@ -5964,7 +6060,7 @@
   rtx xoperands[2];

   /* First the special case for kernels, level 0 systems, etc.  */
-  if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)
+  if (TARGET_FAST_INDIRECT_CALLS)
     return \"ble 0(%%sr4,%%r22)\;copy %%r31,%%r2\";

   /* Now the normal case -- we can reach $$dyncall directly or
@@ -5999,8 +6095,8 @@
   [(set_attr "type" "dyncall")
    (set (attr "length")
      (cond [
-;; First NO_SPACE_REGS
-	    (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
+;; First FAST_INDIRECT_CALLS
+	    (ne (symbol_ref "TARGET_FAST_INDIRECT_CALLS")
 		(const_int 0))
 	    (const_int 8)

@@ -6011,19 +6107,18 @@
 		     (const_int 0)))
 	    (const_int 8)

-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-	    (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-		     (const_int 0))
-		 (eq (symbol_ref "flag_pic")
-		     (const_int 0)))
-	    (const_int 12)
+;; Out of reach PIC
+	    (ne (symbol_ref "flag_pic")
+		(const_int 0))
+	    (const_int 24)

+;; Out of reach PORTABLE_RUNTIME
 	    (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
 		(const_int 0))
 	    (const_int 20)]

-;; Out of range PIC case
-	  (const_int 24)))])
+;; Out of reach, can use ble
+	  (const_int 12)))])

 ;; Call subroutine returning any type.

@@ -7025,7 +7120,7 @@
 	      (clobber (reg:SI 31))])
    (set (match_operand:SI 0 "register_operand" "")
 	(reg:SI 29))]
-  "! TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+  "! TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && !TARGET_ELF32"
   "
 {
   operands[2] = gen_reg_rtx (SImode);
@@ -7096,26 +7191,18 @@
                      (const_int 0)))
             (const_int 28)

-;; NO_SPACE_REGS
-            (ne (symbol_ref "TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS")
-                (const_int 0))
-            (const_int 32)
-
-;; Out of reach, but not PIC or PORTABLE_RUNTIME
-;; same as NO_SPACE_REGS code
-            (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-                     (const_int 0))
-                 (eq (symbol_ref "flag_pic")
-                     (const_int 0)))
-            (const_int 32)
+;; Out of reach PIC
+	    (ne (symbol_ref "flag_pic")
+		(const_int 0))
+	    (const_int 44)

-;; PORTABLE_RUNTIME
+;; Out of reach PORTABLE_RUNTIME
 	    (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
 		(const_int 0))
 	    (const_int 40)]

-;; Out of range and PIC
-	  (const_int 44)))])
+;; Out of reach, can use ble
+          (const_int 32)))])

 ;; On the PA, the PIC register is call clobbered, so it must
 ;; be saved & restored around calls by the caller.  If the call
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa32-linux.h gcc/gcc/config/pa/pa32-linux.h
--- gnu_gcc/gcc/config/pa/pa32-linux.h	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/pa32-linux.h	Mon Feb 19 06:54:41 2001
@@ -0,0 +1,26 @@
+/* Definitions for PA_RISC with ELF-32 format
+   Copyright (C) 2000 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* Turn off various SOM-specific definitions we don't want.  */
+#undef TARGET_ELF32
+#define TARGET_ELF32 1
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{mhppa:-D__hppa__} %{posix:-D_POSIX_SOURCE} -D_PA_RISC1_1"
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa32-regs.h gcc/gcc/config/pa/pa32-regs.h
--- gnu_gcc/gcc/config/pa/pa32-regs.h	Thu Jun 28 21:32:53 2001
+++ gcc/gcc/config/pa/pa32-regs.h	Thu Jun 28 22:37:22 2001
@@ -186,7 +186,12 @@

 #define DBX_REGISTER_NUMBER(REGNO) \
   ((REGNO) <= 31 ? (REGNO) :						\
-   ((REGNO) > 31 && (REGNO) <= 87 ? (REGNO) + 40 : 32))
+   ((REGNO) <= 87 ? (REGNO) + 40 : 32))
+
+/* We must not use the DBX register numbers for the DWARF 2 CFA column
+   numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER.
+   Instead use the identity mapping.  */
+#define DWARF_FRAME_REGNUM(REG) REG

 /* Define the classes of registers for register constraints in the
    machine description.  Also define ranges of constants.
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa64-hpux.h gcc/gcc/config/pa/pa64-hpux.h
--- gnu_gcc/gcc/config/pa/pa64-hpux.h	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/pa64-hpux.h	Mon Feb 19 06:54:41 2001
@@ -0,0 +1,342 @@
+/* Definitions of target machine for GNU compiler, for HPs running
+   HPUX using the 64bit runtime model.
+   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+/* We can debug dynamically linked executables on hpux11; we also
+   want dereferencing of a NULL pointer to cause a SEGV.  */
+#undef LINK_SPEC
+#define LINK_SPEC \
+  "-E %{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{shared:-shared}"
+
+/* Like the default, except no -lg.  */
+#undef LIB_SPEC
+#define LIB_SPEC \
+  "%{!shared:\
+     %{!p:\
+       %{!pg:\
+         %{!threads:-lc}\
+         %{threads:-lcma -lc_r}}\
+       %{p: -L/lib/libp/ -lc}\
+       %{pg: -L/lib/libp/ -lc}}} /usr/lib/pa20_64/milli.a"
+
+/* Under hpux11, the normal location of the `ld' and `as' programs is the
+   /usr/ccs/bin directory.  */
+
+#ifndef CROSS_COMPILE
+#undef MD_EXEC_PREFIX
+#define MD_EXEC_PREFIX "/opt/langtools/bin"
+#endif
+
+/* Under hpux11 the normal location of the various *crt*.o files is the
+   /usr/ccs/lib directory.  */
+
+#ifndef CROSS_COMPILE
+#undef MD_STARTFILE_PREFIX
+#define MD_STARTFILE_PREFIX "/opt/langtools/lib/pa20_64/"
+#endif
+
+/* hpux11 has the new HP assembler.  It's still lousy, but it's a whole lot
+   better than the assembler shipped with older versions of hpux.  */
+#undef NEW_HP_ASSEMBLER
+#define NEW_HP_ASSEMBLER 1
+
+#undef ASM_FILE_START
+#define ASM_FILE_START(FILE) \
+do {  \
+     if (TARGET_64BIT) \
+       fputs("\t.LEVEL 2.0w\n", FILE); \
+     else if (TARGET_PA_20) \
+       fputs("\t.LEVEL 2.0\n", FILE); \
+     else if (TARGET_PA_11) \
+       fputs("\t.LEVEL 1.1\n", FILE); \
+     else \
+       fputs("\t.LEVEL 1.0\n", FILE); \
+     if (profile_flag)\
+       fprintf (FILE, "\t.IMPORT _mcount, CODE\n");\
+     if (write_symbols != NO_DEBUG) \
+       output_file_directive ((FILE), main_input_filename); \
+   } while (0)
+
+/* It looks like DWARF2 will be the easiest debug format to handle on this
+   platform.  */
+#define OBJECT_FORMAT_ELF
+#define DWARF2_DEBUGGING_INFO
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+/* This isn't quite ready yet.  I'm seeing it mess up some line
+   tables.  For example, we're getting lines starting/ending at
+   impossible addresses.  */
+#define DWARF2_ASM_LINE_DEBUG_INFO 1
+
+
+/* The rest of this file is copied from the generic svr4.h.  One day we
+   would like to simply include svr4.h instead of copying all these
+   definitions.  */
+
+/* Support const sections and the ctors and dtors sections for g++.
+   Note that there appears to be two different ways to support const
+   sections at the moment.  You can either #define the symbol
+   READONLY_DATA_SECTION (giving it some code which switches to the
+   readonly data section) or else you can #define the symbols
+   EXTRA_SECTIONS, EXTRA_SECTION_FUNCTIONS, SELECT_SECTION, and
+   SELECT_RTX_SECTION.  We do both here just to be on the safe side.  */
+
+#define USE_CONST_SECTION	1
+
+#define CONST_SECTION_ASM_OP	"\t.section\t.rodata"
+
+/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
+
+   Note that we want to give these sections the SHF_WRITE attribute
+   because these sections will actually contain data (i.e. tables of
+   addresses of functions in the current root executable or shared library
+   file) and, in the case of a shared library, the relocatable addresses
+   will have to be properly resolved/relocated (and then written into) by
+   the dynamic linker when it actually attaches the given shared library
+   to the executing process.  (Note that on SVR4, you may wish to use the
+   `-z text' option to the ELF linker, when building a shared library, as
+   an additional check that you are doing everything right.  But if you do
+   use the `-z text' option when building a shared library, you will get
+   errors unless the .ctors and .dtors sections are marked as writable
+   via the SHF_WRITE attribute.)  */
+
+#define CTORS_SECTION_ASM_OP	"\t.section\t.ctors,\"aw\""
+#define DTORS_SECTION_ASM_OP	"\t.section\t.dtors,\"aw\""
+
+/* On svr4, we *do* have support for the .init and .fini sections, and we
+   can put stuff in there to be executed before and after `main'.  We let
+   crtstuff.c and other files know this by defining the following symbols.
+   The definitions say how to change sections to the .init and .fini
+   sections.  This is the same for all known svr4 assemblers.  */
+
+/* ??? For the time being, we aren't using init sections. */
+#if 0
+#define INIT_SECTION_ASM_OP	"\t.section\t.init"
+#define FINI_SECTION_ASM_OP	"\t.section\t.fini"
+#endif
+
+/* A default list of other sections which we might be "in" at any given
+   time.  For targets that use additional sections (e.g. .tdesc) you
+   should override this definition in the target-specific file which
+   includes this file.  */
+
+#undef EXTRA_SECTIONS
+#define EXTRA_SECTIONS in_const, in_ctors, in_dtors
+
+/* A default list of extra section function definitions.  For targets
+   that use additional sections (e.g. .tdesc) you should override this
+   definition in the target-specific file which includes this file.  */
+
+#undef EXTRA_SECTION_FUNCTIONS
+#define EXTRA_SECTION_FUNCTIONS						\
+  CONST_SECTION_FUNCTION						\
+  CTORS_SECTION_FUNCTION						\
+  DTORS_SECTION_FUNCTION
+
+#define READONLY_DATA_SECTION() const_section ()
+
+extern void text_section ();
+
+#define CONST_SECTION_FUNCTION						\
+void									\
+const_section ()							\
+{									\
+  if (!USE_CONST_SECTION)						\
+    text_section();							\
+  else if (in_section != in_const)					\
+    {									\
+      fprintf (asm_out_file, "%s\n", CONST_SECTION_ASM_OP);		\
+      in_section = in_const;						\
+    }									\
+}
+
+#define CTORS_SECTION_FUNCTION						\
+void									\
+ctors_section ()							\
+{									\
+  if (in_section != in_ctors)						\
+    {									\
+      fprintf (asm_out_file, "%s\n", CTORS_SECTION_ASM_OP);		\
+      in_section = in_ctors;						\
+    }									\
+}
+
+#define DTORS_SECTION_FUNCTION						\
+void									\
+dtors_section ()							\
+{									\
+  if (in_section != in_dtors)						\
+    {									\
+      fprintf (asm_out_file, "%s\n", DTORS_SECTION_ASM_OP);		\
+      in_section = in_dtors;						\
+    }									\
+}
+
+/* Switch into a generic section.
+
+   We make the section read-only and executable for a function decl,
+   read-only for a const data decl, and writable for a non-const data decl.
+
+   If the section has already been defined, we must not
+   emit the attributes here. The SVR4 assembler does not
+   recognize section redefinitions.
+   If DECL is NULL, no attributes are emitted.  */
+
+#define ASM_OUTPUT_SECTION_NAME(FILE, DECL, NAME, RELOC)		\
+  do									\
+    {									\
+      static htab_t htab;                                               \
+                                                                        \
+      struct section_info                                               \
+      {									\
+	enum sect_enum {SECT_RW, SECT_RO, SECT_EXEC} type;		\
+      };                                                                \
+                                                                        \
+      struct section_info *s;						\
+      const char *mode;							\
+      enum sect_enum type;                                              \
+      PTR* slot;                                                        \
+                                                                        \
+      /* The names we put in the hashtable will always be the unique    \
+	 versions given to us by the stringtable, so we can just use    \
+	 their addresses as the keys.  */                               \
+      if (!htab)                                                        \
+	htab = htab_create (31,                                         \
+			    htab_hash_pointer,                          \
+			    htab_eq_pointer,                            \
+			    NULL);                                      \
+                                                                        \
+      if (DECL && TREE_CODE (DECL) == FUNCTION_DECL)			\
+	type = SECT_EXEC, mode = "ax";					\
+      else if (DECL && DECL_READONLY_SECTION (DECL, RELOC))		\
+	type = SECT_RO, mode = "a";					\
+      else								\
+	type = SECT_RW, mode = "aw";					\
+      									\
+                                                                        \
+      /* See if we already have an entry for this section.  */          \
+      slot = htab_find_slot (htab, NAME, INSERT);                       \
+      if (!*slot)                                                       \
+	{                                                               \
+	  s = (struct section_info *) xmalloc (sizeof (* s));		\
+	  s->type = type;						\
+	  *slot = s;							\
+	  fprintf (FILE, "\t.section\t%s,\"%s\",@progbits\n",		\
+		   NAME, mode);						\
+	}								\
+      else								\
+	{								\
+	  s = (struct section_info *) *slot;                            \
+	  if (DECL && s->type != type)					\
+	    error_with_decl (DECL,                                      \
+			     "%s causes a section type conflict");      \
+	  								\
+	  fprintf (FILE, "\t.section\t%s\n", NAME);			\
+	}								\
+    }									\
+  while (0)
+
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+#define UNIQUE_SECTION_P(DECL) (DECL_ONE_ONLY (DECL))
+#define UNIQUE_SECTION(DECL,RELOC)				\
+do {								\
+  int len;							\
+  char *name, *string, *prefix;					\
+								\
+  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (DECL));	\
+								\
+  if (! DECL_ONE_ONLY (DECL))					\
+    {								\
+      prefix = ".";                                             \
+      if (TREE_CODE (DECL) == FUNCTION_DECL)			\
+	prefix = ".text.";					\
+      else if (DECL_READONLY_SECTION (DECL, RELOC))		\
+	prefix = ".rodata.";					\
+      else							\
+	prefix = ".data.";					\
+    }								\
+  else if (TREE_CODE (DECL) == FUNCTION_DECL)			\
+    prefix = ".gnu.linkonce.t.";				\
+  else if (DECL_READONLY_SECTION (DECL, RELOC))			\
+    prefix = ".gnu.linkonce.r.";				\
+  else								\
+    prefix = ".gnu.linkonce.d.";				\
+								\
+  len = strlen (name) + strlen (prefix);			\
+  string = alloca (len + 1);					\
+  sprintf (string, "%s%s", prefix, name);			\
+								\
+  DECL_SECTION_NAME (DECL) = build_string (len, string);	\
+} while (0)
+
+#define INT_ASM_OP "\t.dword\t"
+/* A C statement (sans semicolon) to output an element in the table of
+   global constructors.  */
+#define ASM_OUTPUT_CONSTRUCTOR(FILE,NAME)				\
+  do {									\
+    ctors_section ();							\
+    fprintf (FILE, "%sP%%", INT_ASM_OP);				\
+    assemble_name (FILE, NAME);						\
+    fprintf (FILE, "\n");						\
+  } while (0)
+
+/* A C statement (sans semicolon) to output an element in the table of
+   global destructors.  */
+#define ASM_OUTPUT_DESTRUCTOR(FILE,NAME)       				\
+  do {									\
+    dtors_section ();                   				\
+    fprintf (FILE, "%sP%%", INT_ASM_OP);				\
+    assemble_name (FILE, NAME);              				\
+    fprintf (FILE, "\n");						\
+  } while (0)
+
+/* ??? For the time being, we aren't using .ctors/.dtors sections. */
+#undef ASM_OUTPUT_DESTRUCTOR
+#undef ASM_OUTPUT_CONSTRUCTOR
+
+/* Define the strings used for the special svr4 .type and .size directives.
+   These strings generally do not vary from one system running svr4 to
+   another, but if a given system (e.g. m88k running svr) needs to use
+   different pseudo-op names for these, they may be overridden in the
+   file which includes this one.  */
+
+#define TYPE_ASM_OP	"\t.type\t"
+#define SIZE_ASM_OP	"\t.size\t"
+
+/* This is how we tell the assembler that a symbol is weak.  */
+
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+  do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+       fputc ('\n', FILE); } while (0)
+
+/* The following macro defines the format used to output the second
+   operand of the .type assembler directive.  Different svr4 assemblers
+   expect various different forms for this operand.  The one given here
+   is just a default.  You may need to override it in your machine-
+   specific tm.h file (depending upon the particulars of your assembler).  */
+
+#define TYPE_OPERAND_FMT	"@%s"
+
+/* Write the extra assembler code needed to declare a function's result.
+   Most svr4 assemblers don't require any special declaration of the
+   result value, but there are exceptions.  */
+
+#ifndef ASM_DECLARE_RESULT
+#define ASM_DECLARE_RESULT(FILE, RESULT)
+#endif
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa64-linux.h gcc/gcc/config/pa/pa64-linux.h
--- gnu_gcc/gcc/config/pa/pa64-linux.h	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/pa64-linux.h	Fri Apr  6 01:54:11 2001
@@ -0,0 +1,77 @@
+/* Definitions for PA-RISC with ELF format on 64-bit Linux
+   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+#undef CPP_SPEC
+#define CPP_SPEC "%{fPIC:-D__PIC__ -D__pic__} %{fpic:-D__PIC__ -D__pic__} %{mhppa:-D__hppa__} %{posix:-D_POSIX_SOURCE} -D_PA_RISC2_0 -D__LP64__"
+
+#if 0 /* possibly needs some work, but I think this is OK now */
+/* If defined, this macro specifies a table of register pairs used to
+   eliminate unneeded registers that point into the stack frame.  */
+
+#define ELIMINABLE_REGS							\
+{									\
+  {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM},				\
+  {ARG_POINTER_REGNUM,	 STACK_POINTER_REGNUM},				\
+  {ARG_POINTER_REGNUM,	 FRAME_POINTER_REGNUM},				\
+}
+
+/* A C expression that returns non-zero if the compiler is allowed to try to
+   replace register number FROM with register number TO.  The frame pointer
+   is automatically handled.  */
+
+#define CAN_ELIMINATE(FROM, TO) \
+  ((FROM) != ARG_POINTER_REGNUM					\
+   || ! (current_function_varargs || current_function_stdarg))
+
+/* This macro is similar to `INITIAL_FRAME_POINTER_OFFSET'.  It
+   specifies the initial difference between the specified pair of
+   registers, immediately after the function prologue.  This macro
+   must be defined if `ELIMINABLE_REGS' is defined.  */
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+  do								\
+    {								\
+      int fsize;						\
+								\
+      fsize = compute_frame_size (get_frame_size (), 0);	\
+      if ((TO) == FRAME_POINTER_REGNUM				\
+	  && (FROM) == ARG_POINTER_REGNUM)			\
+	{							\
+	  (OFFSET) = 48 - current_function_args_size;		\
+	  break;						\
+	}							\
+								\
+      if ((TO) != STACK_POINTER_REGNUM)				\
+	abort ();						\
+								\
+      switch (FROM)						\
+	{							\
+	case FRAME_POINTER_REGNUM:				\
+	  (OFFSET) = -fsize;					\
+	  break;						\
+								\
+	case ARG_POINTER_REGNUM:				\
+	  (OFFSET) = -fsize + 48 - current_function_args_size;	\
+	  break;						\
+								\
+	default:						\
+	  abort ();						\
+	}							\
+    } while (0)
+#endif
diff -urpN -xCVS gnu_gcc/gcc/config/pa/pa64-regs.h gcc/gcc/config/pa/pa64-regs.h
--- gnu_gcc/gcc/config/pa/pa64-regs.h	Mon Jan 22 03:21:00 2001
+++ gcc/gcc/config/pa/pa64-regs.h	Thu Mar 15 21:54:16 2001
@@ -176,6 +176,11 @@ Boston, MA 02111-1307, USA.  */
   ((REGNO) <= 31 ? (REGNO) :						\
    ((REGNO) > 31 && (REGNO) <= 60 ? (REGNO - 32) * 2 + 72 : 32))

+/* We must not use the DBX register numbers for the DWARF 2 CFA column
+   numbers because that maps to numbers beyond FIRST_PSEUDO_REGISTER.
+   Instead use the identity mapping.  */
+#define DWARF_FRAME_REGNUM(REG) REG
+
 /* Define the classes of registers for register constraints in the
    machine description.  Also define ranges of constants.

diff -urpN -xCVS gnu_gcc/gcc/config/pa/t-linux gcc/gcc/config/pa/t-linux
--- gnu_gcc/gcc/config/pa/t-linux	Fri Jul  7 17:59:13 2000
+++ gcc/gcc/config/pa/t-linux	Wed May 16 19:56:57 2001
@@ -1 +1,30 @@
+LIBGCC1=libgcc1-asm.a
+CROSS_LIBGCC1=libgcc1-asm.a
+
+#Plug millicode routines into libgcc.a.  We want these on both native and
+#cross compiles.
+
+LIB1ASMFUNCS =  _divI _divU _remI _remU _multiply \
+	_divI_15 _divI_14 _divI_12 _divI_10 _divI_9 \
+	_divI_7 _divI_6 _divI_5 _divI_3 \
+	_divU_15 _divU_14 _divU_12 _divU_10 _divU_9 \
+	_divU_7 _divU_6 _divU_5 _divU_3 _dyncall
+
+LIB1ASMSRC = pa/milli32.S
+
+# Don't build a shared libgcc_s.so.  Our libgcc contains millicode, and
+# the ABI (linker and dynamic linker really) does not allow millicode
+# to be exported from shared libraries.  Consequently, to successfully
+# link against libgcc_s.so it is necessary to link against _both_
+# libgcc_s.so and libgcc.a.  This is a pain.  It's easier just to disable
+# the shared libgcc.
+SHLIB_LINK =
+
+# Compile crtbeginS.o and crtendS.o as PIC.
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+# Compile libgcc2.a as PIC.
+# This is also used when compiling libgcc1 if libgcc1 is the asm variety.
+TARGET_LIBGCC2_CFLAGS = -fPIC -DELF=1 -DLINUX=1
+
 ADA_CFLAGS=-mdisable-indexing
diff -urpN -xCVS gnu_gcc/gcc/config/pa/t-linux64 gcc/gcc/config/pa/t-linux64
--- gnu_gcc/gcc/config/pa/t-linux64	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/t-linux64	Wed May 16 19:56:57 2001
@@ -0,0 +1,25 @@
+LIBGCC1=libgcc1-asm.a
+CROSS_LIBGCC1=libgcc1-asm.a
+
+#Plug millicode routines into libgcc.a.  We want these on both native and
+#cross compiles.
+
+LIB1ASMFUNCS =  _divI _divU _remI _remU _div_const _mulI
+
+LIB1ASMSRC = pa/milli64.S
+
+# Don't build a shared libgcc_s.so.  Our libgcc contains millicode, and
+# the ABI (linker and dynamic linker really) does not allow millicode
+# to be exported from shared libraries.  Consequently, to successfully
+# link against libgcc_s.so it is necessary to link against _both_
+# libgcc_s.so and libgcc.a.  This is a pain.  It's easier just to disable
+# the shared libgcc.
+SHLIB_LINK =
+
+# Compile crtbeginS.o and crtendS.o as PIC.
+# Actually, hppa64 is always PIC but adding -fPIC does no harm.
+CRTSTUFF_T_CFLAGS_S = -fPIC
+
+# Compile libgcc2.a as PIC.
+# This is also used when compiling libgcc1 if libgcc1 is the asm variety.
+TARGET_LIBGCC2_CFLAGS = -fPIC -Dpa64=1 -DELF=1
diff -urpN -xCVS gnu_gcc/gcc/config/pa/t-pa64 gcc/gcc/config/pa/t-pa64
--- gnu_gcc/gcc/config/pa/t-pa64	Fri Jul  7 17:59:17 2000
+++ gcc/gcc/config/pa/t-pa64	Mon Feb 19 06:54:42 2001
@@ -1,9 +1,12 @@
 LIBGCC1=libgcc1.null
-CROSS_LIBGCC1=libgcc1.null
+CROSS_LIBGCC1=libgcc1-asm.a
+
+LIB1ASMFUNCS =  _divI _divU _remI _remU _div_const
+LIB1ASMSRC = pa/milli64.S
+TARGET_LIBGCC2_CFLAGS = -fPIC -Dpa64=1 -DELF=1
+
 ADA_CFLAGS=-mdisable-indexing
 LIB2FUNCS_EXTRA=quadlib.c
-
-TARGET_LIBGCC2_CFLAGS = -fPIC

 # We'll need this once .init sections are enabled on PA64.
 #EXTRA_PARTS = crtbegin.o crtend.o
diff -urpN -xCVS gnu_gcc/gcc/config/pa/xm-linux64.h gcc/gcc/config/pa/xm-linux64.h
--- gnu_gcc/gcc/config/pa/xm-linux64.h	Wed Dec 31 17:00:00 1969
+++ gcc/gcc/config/pa/xm-linux64.h	Thu Mar 22 21:24:06 2001
@@ -0,0 +1,43 @@
+/* Configuration for GNU C-compiler for PA-RISC.
+   Copyright (C) 1988, 1995 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+
+extern int errno;
+
+/* This describes the machine the compiler is hosted on.  */
+#define HOST_BITS_PER_CHAR 8
+#define HOST_BITS_PER_SHORT 16
+#define HOST_BITS_PER_INT 32
+#define HOST_BITS_PER_LONG 64
+#define HOST_BITS_PER_LONGLONG 64
+
+/* Doubles are stored in memory with the high order word first.  This
+   matters when cross-compiling.  */
+#define HOST_WORDS_BIG_ENDIAN 1
+
+/* target machine dependencies.
+   tm.h is a symbolic link to the actual target specific file.   */
+#include "tm.h"
+
+/* Arguments to use with `exit'.  */
+#define SUCCESS_EXIT_CODE 0
+#define FATAL_EXIT_CODE 33
+
+#include <xm-linux.h>
diff -urpN -xCVS gnu_gcc/gcc/config.gcc gcc/gcc/config.gcc
--- gnu_gcc/gcc/config.gcc	Mon Sep  3 02:50:56 2001
+++ gcc/gcc/config.gcc	Mon Sep  3 03:43:23 2001
@@ -222,7 +222,7 @@ c*-convex-*)
 i[34567]86-*-*)
 	cpu_type=i386
 	;;
-hppa*-*-*)
+hppa*-*-* | parisc*-*-*)
 	cpu_type=pa
 	;;
 m68000-*-*)
@@ -695,12 +695,24 @@ h8300-*-rtems*)
 h8300-*-*)
 	float_format=i32
 	;;
-hppa*-*-linux*)
-	target_cpu_default="(MASK_PA_11 | MASK_GAS | MASK_JUMP_IN_DELAY)"
-	tm_file="${tm_file} pa/elf.h linux.h pa/pa-linux.h"
-	tmake_file="t-slibgcc-elf-ver t-linux pa/t-linux"
-	extra_parts="crtbegin.o crtend.o"
-	xmake_file=none
+hppa*64*-*-linux* | parisc*64*-*-linux*)
+	target_cpu_default="(MASK_PA_11 | MASK_PA_20)"
+	tm_file="pa/pa64-start.h ${tm_file} linux.h pa/pa-linux.h \
+		 pa/pa64-regs.h pa/pa-64.h pa/pa64-linux.h"
+	xm_file=pa/xm-linux64.h
+	tmake_file=pa/t-linux64
+	xmake_file=x-linux
+	extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o"
+	gas=yes gnu_ld=yes
+	;;
+hppa*-*-linux* | parisc*-*-linux*)
+	target_cpu_default="MASK_PA_11"
+	tm_file="${tm_file} linux.h pa/pa-linux.h \
+		 pa/pa32-regs.h pa/pa32-linux.h"
+	xm_file=pa/xm-linux.h
+	tmake_file=pa/t-linux
+	xmake_file=x-linux
+	extra_parts="crtbegin.o crtbeginS.o crtend.o crtendS.o"
 	gas=yes gnu_ld=yes
 	if test x$enable_threads = xyes; then
 		thread_file='posix'
@@ -752,10 +764,6 @@ hppa1.0-*-hpux7*)
 	tm_file="pa/pa-oldas.h ${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux7.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -763,10 +771,8 @@ hppa1.0-*-hpux8.0[0-2]*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
+	if test x$gas != xyes
 	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	else
 		tm_file="pa/pa-oldas.h ${tm_file}"
 	fi
 	install_headers_dir=install-headers-cpio
@@ -777,10 +783,8 @@ hppa1.1-*-hpux8.0[0-2]*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
+	if test x$gas != xyes
 	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	else
 		tm_file="pa/pa-oldas.h ${tm_file}"
 	fi
 	install_headers_dir=install-headers-cpio
@@ -791,10 +795,6 @@ hppa1.1-*-hpux8*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -802,10 +802,6 @@ hppa1.0-*-hpux8*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -816,10 +812,6 @@ hppa1.1-*-hpux10* | hppa2*-*-hpux10*)
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
 	tmake_file=pa/t-pa
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	if test x$enable_threads = x; then
 	    enable_threads=$have_pthread_h
 	fi
@@ -837,10 +829,6 @@ hppa1.0-*-hpux10*)
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
 	tmake_file=pa/t-pa
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	if test x$enable_threads = x; then
 	    enable_threads=$have_pthread_h
 	fi
@@ -856,15 +844,10 @@ hppa*64*-*-hpux11*)
 	xm_file=pa/xm-pa64hpux.h
 	xmake_file=pa/x-pa-hpux
 	tmake_file=pa/t-pa
-	tm_file="pa/pa64-start.h ${tm_file} pa/pa64-regs.h pa/long_double.h pa/elf.h pa/pa-hpux.h pa/pa-hpux11.h pa/pa-64.h"
+	tm_file="pa/pa64-start.h ${tm_file} pa/pa64-regs.h pa/long_double.h pa/elf.h pa/pa-hpux.h pa/pa-hpux11.h pa/pa-64.h pa/pa64-hpux.h"
 	float_format=i128
 	tmake_file=pa/t-pa64
 	target_cpu_default="(MASK_PA_11|MASK_PA_20|MASK_GAS)"
-
-	if [ x$gas = xyes ]
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 #	if [ x$enable_threads = x ]; then
 #	    enable_threads=$have_pthread_h
 #	fi
@@ -882,10 +865,6 @@ hppa1.1-*-hpux11* | hppa2*-*-hpux11*)
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
 	tmake_file=pa/t-pa
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 #	if test x$enable_threads = x; then
 #	    enable_threads=$have_pthread_h
 #	fi
@@ -901,10 +880,6 @@ hppa1.0-*-hpux11*)
 	float_format=i128
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 #	if test x$enable_threads = x; then
 #	    enable_threads=$have_pthread_h
 #	fi
@@ -920,10 +895,6 @@ hppa1.1-*-hpux* | hppa2*-*-hpux*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h pa/pa-hpux9.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -931,10 +902,6 @@ hppa1.0-*-hpux*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h pa/pa-hpux9.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -943,10 +910,6 @@ hppa1.1-*-hiux* | hppa2*-*-hiux*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h pa/pa-hiux.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -954,10 +917,6 @@ hppa1.0-*-hiux*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/som.h pa/pa-hpux.h pa/pa-hiux.h"
 	xm_file=pa/xm-pahpux.h
 	xmake_file=pa/x-pa-hpux
-	if test x$gas = xyes
-	then
-		tm_file="${tm_file} pa/pa-gas.h"
-	fi
 	install_headers_dir=install-headers-cpio
 	use_collect2=yes
 	;;
@@ -970,7 +929,8 @@ hppa*-*-mpeix*)
 	tm_file="${tm_file} pa/pa32-regs.h pa/long_double.h pa/som.h pa/pa-mpeix.h"
 	xm_file=pa/xm-pampeix.h
 	xmake_file=pa/x-pa-mpeix
-	echo "You must use gas. Assuming it is already installed."
+	echo "You must use gas. Assuming it is already installed."
+	gas=yes
 	install_headers_dir=install-headers-tar
 	use_collect2=yes
 	;;
@@ -3524,6 +3484,13 @@ arm*-*-*)
 			fi
 			;;
 	esac
+	;;
+
+hppa*-*-* | parisc*-*-*)
+	if test x$gas = xyes
+	then
+		target_cpu_default2="MASK_GAS|MASK_JUMP_IN_DELAY"
+	fi
 	;;

 mips*-*-ecoff* | mips*-*-elf*)


diff -urpN -xCVS gnu_gcc/gcc/configure.in gcc/gcc/configure.in
--- gnu_gcc/gcc/configure.in	Mon Sep  3 02:50:58 2001
+++ gcc/gcc/configure.in	Mon Sep  3 03:43:23 2001
@@ -1569,7 +1569,8 @@ gcc_cv_as_dwarf2_debug_line=no
 # ??? Once 2.11 is released, probably need to add first known working
 # version to the per-target configury.
 case "$target" in
-  i?86*-*-* | mips*-*-* | alpha*-*-* | powerpc*-*-* | sparc*-*-* | m68*-*-*)
+  i?86*-*-* | mips*-*-* | alpha*-*-* | powerpc*-*-* | sparc*-*-* | m68*-*-* \
+	    | hppa*-*-* | parisc*-*-*)
     insn="nop"
     ;;
   ia64*-*-*)
diff -urpN -xCVS gnu_gcc/libstdc++-v3/config/cpu/hppa/bits/atomicity.h gcc/libstdc++-v3/config/cpu/hppa/bits/atomicity.h
--- gnu_gcc/libstdc++-v3/config/cpu/hppa/bits/atomicity.h	Wed Dec 31 17:00:00 1969
+++ gcc/libstdc++-v3/config/cpu/hppa/bits/atomicity.h	Wed May 30 09:16:49 2001
@@ -0,0 +1,127 @@
+/* Low-level functions for atomic operations.  PA-RISC version. -*- C++ -*-
+   Copyright 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef _BITS_ATOMICITY_H
+#define _BITS_ATOMICITY_H	1
+
+/* Define this to 1 when everyone is using a sensible glibc.  */
+#undef MALLOC_ALIGNS_TO_16BYTE
+
+/* Load and clear, the only PA-RISC atomic read-write operation.  Some
+   cpus only support ldcw on 16 byte aligned words.  *sigh*.  */
+static inline int
+__pa_ldcw (volatile int* lock)
+{
+  int ret;
+  __asm__ __volatile__ ("ldcw 0(%1),%0" : "=r" (ret) : "r" (lock));
+  return ret;
+}
+
+static inline int
+__pa_spin_lock (volatile int* lock)
+{
+  int ret;
+  while ((ret = __pa_ldcw (lock)) == 0)
+    /* If we don't grab the lock, don't clobber the bus with ldcw.  */
+    while (*lock == 0) /* spin */ ;
+  return ret;
+}
+
+
+class _Atomic_counter {
+ public:
+  int lock;
+  int count;
+#if ! MALLOC_ALIGNS_TO_16BYTE
+  /* Two locks???  Well, malloc allocates to 8 byte boundaries, and
+     thus either lock or alt_lock will be 16 byte aligned.  Use
+     whichever one gets the right alignment.  */
+  int alt_lock;
+#endif
+
+  inline
+  _Atomic_counter()
+  {
+    lock = 1;
+#if ! MALLOC_ALIGNS_TO_16BYTE
+    alt_lock = 1;
+#endif
+  }
+
+  inline int
+  operator=(const int __val) { return this->count = __val; }
+
+  inline bool
+  operator<(const int __rhs) const
+    { return this->count < __rhs; }
+
+  inline bool
+  operator>(const int __rhs) const
+    { return this->count > __rhs; }
+};
+
+typedef class _Atomic_counter _Atomic_word;
+
+/* This is horrible, but how else can we get the static initializer
+   correct with our lock(s) set non-zero? */
+#if MALLOC_ALIGNS_TO_16BYTE
+#define _S_EMPTY_REP_INIT = { 0, 0, 1, 0, 0 }
+#else
+#define _S_EMPTY_REP_INIT = { 0, 0, 1, 0, 1, 0 }
+#endif
+
+
+static inline int
+__attribute__ ((__unused__))
+__exchange_and_add (_Atomic_word* mem, int val)
+{
+  int result;
+  int tmp;
+#if MALLOC_ALIGNS_TO_16BYTE
+  int *lock = &mem->lock;
+#else
+  int *lock = (int *) ((long) &mem->alt_lock & ~15);
+#endif
+
+  tmp = __pa_spin_lock (lock);
+  result = mem->count;
+  mem->count += val;
+  __asm__ __volatile__("");
+  *lock = tmp;
+  return result;
+}
+
+static inline void
+__attribute__ ((__unused__))
+__atomic_add (_Atomic_word* mem, int val)
+{
+  int tmp;
+#if MALLOC_ALIGNS_TO_16BYTE
+  int *lock = &mem->lock;
+#else
+  int *lock = (int *) ((long) &mem->alt_lock & ~15);
+#endif
+
+  tmp = __pa_spin_lock (lock);
+  mem->count += val;
+  __asm__ __volatile__("");
+  *lock = tmp;
+}
+
+#endif
diff -urpN -xCVS gnu_gcc/libstdc++-v3/configure.target gcc/libstdc++-v3/configure.target
--- gnu_gcc/libstdc++-v3/configure.target	Mon Sep  3 02:53:26 2001
+++ gcc/libstdc++-v3/configure.target	Mon Sep  3 03:45:10 2001
@@ -30,6 +30,9 @@ case "${target_cpu}" in
   arm*)
     cpu_include_dir="config/cpu/arm"
     ;;
+  hppa*)
+    cpu_include_dir="config/cpu/hppa"
+    ;;
   ia64)
     cpu_include_dir="config/cpu/ia64"
     ;;
diff -urpN -xCVS gnu_gcc/libstdc++-v3/include/bits/basic_string.h gcc/libstdc++-v3/include/bits/basic_string.h
--- gnu_gcc/libstdc++-v3/include/bits/basic_string.h	Mon Sep  3 02:53:29 2001
+++ gcc/libstdc++-v3/include/bits/basic_string.h	Mon Sep  3 03:45:27 2001
@@ -139,7 +139,7 @@ namespace std
 	size_type 		_M_length;
 	size_type 		_M_capacity;
 	_Atomic_word		_M_references;
-
+
         bool
 	_M_is_leaked() const
         { return _M_references < 0; }
@@ -154,7 +154,7 @@ namespace std

         void
 	_M_set_sharable()
-        { _M_references = 0; }
+	{ _M_references = 0; }

 	_CharT*
 	_M_refdata() throw()
@@ -231,7 +231,10 @@ namespace std

       // The following storage is init'd to 0 by the linker, resulting
       // (carefully) in an empty string with one reference.
-      static size_type _S_empty_rep_storage[(sizeof(_Rep) + sizeof(_CharT) + sizeof(size_type) - 1)/sizeof(size_type)];
+      static size_type _S_empty_rep_storage[
+	(sizeof(_Rep) + sizeof(_CharT)
+	 + sizeof(size_type) - 1)/sizeof(size_type)]
+      __attribute__ ((__aligned__ (16)));

       _CharT*
       _M_data() const
diff -urpN -xCVS gnu_gcc/libstdc++-v3/include/bits/basic_string.tcc gcc/libstdc++-v3/include/bits/basic_string.tcc
--- gnu_gcc/libstdc++-v3/include/bits/basic_string.tcc	Mon Sep  3 02:53:31 2001
+++ gcc/libstdc++-v3/include/bits/basic_string.tcc	Mon Sep  3 03:45:27 2001
@@ -61,7 +61,12 @@ namespace std
   template<typename _CharT, typename _Traits, typename _Alloc>
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::_S_empty_rep_storage[
-    (sizeof(_Rep) + sizeof(_CharT) + sizeof(size_type) - 1)/sizeof(size_type)];
+    (sizeof(_Rep) + sizeof(_CharT) + sizeof(size_type) - 1)/sizeof(size_type)]
+    __attribute__ ((__aligned__ (16)))
+#ifdef _S_EMPTY_REP_INIT
+    _S_EMPTY_REP_INIT
+#endif
+  ;

   // NB: This is the special case for Input Iterators, used in
   // istreambuf_iterators, etc.
