Logo Search packages:      
Sourcecode: wapiti version File versions  Download package

def lswww::lswww::correctlink (   self,
  lien,
  current,
  currentdir,
  proto 
)

Transform relative URLs into absolute ones.

Definition at line 256 of file lswww.py.

def correctlink(self, lien, current, currentdir, proto):
      """Turn a link found in a page into a normalized absolute URL.

      Parameters:
        lien       -- the raw link text (may be absolute, root-relative,
                      query-only or relative).
        current    -- URL that a query-only link ("?a=b") is appended to.
        currentdir -- prefix prepended to any other relative link
                      (presumably the current directory URL ending with
                      "/" -- confirm against the caller).
        proto      -- scheme (without "://") used together with
                      self.server for root-relative links.

      Reads self.server (host used for root-relative links) and
      self.bad_params (query parameter names to drop).

      Returns None for an empty link or a link using a scheme that cannot
      be crawled; otherwise the normalized URL as a string: fragment
      removed, query parameters sorted, useless "?" removed and "/./" and
      "dir/../" segments resolved.
      """
      unsupported_schemes = ("telnet:", "ftp:", "mailto:", "javascript:",
                             "news:", "file:", "gopher:", "irc:")
      # Sort links emitted by auto-generated Apache directory indexes.
      apache_index_queries = ("C=D;O=A", "C=D;O=D", "C=M;O=A", "C=M;O=D",
                              "C=N;O=A", "C=N;O=D", "C=S;O=A", "C=S;O=D")
      parent_dir_ref = r"/([~:!,;a-zA-Z0-9\.\-+_]+)/\.\./"

      # No leading or trailing whitespace.
      lien = lien.strip()
      # URI schemes are case-insensitive, so compare on a lowered copy.
      lowered = lien.lower()
      if lien == "" or lowered.startswith(unsupported_schemes):
            return None

      if lowered.startswith(("http://", "https://")):
            # Full URL, nothing to resolve.
            pass
      elif lien[0] == "/":
            # Link relative to the server root.
            lien = proto + "://" + self.server + lien
      elif lien[0] == "?":
            # Same page with a different query string.
            lien = current + lien
      else:
            # Link relative to the current directory.
            lien = currentdir + lien

      # Drop the destination anchor.
      lien = lien.split("#")[0]

      # Put the parameters in alphabetical order so that equivalent URLs
      # compare equal.
      if "?" in lien:
            # partition() keeps any further "?" inside the query instead
            # of silently truncating it.
            path, _, args = lien.partition("?")
            # NOTE: as before, a query holding a single parameter (no "&")
            # is passed through without any filtering.
            if "&" in args:
                  bad_prefixes = tuple([name + "=" for name in self.bad_params])
                  # Build a new list rather than removing items from the
                  # list being iterated, which skipped consecutive matches.
                  kept = [arg for arg in sorted(args.split("&"))
                          if arg != "" and "=" in arg
                          and not arg.startswith(bad_prefixes)]
                  args = "&".join(kept)

            if args in apache_index_queries:
                  lien = path
            else:
                  lien = path + "?" + args

      # Remove trailing "?" left over when there is no query to send.
      lien = lien.rstrip("?")

      # Remove useless slashes from the path part.
      # NOTE: as before, this is only done when a query string is present.
      if "?" in lien:
            path, _, args = lien.partition("?")
            # The lookbehind protects the "//" of "scheme://" without
            # consuming the character in front of the slashes.
            path = re.sub(r"(?<!:)/{2,}", "/", path)
            lien = path + "?" + args

      # Resolve links going to a parent directory (..).
      # NOTE(review): the pattern is not anchored to the path, so
      # "http://host/../x" loses its host -- confirm whether callers can
      # produce such links.
      while re.search(parent_dir_ref, lien) is not None:
            lien = re.sub(parent_dir_ref, "/", lien)
      lien = re.sub(r"/\./", "/", lien)
      return lien

      def checklink(self,url):


Generated by  Doxygen 1.6.0   Back to index