/*************************************************************************
  Uniq        Removes duplicate lines, even if non consecutive

  Author:     SemWare

  Date:       Feb  7, 1997  (SEM) initial version
              Mar  4, 1997  (SEM) added ability to limit text comparison
                            to text within a column block.
              May  8, 1998  (SEM) Save/restore block marking status
              Mar 17, 1999  (SEM) Fix bug in column block handling
              Feb 18, 2015  (SEM) Added delete parent option
              Mar 02, 2020  (SEM) Bug: delete parent not working. Fixed.
                                  Thanks to Fred Olson for the report.

  Overview:

  This macro will remove duplicate lines from a file or block, starting at the
  current position or at the beginning of the file or block.  The duplicated
  lines need not occur consecutively.  The relative order of the file is left
  unchanged.

  To run this macro, select it from the Potpourri, or run it by name
  via the ExecMacro command (default: <ctrl x>)

  Usage notes:

  If the cursor is within a marked block, the macro prompts to confirm using
  the block vs. the entire file.

  If the cursor is not at the beginning of the file/block, the macro prompts
  whether to start at the current position or at the beginning of the
  file/block.

  If a column block is marked, then optionally the comparison can be limited
  to text within the column block.

  Limitations:

  If the 'duplicate text in column blocks only' option is used, the column
  block width must not exceed MAXFINDLEN (see below) characters.

  Copyright 1992-1999 SemWare Corporation.  All Rights Reserved Worldwide.

  Use, modification, and distribution of this SAL macro is encouraged by
  SemWare provided that this statement, including the above copyright
  notice, is not removed; and provided that no fee or other remuneration
  is received for distribution.  You may add your own copyright notice
  to cover new matter you add to the macro, but SemWare Corporation will
  neither support nor assume legal responsibility for any material added
  or any changes made to the macro.

*************************************************************************/

// Number of characters of a line captured into the comparison string
// (main reads the parent line with GetText(1, MAXSTRLEN)).
constant MAXSTRLEN = 255

// Longest text handed to lFind() in a single search; lines longer than this
// are matched on their leading part and the remainder is compared separately
// (see Match and ThisLineEqual).
constant MAXFINDLEN = 255

// Alias for MAXFINDLEN; not referenced elsewhere in the visible source.
constant MAX_COLUMN_WIDTH = MAXFINDLEN

/*
  Returns the percentage of the scan that is complete.

  lines_to_check  number of lines between the starting line and the end of
                  the file/block, as computed by the caller before the scan
  use_block       non-zero: measure against the end of the marked block;
                  zero: measure against the end of the file

  The result is derived from how many lines remain between the current line
  and the end of the file/block.
*/
integer proc HowMany(integer lines_to_check, integer use_block)
    integer lines_left

    // The scan may start on the last line of the file/block, in which case
    // there is nothing left to check; report completion instead of dividing
    // by zero.
    if lines_to_check <= 0
        return (100)
    endif

    lines_left = iif(use_block, Query(BlockEndLine), NumLines()) - CurrLine()
    return (100 - (lines_left * 100 / lines_to_check))
end

// Asks where the scan should begin.  StartHere() interprets the result:
// 1 = start at the beginning, 2 = start at the current line,
// 0 or 3 = cancel the macro.
menu StartWhereMenu()
    "&Start at Beginning of File/Block"
    "&Work from the Current Line Onwards"
    "&Cancel"
end

/*
  Decides whether the scan starts at the current line or at the beginning
  of the file/block.

  use_block   non-zero when the operation is limited to the marked block

  Returns:
      TRUE    start at the current line (also returned without prompting
              when the cursor is already on the first line of the
              file/block)
      FALSE   start at the beginning of the file/block
      -1      the user cancelled
*/
integer proc StartHere(integer use_block)
    integer first_line

    // Compare against the first line of whatever is being processed.  The
    // earlier test also applied "CurrLine() > 1" in block mode, so the menu
    // appeared even with the cursor on the block's first line.
    first_line = iif(use_block, Query(BlockBegLine), 1)

    if CurrLine() > first_line
        case StartWhereMenu("Uniq")
            when 0, 3
                return (-1)
            when 1
                return (FALSE)
            // 2 ("work from the current line") falls through to TRUE
        endcase
    endif
    return (TRUE)
end

// Asks whether only the text inside the column block should be compared.
// Returns the MsgBox() result unchanged.
integer proc LimitToColumns()
    integer answer

    answer = MsgBox("Uniq", "Limit comparison to text in column block?", _YES_NO_CANCEL_)
    return (answer)
end

// Asks whether only the lines of the marked block should be processed.
// Returns the MsgBox() result unchanged.
integer proc LimitToBlock()
    integer answer

    answer = MsgBox("Uniq", "Limit operation to lines in currently marked block?", _YES_NO_CANCEL_)
    return (answer)
end

/*
  Determines the scope of the operation when the cursor is inside a marked
  block.

  limit_to_columns (out)  set to TRUE when the user chose to compare only
                          the text inside the column block, else FALSE

  Returns:
      TRUE    operate on the marked block
      FALSE   operate on the whole file (also when the cursor is not in a
              block)
      -1      the user cancelled one of the prompts

  The prompt results are interpreted the same way as elsewhere in this
  macro: 0 = cancel, 1 = yes, 2 = no.
*/
integer proc UseBlock(var integer limit_to_columns)
    integer blocktype

    blocktype = isCurrLineInBlock()
    limit_to_columns = FALSE
    if blocktype
        // Column comparison is only offered when the block is narrow enough
        // to be searched in one go (width <= MAXFINDLEN).
        if blocktype == _COLUMN_ and Query(BlockEndCol) - Query(BlockBegCol) < MAXFINDLEN
            // Test the answer explicitly: "No" is a non-zero result too, so a
            // plain truth test would treat it as "Yes".
            case LimitToColumns()
                when 0
                    return (-1)
                when 1
                    limit_to_columns = TRUE
                    return (TRUE)
                // 2 ("No"): go on to ask about limiting to the block lines
            endcase
        endif
        case LimitToBlock()
            when 0
                return (-1)
            when 1
                return (TRUE)
        endcase
    endif
    return (FALSE)
end

/*
  Compares the tail (everything after the first MAXFINDLEN characters) of
  the current line with the tail of line1.  The caller has already
  established that the first MAXFINDLEN characters match.

  line1   line number of the parent line to compare against

  Returns TRUE when both lines have the same length and identical tails,
  otherwise FALSE.  The cursor is back on the original line on return.
*/
integer proc ThisLineEqual(integer line1)
    integer len_left, line2, len2, p
    string s2[255]

    line2 = CurrLine()
    len2 = CurrLineLen()

    // Lines of different length can never be equal.  Without this check a
    // longer parent line compared equal whenever the tail of the current
    // line was an exact multiple of the chunk size.
    GotoLine(line1)
    if CurrLineLen() <> len2
        GotoLine(line2)
        return (FALSE)
    endif
    GotoLine(line2)

    len_left = len2 - MAXFINDLEN
    p = MAXFINDLEN + 1

    // Compare chunk by chunk; s2 holds at most sizeof(s2) characters.
    // Testing "> 0" up front also terminates if there is no tail at all.
    while len_left > 0
        s2 = GetText(p, len_left)
        GotoLine(line1)
        if s2 <> GetText(p, sizeof(s2))
            GotoLine(line2)
            return (FALSE)
        endif

        GotoLine(line2)

        p = p + Length(s2)
        len_left = len_left - Length(s2)
    endwhile
    return (TRUE)
end

/**************************************************************************
  Searches, from the current position, for a line that duplicates the
  parent line.  On success the cursor is left on the matching line.

  s1        text of the parent line (first MAXSTRLEN characters), or the
            text inside the column block on the parent line
  fopts     base lFind() options supplied by the caller
  line1     line number of the parent line (used for long-line compares)
  len1      length of the parent line, or the column block width
  pos_col1  position of the column block's first column, or 0

  If pos_col1 is non-zero,
      we are only checking text inside a column block.
      also, we can assume that text width <= MAXFINDLEN
  Else
      text width can be > MAXFINDLEN

  Returns non-zero when a duplicate was found, otherwise FALSE.
 **************************************************************************/
integer proc Match(string s1, string fopts, integer line1, integer len1, integer pos_col1)
    // Exception!!! empty lines
    if Length(s1) == 0
        if pos_col1 == 0
            return (lFind("^$", "x"))
        endif

        // Column mode: a line "matches" an empty column when it ends before
        // the block's first column.
        if CurrPos() > pos_col1 and not Down()
            return (FALSE)
        endif

        while isCurrLineInBlock()
            if CurrLineLen() < pos_col1
                return (TRUE)
            endif
            if not Down()
                break
            endif
        endwhile
        return (FALSE)
    elseif len1 <= MAXFINDLEN
        // Short enough to be matched in full by a single anchored search.
        return (lFind(s1, fopts + "^$"))
    elseif not lFind(s1, fopts + "^")
        return (FALSE)
    elseif CurrLineLen() <> len1
        // Only the leading part matched.  A line of different length is not
        // a duplicate; previously a line that was merely a prefix of a
        // longer parent was reported as equal.
        return (FALSE)
    elseif len1 <= MAXSTRLEN
        // s1 holds the entire parent line, so compare the lines directly.
        return (GetText(1, MAXSTRLEN) == s1)
    else
        // Longer than s1 can hold: compare the remainder in chunks.
        return (ThisLineEqual(line1))
    endif
    return (FALSE)      // to satisfy overly picky compiler
end

/*
  Refreshes the "% complete" message, but only once at least 100 GetTime()
  units have passed since the last refresh (presumably one second - confirm
  the GetTime() resolution).

  lines_to_check, use_block   passed through to HowMany()
  tm (in/out)                 time of the last refresh; updated here
*/
proc show_status(integer lines_to_check, integer use_block, var integer tm)
    // Too soon since the last update: leave the display alone.
    if GetTime() - tm < 100
        return ()
    endif

    tm = GetTime()

    // Consume one pending keystroke, if any.
    if KeyPressed()
        GetKey()
    endif

    Message(HowMany(lines_to_check, use_block), '% complete')
end

/*
  Macro entry point.

  Prompts for the scope (block / column block / whole file), the starting
  line and whether parent lines should be deleted too.  It then walks the
  lines from the starting point, treating each as a "parent" and deleting
  every later line that Match() reports as a duplicate.  The cursor
  position, block and Marking state saved before the scan are restored at
  the end, and the number of deleted duplicates is shown.
*/
proc main()
    integer use_block, limit_to_columns, line1, len1, lines_to_check, start_here, count,
        col1, col2, pos_col1, marking, delete_parent, line_deleted, tm
    string fopts[12], s1[255]

    // NOTE(review): presumably lets the user interrupt a long run - confirm
    Set(Break, ON)

    // -1 from either helper means the user cancelled a prompt
    use_block = UseBlock(limit_to_columns)
    if use_block == -1
        return ()
    endif

    start_here = StartHere(use_block)
    if start_here == -1
        return ()
    endif

    // NOTE(review): the queued <CursorRight> presumably moves the default
    // selection in the following message box to the second button - confirm
    PushKey(<CursorRight>)
    case MsgBox("Uniq", "Delete the parent also?", _YES_NO_CANCEL_)
        when 0 return ()
        when 1 delete_parent = true
        when 2 delete_parent = false
    endcase

    if not limit_to_columns
        // whole-line comparison: pos_col1 == 0 tells Match() so
        col1 = 1
        pos_col1 = 0
    else
        col1 = Query(BlockBegCol)
        col2 = Query(BlockEndCol)
        len1 = col2 - col1 + 1

        if len1 > MAXFINDLEN
            Warn("Column width of ", len1, " is greater than max supported by this macro (", MAXFINDLEN, ")")
            Halt
        endif

        // translate the block's first column into a position on this line,
        // without disturbing the cursor
        PushPosition()
        GotoColumn(col1)
        pos_col1 = CurrPos()
        PopPosition()
    endif

    // save cursor, block and Marking state; all three are restored at the end
    PushPosition()
    PushBlock()
    marking = Set(Marking, OFF)

    // start_here == FALSE: begin at the top of the block/file instead of the
    // current line
    if not start_here
        if use_block
            GotoBlockBegin()
        else
            BegFile()
        endif
    endif

    // base lFind() options handed to Match(); "l" is added for block mode
    // (presumably restricts the search to the marked block - confirm)
    fopts = "^"
    if use_block
        fopts = fopts + "l"
    endif

    // number of lines from here to the end of the scope; used only for the
    // progress percentage
    lines_to_check = iif(use_block, Query(BlockEndLine), NumLines()) - CurrLine()
    count = 0

    tm = GetTime()
    repeat
        // remember the parent line; the PopPosition() below returns to it
        PushPosition()

        if limit_to_columns
            // grab only the text inside the column range on this line; the
            // temporary marking is undone by PopBlock()
            PushBlock()
            MarkColumn(CurrLine(), col1, CurrLine(), col2)
            s1 = GetMarkedText()
            PopBlock()
        else
            // line1/len1 are only set here; in column mode len1 keeps the
            // column width computed above
            s1 = GetText(1, MAXSTRLEN)
            line1 = CurrLine()
            len1 = CurrLineLen()
        endif

        // Display percentage left
        show_status(lines_to_check, use_block, tm)
        GotoColumn(col1 + 1)                   // in order to skip the first line
        line_deleted = false
        // delete every duplicate of the parent; in block mode stop as soon as
        // a match lies outside the block
        while Match(s1, fopts, line1, len1, pos_col1) and (use_block == FALSE or isCurrLineInBlock())
            KillLine()
            line_deleted = true
            count = count + 1
            show_status(lines_to_check, use_block, tm)
            GotoColumn(col1)            // look for match starting on curr line
        endwhile

        PopPosition()

        // optionally remove the parent itself when it had duplicates;
        // otherwise advance to the next line, stopping when Down() fails
        if delete_parent and line_deleted
            KillLine()
        elseif not Down()
            break
        endif
    until (use_block and not isCurrLineInBlock())

    Message(count, " duplicate line(s) deleted")
    PopPosition()
    PopBlock()
    Set(Marking, marking)
end
