User:CleanupListingBot/Source VB2

Added a table-posting feature. Also removed the directory write for intersections; a massive in-memory array is used instead =D. The inefficiency of VB.NET is starting to show for larger categories. Smallman12q (talk) 00:53, 1 September 2010 (UTC)

Imports DotNetWikiBot
Imports System.Collections
Imports System.IO.File
Imports System.IO



Module Module1

    'Structure articlesubcategory
    '    'declaring a structure named articlesubcategory
    '    Public article As String
    '    Public subcategory As ArrayList
    'End Structure


    'Shared state for the whole run (module-level, used by several subs below).

    'Logged-in wiki session; assigned in Main and used for every PageList/Page.
    Dim enWiki As Site = Nothing
    'Next directory number; getcat uses it to name a directory and createandrecorddirectory increments it.
    Dim catfilecounter As Integer = 0
    'Application base directory; the category tree and "Category directory.txt" are stored under it.
    Dim appbase As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase()
    'Titles of the pages in the category the user asked to check (filled in Main).
    Dim categorytochecktitles As New ArrayList
    'Wikitext of the report currently being built.
    Dim report As String = Nothing
    'Category names read from "Category directory.txt"; indexed by the number in a directory's name.
    Dim directoryindex() As String = Nothing
    Dim articlesarraycategories As New ArrayList  'one ArrayList of category names per article; parallel to articlesarraytitles
    Dim articlesarraytitles As New ArrayList  'article titles; entry i owns articlesarraycategories(i)

    'Entry point: interactive console workflow of the bot.
    '  1. Prints the banner and logs into en.wikipedia.org through DotNetWikiBot.
    '  2. Downloads the maintenance category tree to disk (getcat) when no local copy exists.
    '  3. Loads the titles of the user-supplied category into categorytochecktitles
    '     (skipped when the user types "done").
    '  4. Loads "Category directory.txt" into directoryindex.
    '  5. Intersects every stored maintenance category with those titles (intersectandreportcat).
    '  6. Saves the list report to the page named by the user and a sortable table
    '     to "<page> (Table)".
    'Fatal errors go through quit()/exceptionquit(), which end the process.
    Sub Main()
        '''''''''''''''''''''''''''''
        'Print out header
        Console.WriteLine("######################################")
        Console.WriteLine("# CleanupListingBot Version Beta 2.0 #")
        Console.WriteLine("# Public Domain August 2010          #")
        Console.WriteLine("# Written in VB.net 2.0 by Smallman  #")
        Console.WriteLine("# Requires the DotNetWikiBot library #")
        Console.WriteLine("######################################")
        Console.WriteLine()
        Console.WriteLine()
        Console.WriteLine("For the latest source code visit:")
        Console.WriteLine("http://en.wikipedia.org/wiki/User:CleanupListingBot/Source_Index")
        Console.WriteLine()
        Console.WriteLine("This is a BETA version. USE AT YOUR OWN RISK.") 'Liability waiver
        pause()
        Console.Clear()

        '''''''''''''''''''''''''''''
        'Log in
        Console.WriteLine("You will be required to enter your Wikipedia username and password.")
        Console.WriteLine("They are used to log into the insecure API of Wikipedia.")

        Dim username, password As String
        Try
            Console.Write("Please enter username: ")
            username = Console.ReadLine()
            Console.Write("Please enter password: ")
            password = Console.ReadLine()
            Console.Clear() 'Wipe the typed credentials off the screen
            Console.WriteLine("Attempting to log into the english wiki as: " + username)
            Console.WriteLine("Please note that https is not used.")

            enWiki = New Site("http://en.wikipedia.org", username, password) 'Log into wikipedia
        Catch e As Exception
            Console.WriteLine("Login error: " + e.Message)
            quit() 'Ends the process, so nothing below runs after a failed login
        End Try

        Console.WriteLine("Log in successful....clearing user name and password for security...")

        'Drop our references to the credentials
        username = Nothing
        password = Nothing
        Console.Clear()
        Console.WriteLine("You are now logged in.")

        '''''''''''''''''''''''''''''''''''''''''''''''''''''
        'Make sure the maintenance category tree is available on disk
        Console.WriteLine("Checking for category: Wikipedia maintenance categories sorted by month")

        Dim categoryname As String = "Wikipedia maintenance categories sorted by month"
        Dim currentdirectory As String = appbase + "\" + categoryname
        Dim indexfile As String = appbase + "\Category directory.txt"

        If Not Directory.Exists(currentdirectory) Then
            'A fresh download numbers directories from 0 again and APPENDS to the
            'index file, so an index left over from an earlier download would no
            'longer line up with the new directory numbers. Start from a clean file.
            Try
                System.IO.File.Delete(indexfile) 'No error if the file does not exist
            Catch ex As Exception
                exceptionquit("Could not reset the category index", ex)
            End Try

            Directory.CreateDirectory(currentdirectory)
            Console.WriteLine("Category not found locally...begin downloading...")
            getcat(categoryname, currentdirectory)
            Console.WriteLine("Done downloading cleanup...")
            pause()
        Else
            Console.WriteLine("Wikipedia maintenance directory found.")
        End If

        '''''''''''''''''''''''''''''''''''''''''
        'Get category to check
        Console.Clear()
        Console.WriteLine("Please enter the category you wish to create a cleanup listing for.")
        Console.Write("Enter category to check: ")
        Dim categorytocheck As String = Console.ReadLine()
        If categorytocheck <> "done" Then

            Dim categorytochecklist As PageList = Nothing
            Try
                Console.WriteLine("Loading category pages...")
                categorytochecklist = New PageList(enWiki)
                categorytochecklist.FillAllFromCategoryTree(categorytocheck) 'Get category titles
                Console.WriteLine("Category loading complete....removing non-articles.")
                categorytochecklist.FilterNamespaces({0, 1}) 'Keep articles and their talk pages
                For Each one As Page In categorytochecklist
                    one.RemoveNSPrefix() 'Remove "Talk" from titles
                Next
                Console.WriteLine("Category filtering...complete.")
                Console.WriteLine("There are " + categorytochecklist.Count.ToString + " articles.")
            Catch ex As Exception
                exceptionquit("Loading category error", ex)
            End Try

            'Copy the page titles into the module-level list used by the intersection step
            Console.WriteLine("Sending category to arraylist...")
            For Each article As Page In categorytochecklist
                categorytochecktitles.Add(article.title)
            Next
            Console.WriteLine("Arraylist complete...")
            categorytochecklist = Nothing 'Release the page list
        End If

        ''''''''''''''''''''''''''''''''''''''''
        'Compare and generate cleanup listing
        Console.WriteLine("Will now generate Cleanup lisiting...")
        pause()

        'Load index: each line is "<number>#<category name>"; the position of a
        'line in the file is expected to equal <number>.
        Console.WriteLine("Loading directory index...")
        Dim index As New ArrayList
        Try
            Using sr As New StreamReader(indexfile)
                Dim line As String = sr.ReadLine()
                While line IsNot Nothing
                    Dim parts() As String = line.Split("#"c)
                    index.Add(parts(1)) 'A line without "#" throws and is reported below
                    line = sr.ReadLine()
                End While
            End Using
        Catch e As Exception
            exceptionquit("The index could not be read:", e)
        End Try

        directoryindex = arraylisttostring(index)
        index = Nothing
        Console.WriteLine("Directory index loaded: " + directoryindex.Length.ToString + " categories.")
        Console.WriteLine()
        Console.WriteLine("Intersecting categories and generating report...")

        'Walk the stored tree; this fills report, articlesarraytitles and articlesarraycategories
        report += "The following is a cleanup report generated on " + Date.UtcNow.ToString
        currentdirectory = appbase + "\Wikipedia maintenance categories sorted by month"
        intersectandreportcat(currentdirectory, 0)

        Console.WriteLine(articlesarraycategories.Count.ToString + " is length")
        Console.WriteLine("Please enter the title of where to save the report on Wikipedia.")
        Console.Write("Enter title: ")
        Dim pagename As String = Console.ReadLine()

        Console.WriteLine("Writing to Wikipedia to " + pagename)
        Dim ab As New Page(enWiki, pagename)
        ab.text = report
        'Note the space after "for": the summary previously ran the words together
        ab.Save("CleanupListingBot Report for " + categorytocheck, False)

        report = Nothing
        ab.text = Nothing

        'Make the table: one row per article with its cleanup count and categories.
        'A StringBuilder avoids re-copying the whole report for every appended row.
        Dim table As New System.Text.StringBuilder()
        table.Append("{|class=""wikitable sortable""" + vbNewLine)
        table.Append("!Article" + vbNewLine)
        table.Append("!Cleanup count" + vbNewLine)
        table.Append("!Categories" + vbNewLine)
        table.Append("|-" + vbNewLine)

        Dim counter As Integer = 0
        For Each setofcats As ArrayList In articlesarraycategories
            Dim cats() As String = arraylisttostring(setofcats)
            table.Append("|[[" + articlesarraytitles(counter).ToString + "]]" + vbNewLine)
            table.Append("|" + cats.Length.ToString + vbNewLine)
            table.Append("|" + String.Join(", ", cats) + vbNewLine)
            table.Append("|-" + vbNewLine)
            counter += 1
        Next
        table.Append("|}")
        report = table.ToString()

        Dim ac As New Page(enWiki, pagename + " (Table)")
        ac.text = report
        ac.Save("CleanupListingBot Report Table for " + categorytocheck, False)

        Console.WriteLine("We're Done...")
        quit()

        'TODO: the session is never explicitly logged out.
    End Sub


    'combines intersect with report
    'Recursively walks the stored category tree rooted at thecurrentdirectory.
    'For each directory that has an "articles.txt", the titles in that file are
    'intersected with categorytochecktitles; when the intersection is not empty:
    '  - a heading (header(catname, depth)) and one "* [[title]]" bullet per match
    '    are appended to the module-level report;
    '  - each match is recorded in articlesarraytitles / articlesarraycategories
    '    (parallel lists: title -> list of category names it was found in).
    'The category name is looked up in directoryindex using the number in the
    'directory name (the "#" is stripped first).
    '
    'thecurrentdirectory: directory to process (never reassigned, ByRef kept for callers).
    'depth: nesting level, used only for the heading and increased by one per level.
    Sub intersectandreportcat(ByRef thecurrentdirectory As String, ByVal depth As Integer)
        Dim root As New DirectoryInfo(thecurrentdirectory)
        Dim subdirectories As DirectoryInfo() = root.GetDirectories()

        'check this cat's articles
        Dim articlespath As String = thecurrentdirectory + "\articles.txt"
        Dim articlesfile As New IO.FileInfo(articlespath)
        If articlesfile.Exists Then
            Dim pages As New ArrayList
            Try
                Using sr As New StreamReader(articlespath)
                    Dim line As String = sr.ReadLine()
                    While line IsNot Nothing
                        pages.Add(line)
                        line = sr.ReadLine()
                    End While
                End Using
            Catch e As Exception
                'Best effort: report the problem and continue with whatever was read
                Console.WriteLine("The file could not be read:")
                Console.WriteLine(e.Message)
            End Try

            Dim intersection() As String = intersect(categorytochecktitles, pages)
            pages = Nothing

            If intersection.Length <> 0 Then 'Make sure something was found
                'Directory names look like "<number>#"; the number indexes directoryindex
                Dim rootname As String = root.Name.Replace("#", "")
                Dim catindex As Integer
                Dim known As Boolean = Int32.TryParse(rootname, catindex) AndAlso _
                                       directoryindex IsNot Nothing AndAlso _
                                       catindex >= 0 AndAlso catindex < directoryindex.Length

                If Not known Then
                    'Previously this threw and aborted the whole subtree; skip only this
                    'directory and still visit its subdirectories below.
                    Console.WriteLine("No index entry for directory, skipping: " + thecurrentdirectory)
                Else
                    Dim catname As String = directoryindex(catindex)
                    Console.WriteLine("On cat: " + catname)

                    'Build this section locally and append it to report once
                    Dim section As New System.Text.StringBuilder()
                    section.Append(vbNewLine + header(catname, depth) + vbNewLine)

                    Try
                        For Each item As String In intersection
                            Dim position As Integer = articlesarraytitles.IndexOf(item)
                            If position = -1 Then
                                'First time this article is seen: start its category list
                                articlesarraytitles.Add(item)
                                Dim cats As New ArrayList
                                cats.Add(catname)
                                articlesarraycategories.Add(cats)
                            Else
                                DirectCast(articlesarraycategories(position), ArrayList).Add(catname)
                            End If

                            'add item to report
                            section.Append("* [[" + item + "]]" + vbNewLine)
                        Next
                    Catch e As Exception
                        exceptionquit("Error ading item to array", e)
                    End Try

                    report += section.ToString()
                End If
            End If
        End If

        'Each subcat
        For Each subdirectory As DirectoryInfo In subdirectories
            Try
                intersectandreportcat(subdirectory.FullName, depth + 1)
            Catch E As Exception
                Console.WriteLine("Error in directory fetch.")
                'Say which directory failed and why instead of discarding the exception
                Console.WriteLine(subdirectory.FullName + ": " + E.Message)
                pause()
            End Try
        Next
    End Sub


    'Returns the elements common to both lists as a String array.
    'The shorter list is iterated (list1 when the counts are equal), so the
    'result follows that list's order and keeps its duplicates. Membership in
    'the longer list is tested through a Hashtable instead of ArrayList.Contains,
    'which makes the work proportional to list1.Count + list2.Count rather than
    'to their product.
    '
    'list1, list2: lists of titles; neither list is modified. Matching elements
    '              must be Strings or the final conversion throws InvalidCastException.
    Function intersect(ByRef list1 As ArrayList, ByRef list2 As ArrayList) As String()
        Dim smaller As ArrayList = list1
        Dim larger As ArrayList = list2
        If list1.Count > list2.Count Then
            smaller = list2
            larger = list1
        End If

        'Index the longer list. Hashtable keys cannot be Nothing, so remember
        'separately whether the longer list contains a Nothing entry.
        Dim lookup As New Hashtable(larger.Count)
        Dim largerhasnothing As Boolean = False
        For Each entry As Object In larger
            If entry Is Nothing Then
                largerhasnothing = True
            Else
                lookup(entry) = True
            End If
        Next

        Dim matches As New ArrayList
        For Each piece As Object In smaller
            Dim found As Boolean
            If piece Is Nothing Then
                found = largerhasnothing
            Else
                found = lookup.ContainsKey(piece)
            End If
            If found Then
                matches.Add(piece)
            End If
        Next

        Return DirectCast(matches.ToArray(GetType(String)), String())
    End Function

    'Copies every element of the list, in order, into a new String array of
    'the same length. An element that is not a String makes the copy throw.
    Function arraylisttostring(ByRef array As ArrayList) As String()
        Dim strings(array.Count - 1) As String
        array.CopyTo(strings)
        Return strings
    End Function

    'Wraps input in depth "=" characters on each side (wikitext heading markup),
    'e.g. header("Cat", 2) returns "==Cat==".
    '
    'input: heading text.
    'depth: number of "=" on each side; values below 0 are treated as 0.
    '
    'The markers are generated for any depth. Previously they were cut from a
    '35-character constant, so a larger depth threw inside the function, paused
    'the program and returned Nothing, silently dropping the heading.
    Function header(ByVal input As String, ByVal depth As Integer) As String
        Dim markers As New String("="c, Math.Max(depth, 0))
        Return markers + input + markers
    End Function

    'unused sub
    'Recursively appends the contents of every "0.txt" found under
    'currentdirectory to the module-level report: a heading built from the
    'directory name (with "#" removed) followed by one "* [[line]]" bullet per
    'line of the file. Progress markers ("za", "z1", ...) are written to the
    'console along the way. Any failure ends the program through exceptionquit.
    '
    'currentdirectory: directory to process.
    'depth: heading level passed to header(); grows by one per directory level.
    Sub reportoncat(ByVal currentdirectory As String, ByVal depth As Integer)
        Try
            Dim here As New DirectoryInfo(currentdirectory)
            Dim children As DirectoryInfo() = here.GetDirectories()
            Console.WriteLine("z dirs:" + Dirs_length_text(children))

            'Find category real name
            Dim rootname As String = here.Name
            Console.WriteLine("za")
            If rootname.IndexOf("#") >= 0 Then
                Console.WriteLine("zb")
                Console.WriteLine("zc")
                rootname = rootname.Replace("#", "")
                Console.WriteLine("zd")
                Console.WriteLine("Root:" + rootname)
            End If
            Console.WriteLine("z0")

            'check this cat's articles
            Console.WriteLine("z1")
            Dim listingpath As String = currentdirectory + "\0.txt"
            Dim listing As New IO.FileInfo(listingpath)
            Console.WriteLine("z2")
            If Not listing.Exists Then
                Console.WriteLine("z2.8")
            Else
                report = report & vbNewLine & header(rootname, depth) & vbNewLine
                Console.WriteLine("z2.1")
                Console.WriteLine("z2.2")
                Try
                    Console.WriteLine("z2.3")
                    Using reader As New StreamReader(listingpath)
                        Console.WriteLine("z2.4")
                        Dim line As String
                        'One bullet per line; the marker is printed after every read,
                        'including the final read that hits end of file.
                        Do
                            line = reader.ReadLine()
                            If line IsNot Nothing Then
                                report = report & "* [[" & line & "]]" & vbNewLine
                            End If
                            Console.WriteLine("z2.5")
                        Loop While line IsNot Nothing
                        Console.WriteLine("z2.6")
                    End Using
                    Console.WriteLine("z2.7")
                Catch readerror As Exception
                    ' Let the user know what went wrong.
                    Console.WriteLine("The file could not be read:")
                    Console.WriteLine(readerror.Message)
                End Try
            End If

            'Each subcat
            Try
                For position As Integer = 0 To children.Length - 1
                    reportoncat(children(position).FullName, depth + 1)
                Next
            Catch suberror As Exception
                exceptionquit("Erorr: Report on Cat error: Accessing ", suberror)
            End Try

        Catch outererror As Exception
            exceptionquit("Erorr: Report on Cat error with " + currentdirectory + " depth: " + depth.ToString, outererror)
        End Try
    End Sub

    'Text form of the number of directories in the array (used for the trace line above).
    Private Function Dirs_length_text(ByVal directories As DirectoryInfo()) As String
        Return directories.Length.ToString
    End Function

    'unused function
    'Recursively walks the stored category tree. For every directory that has
    'an "articles.txt", its lines are intersected with categorytochecktitles
    'and a non-empty intersection is saved as "0.txt" in that directory
    '(through PageList.SaveTitlesToFile).
    '
    'thecurrentdirectory: directory to process.
    Sub intersectcat(ByVal thecurrentdirectory As String)
        Dim here As New DirectoryInfo(thecurrentdirectory)
        Dim children As DirectoryInfo() = here.GetDirectories()

        'check this cat's articles
        Dim articlespath As String = thecurrentdirectory + "\articles.txt"
        Dim articlesfile As New IO.FileInfo(articlespath)
        If articlesfile.Exists Then
            Dim titles As New ArrayList
            Try
                Using reader As New StreamReader(articlespath)
                    Dim current As String = reader.ReadLine()
                    While current IsNot Nothing
                        titles.Add(current)
                        current = reader.ReadLine()
                    End While
                End Using
            Catch readerror As Exception
                ' Let the user know what went wrong.
                Console.WriteLine("The file could not be read:")
                Console.WriteLine(readerror.Message)
            End Try

            'Now intersect them
            Dim common() As String = intersect(categorytochecktitles, titles)
            titles = Nothing

            'Write the intersection to file, but only when there is one
            If common.Length > 0 Then
                Dim matched As New PageList(enWiki, common)
                matched.SaveTitlesToFile(thecurrentdirectory + "/0.txt")
            End If
        End If

        'Each subcat
        For position As Integer = 0 To children.Length - 1
            Try
                intersectcat(children(position).FullName)
            Catch suberror As Exception
                Console.WriteLine("Error accessing")
            End Try
        Next
    End Sub

    'Downloads one category and, recursively, all of its subcategories to disk.
    'A directory named "<catfilecounter>#" is created under thecurrrentdirectory
    'and recorded in the index file (createandrecorddirectory); the category's
    'page titles are saved there as "articles.txt" when there are any; then
    'every subcategory is downloaded into that new directory.
    '
    'categoryname: category title passed to the DotNetWikiBot fill methods.
    'thecurrrentdirectory: parent directory under which this category is stored.
    '
    'NOTE(review): nothing here guards against a category that is reachable
    'from itself; such a loop would presumably recurse without end - confirm.
    Sub getcat(ByVal categoryname As String, ByVal thecurrrentdirectory As String)
        Dim maintenancecategory, maintenancesubcategory As New PageList(enWiki)
        maintenancecategory.FillFromCategory(categoryname)
        maintenancesubcategory.FillSubsFromCategory(categoryname)

        'Name the directory with the current counter value; the call below
        'creates it, writes the index line and then advances the counter.
        thecurrrentdirectory += "\" + catfilecounter.ToString + "#"
        createandrecorddirectory(categoryname, thecurrrentdirectory)

        'Strip the namespace prefix so the titles can be passed on as category names
        For Each one As Page In maintenancesubcategory
            one.RemoveNSPrefix()
        Next

        'Write articles to file in directory
        If (maintenancecategory.Count > 0) Then
            maintenancecategory.SaveTitlesToFile(thecurrrentdirectory + "\articles.txt")
            Console.WriteLine("Saved files of category " + categoryname)
        End If
        maintenancecategory = Nothing 'clear out category when done

        'Check the subcategories
        For Each subcat As Page In maintenancesubcategory
            Console.WriteLine("Getting subcategory: '" + subcat.title + "' of '" + categoryname + "'")
            getcat(subcat.title, thecurrrentdirectory)
        Next

    End Sub

    'Creates thecurrentdirectory and appends the line
    '"<catfilecounter>#<categoryname>" to "Category directory.txt" in the
    'application base directory, then leaves catfilecounter advanced by one.
    'Any failure ends the program through exceptionquit.
    '
    'categoryname: name written to the index line.
    'thecurrentdirectory: full path of the directory to create.
    Sub createandrecorddirectory(ByVal categoryname As String, ByVal thecurrentdirectory As String)
        Try
            'Capture the number before incrementing: the caller already used the
            'same value to build thecurrentdirectory.
            Dim piece As String = catfilecounter.ToString + "#"
            catfilecounter += 1
            Directory.CreateDirectory(thecurrentdirectory)

            'Using closes the writer even when WriteLine throws
            Using objWriter As New System.IO.StreamWriter(appbase + "\Category directory.txt", True) 'Append
                objWriter.WriteLine(piece + categoryname)
            End Using
        Catch e As Exception
            exceptionquit("Error: Create and record directory", e)
        End Try

    End Sub

    'Waits for the user to press Enter, then terminates the program with End.
    'The prompt names Enter because Console.ReadLine does not return on an
    'arbitrary key press.
    Sub quit()
        Console.WriteLine("Press Enter to quit...")
        Console.ReadLine()
        End
    End Sub

    'Blocks until the user presses Enter. The prompt names Enter because
    'Console.ReadLine does not return on an arbitrary key press.
    Sub pause()
        Console.WriteLine("Press Enter to continue...")
        Console.ReadLine()
    End Sub

    'Prints "<errorwith>:<exception message>" to the console and then ends
    'the program through quit().
    '
    'errorwith: short description of what failed.
    'ex: the exception whose Message is shown.
    Sub exceptionquit(ByVal errorwith As String, ByVal ex As Exception)
        Dim message As String = String.Concat(errorwith, ":", ex.Message)
        Console.WriteLine(message)
        quit()
    End Sub

End Module

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for re-presentation here. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.