User:CleanupListingBot/Source VB2
Added table posting feature. Also removed the directory write for intersections; a massive array is used instead =D. The inefficiency of VB.NET is starting to show for larger categories. Smallman12q (talk) 00:53, 1 September 2010 (UTC)
Imports DotNetWikiBot
Imports System.Collections
Imports System.IO.File
Imports System.IO
Module Module1
'Structure articlesubcategory
' 'declaring a structure named articlesubcategory
' Public article As String
' Public subcategory As ArrayList
'End Structure
' Module-level state shared by every routine in this module.
' DotNetWikiBot site object; assigned in Main once the user has entered credentials.
Dim enWiki As Site = Nothing
' Running number used for the "<n>#" directory names and index lines; incremented in createandrecorddirectory.
Dim catfilecounter As Integer = 0
' Application base directory; the downloaded category tree and "Category directory.txt" live beneath it.
Dim appbase As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase()
' Titles of the pages in the category the user asked to check (filled in Main).
Dim categorytochecktitles As New ArrayList
' Wikitext of the report being built; appended to by Main and intersectandreportcat.
Dim report As String = Nothing
' Category names loaded from "Category directory.txt" in Main; looked up by directory number in intersectandreportcat.
Dim directoryindex() As String = Nothing
Dim articlesarraycategories As New ArrayList 'Parallel to articlesarraytitles: element i is an ArrayList of category names recorded for title i
Dim articlesarraytitles As New ArrayList 'Article titles that were found in at least one cleanup category
' Program entry point.
' 1. Prints the banner and asks for the Wikipedia username and password.
' 2. Downloads the maintenance category tree to disk when the local directory is missing.
' 3. Loads the titles of the category the user wants a cleanup listing for.
' 4. Loads the directory index, intersects the category with every downloaded
'    cleanup category and saves the resulting report to the page the user names.
' 5. Builds a sortable wikitable (article, cleanup count, categories) and saves
'    it to "<page> (Table)".
' Any failure reported through exceptionquit/quit terminates the program.
Sub Main()
    '''''''''''''''''''''''''''''
    'Print out header
    Console.WriteLine("######################################")
    Console.WriteLine("# CleanupListingBot Version Beta 2.0 #")
    Console.WriteLine("# Public Domain August 2010 #")
    Console.WriteLine("# Written in VB.net 2.0 by Smallman #")
    Console.WriteLine("# Requires the DotNetWikiBot library #")
    Console.WriteLine("######################################")
    Console.WriteLine()
    Console.WriteLine()
    Console.WriteLine("For the latest source code visit:")
    Console.WriteLine("http://en.wikipedia.org/wiki/User:CleanupListingBot/Source_Index")
    Console.WriteLine()
    Console.WriteLine("This is a BETA version. USE AT YOUR OWN RISK.") 'Fancy liability waiver here
    pause()
    Console.Clear()
    'My.Application.CommandLineArgs( 'Command line args
    '''''''''''''''''''''''''''''
    'Log in
    Console.WriteLine("You will be required to enter your Wikipedia username and password.")
    Console.WriteLine("They are used to log into the insecure API of Wikipedia.")
    Dim username As String = Nothing
    Dim password As String = Nothing
    Try
        Console.Write("Please enter username: ")
        username = Console.ReadLine()
        Console.Write("Please enter password: ")
        'NOTE: Console.ReadLine echoes the typed password; the screen is cleared right after.
        password = Console.ReadLine()
        Console.Clear()
        Console.WriteLine("Attempting to log into the english wiki as: " + username)
        Console.WriteLine("Please note that https is not used.")
        enWiki = New Site("http://en.wikipedia.org", username, password) 'Log into wikipedia
    Catch e As Exception
        Console.WriteLine("Login error: " + e.Message)
        quit()
    End Try
    Console.WriteLine("Log in successful....clearing user name and password for security...")
    'Clear username and password for security...
    username = Nothing
    password = Nothing
    Console.Clear()
    Console.WriteLine("You are now logged in.")
    ''''''''''''''''''''''''''''''''''''''''''''''''''''''
    '''''''''''''''''''''''''''''''''''''''''''''''''''''
    'Get the cleanup categories
    'Create a directory for subcategory of them
    'Copy articles from each cat to a text file
    'Load text files and compare
    Console.WriteLine("Checking for category: Wikipedia maintenance categories sorted by month")
    Dim categoryname As String = "Wikipedia maintenance categories sorted by month"
    Dim currentdirectory As String = appbase + "\" + categoryname
    If Not Directory.Exists(currentdirectory) Then
        Directory.CreateDirectory(currentdirectory)
        Console.WriteLine("Category not found locally...begin downloading...")
        getcat(categoryname, currentdirectory)
        Console.WriteLine("Done downloading cleanup...")
        pause()
    Else
        Console.WriteLine("Wikipedia maintenance directory found.")
    End If
    '''''''''''''''''''''''''''''''''''''''''
    'Get category to check
    Console.Clear()
    Console.WriteLine("Please enter the category you wish to create a cleanup listing for.")
    Console.Write("Enter category to check: ")
    Dim categorytocheck As String = Console.ReadLine()
    If categorytocheck <> "done" Then
        'Get the category
        Dim categorytochecklist As PageList = Nothing
        Try
            Console.WriteLine("Loading category pages...")
            categorytochecklist = New PageList(enWiki)
            categorytochecklist.FillAllFromCategoryTree(categorytocheck) 'Get category titles
            Console.WriteLine("Category loading complete....removing non-articles.")
            categorytochecklist.FilterNamespaces({0, 1}) 'Remove non-articles , keep talk pages
            For Each one As Page In categorytochecklist
                one.RemoveNSPrefix() 'Remove "Talk" from titles
            Next
            Console.WriteLine("Category filtering...complete.")
            Console.WriteLine("There are " + categorytochecklist.Count.ToString + " articles.")
        Catch ex As Exception
            exceptionquit("Loading category error", ex)
        End Try
        'Send the page titles to the module-level arraylist
        Console.WriteLine("Sending category to arraylist...")
        For Each article As Page In categorytochecklist
            categorytochecktitles.Add(article.title)
        Next
        Console.WriteLine("Arraylist complete...")
        categorytochecklist = Nothing 'Clear out pagelist
    End If
    ''''''''''''''''''''''''''''''''''''''''
    'Compare and generate cleanup listing
    Console.WriteLine("Will now generate Cleanup lisiting...")
    pause()
    'Load index
    Console.WriteLine("Loading directory index...")
    Dim index As New ArrayList
    Try
        'The Using block closes the StreamReader even when reading fails.
        Using sr As New StreamReader(appbase + "\Category directory.txt")
            Dim line As String = sr.ReadLine()
            While line IsNot Nothing
                'Each line is "<number>#<category name>"; split on the first "#" only
                'so that the remainder of the line is kept intact as the name.
                Dim parts() As String = line.Split(New Char() {"#"c}, 2)
                index.Add(parts(1))
                line = sr.ReadLine()
            End While
        End Using
    Catch e As Exception
        exceptionquit("The index could not be read:", e)
    End Try
    directoryindex = arraylisttostring(index)
    index = Nothing
    Console.WriteLine("Directory index loaded: " + directoryindex.Length.ToString + " categories.")
    Console.WriteLine()
    Console.WriteLine("Intersecting categories and generating report...")
    ''''
    'Compare and Report
    report += "The following is a cleanup report generated on " + Date.UtcNow.ToString
    currentdirectory = appbase + "\Wikipedia maintenance categories sorted by month"
    intersectandreportcat(currentdirectory, 0)
    Console.WriteLine(articlesarraycategories.Count.ToString + " is length")
    Console.WriteLine("Please enter the title of where to save the report on Wikipedia.")
    Console.Write("Enter title: ")
    Dim pagename As String = Console.ReadLine()
    Console.WriteLine("Writing to Wikipedia to " + pagename)
    Dim ab As New Page(enWiki, pagename)
    ab.text = report
    'Fixed: the summary previously ran "for" and the category name together.
    ab.Save("CleanupListingBot Report for " + categorytocheck, False)
    report = Nothing
    ab.text = Nothing
    'Make the table. A StringBuilder avoids re-copying the whole report for every row.
    Dim table As New System.Text.StringBuilder()
    table.Append("{|class=""wikitable sortable""" + vbNewLine)
    table.Append("!Article" + vbNewLine)
    table.Append("!Cleanup count" + vbNewLine)
    table.Append("!Categories" + vbNewLine)
    table.Append("|-" + vbNewLine)
    Dim counter As Integer = 0
    For Each setofcats As ArrayList In articlesarraycategories
        'articlesarraytitles and articlesarraycategories are parallel lists
        Dim cats() As String = arraylisttostring(setofcats)
        table.Append("|[[" + articlesarraytitles(counter).ToString + "]]" + vbNewLine)
        table.Append("|" + cats.Length.ToString + vbNewLine)
        table.Append("|" + String.Join(", ", cats) + vbNewLine)
        table.Append("|-" + vbNewLine)
        counter += 1
    Next
    table.Append("|}")
    report = table.ToString()
    Dim ac As New Page(enWiki, pagename + " (Table)")
    ac.text = report
    ac.Save("CleanupListingBot Report Table for " + categorytocheck, False)
    Console.WriteLine("We're Done...")
    quit()
    'Logout missing?
End Sub
'combines intersect with report
' Recursively walks the downloaded category tree. For every directory that has
' an "articles.txt" file, the titles in it are intersected with
' categorytochecktitles; when the intersection is not empty a heading and one
' "* [[title]]" bullet per article are appended to the module-level report, and
' the category is recorded against each article in articlesarraytitles /
' articlesarraycategories (parallel lists).
' Parameters:
'   thecurrentdirectory - directory to process; it is only read, never reassigned.
'   depth - nesting level handed to header; one more per directory level.
' A failure while processing a subdirectory is reported (with the directory and
' the exception message) and the walk continues with the next subdirectory.
Sub intersectandreportcat(ByRef thecurrentdirectory As String, ByVal depth As Integer)
    Dim Root As New DirectoryInfo(thecurrentdirectory)
    Dim Dirs As DirectoryInfo() = Root.GetDirectories()
    'check this cat's articles
    Dim articlesPath As String = thecurrentdirectory + "\articles.txt"
    Dim f As New IO.FileInfo(articlesPath)
    If f.Exists Then
        Dim pages As New ArrayList
        Try
            'The Using block closes the StreamReader even when reading fails.
            Using sr As New StreamReader(articlesPath)
                Dim line As String = sr.ReadLine()
                While line IsNot Nothing
                    pages.Add(line)
                    line = sr.ReadLine()
                End While
            End Using
        Catch e As Exception
            ' Let the user know what went wrong; whatever was read so far is still used.
            Console.WriteLine("The file could not be read:")
            Console.WriteLine(e.Message)
        End Try
        Dim intersection() As String = intersect(categorytochecktitles, pages)
        pages = Nothing
        If intersection.Length <> 0 Then 'Make sure something was found
            'Directory names look like "<number>#"; the number indexes directoryindex
            Dim rootname As String = Root.Name.Replace("#", "")
            Dim catname As String = directoryindex(Int32.Parse(rootname)).ToString
            Console.WriteLine("On cat: " + catname)
            'Collect this category's section locally and append it to report once,
            'instead of re-copying the whole report for every bullet.
            Dim section As New System.Text.StringBuilder()
            section.Append(vbNewLine + header(catname, depth) + vbNewLine)
            Try
                For Each item As String In intersection
                    Dim index As Integer = articlesarraytitles.IndexOf(item)
                    If (index = -1) Then
                        'First time this article is seen: start its category list
                        articlesarraytitles.Add(item)
                        Dim firstcats As New ArrayList
                        firstcats.Add(catname)
                        articlesarraycategories.Add(firstcats)
                    Else
                        'ArrayList is a reference type, so adding in place is enough
                        DirectCast(articlesarraycategories(index), ArrayList).Add(catname)
                    End If
                    'add item to report
                    section.Append("* [[" + item + "]]" + vbNewLine)
                Next
            Catch e As Exception
                exceptionquit("Error ading item to array", e)
            End Try
            report += section.ToString()
        End If
    End If
    'Each subcat
    For Each DirectoryName As DirectoryInfo In Dirs
        Try
            intersectandreportcat(DirectoryName.FullName, depth + 1)
        Catch E As Exception
            Console.WriteLine("Error in directory fetch.")
            'Say which directory failed and why instead of discarding the exception
            Console.WriteLine(DirectoryName.FullName + ": " + E.Message)
            pause()
        End Try
    Next
End Sub
' Returns the items that occur in both lists, as a String array.
' The shorter list (list1 when the counts are equal) is walked in order and each
' of its items is kept when the other list also contains it, so the result
' follows the order, and keeps the duplicates, of the walked list.
' The other list is loaded into a Hashtable first so each membership test is a
' hash lookup rather than a scan of the whole list.
' Parameters:
'   list1, list2 - lists of strings; neither list is modified.
' Returns: the common items, converted by arraylisttostring.
Function intersect(ByRef list1 As ArrayList, ByRef list2 As ArrayList) As String()
    Dim walked As ArrayList
    Dim other As ArrayList
    If (list1.Count > list2.Count) Then
        walked = list2
        other = list1
    Else
        walked = list1
        other = list2
    End If
    'Hashtable keys cannot be Nothing, so remember separately whether one was present
    Dim lookup As New Hashtable(other.Count)
    Dim otherHasNothing As Boolean = False
    For Each entry As Object In other
        If entry Is Nothing Then
            otherHasNothing = True
        Else
            lookup(entry) = True
        End If
    Next
    Dim intersection As New ArrayList
    For Each piece As Object In walked
        If piece Is Nothing Then
            If otherHasNothing Then
                intersection.Add(piece)
            End If
        ElseIf lookup.ContainsKey(piece) Then
            intersection.Add(piece)
        End If
    Next
    Return arraylisttostring(intersection)
End Function
' Copies the contents of an ArrayList of strings into a new String array of the
' same length and order. The list itself is left unchanged.
Function arraylisttostring(ByRef array As ArrayList) As String()
    Dim copied(array.Count - 1) As String
    array.CopyTo(copied)
    Return copied
End Function
' Wraps a title in wiki heading markers: depth "=" characters on each side.
' Parameters:
'   input - heading text.
'   depth - number of "=" characters to put before and after the text.
'           A depth of zero or less returns the text without markers.
' Returns: the heading wikitext.
' The markers are generated for the requested depth, so there is no upper limit
' on depth (a fixed-length marker string would fail beyond its own length).
Function header(ByVal input As String, ByVal depth As Integer) As String
    Dim markers As New String("="c, Math.Max(depth, 0))
    Return markers + input + markers
End Function
'unused sub
' Recursively walks the directory tree rooted at currentdirectory. For every
' directory that contains a "0.txt" file, a heading (built by header from the
' directory name with "#" removed) and one "* [[line]]" bullet per line of that
' file are appended to the module-level report string.
' Parameters:
'   currentdirectory - directory to process; each subdirectory is processed recursively.
'   depth - nesting level passed to header; increased by one per directory level.
' The "z..." Console.WriteLine calls are progress traces printed between steps.
' A failure while reading "0.txt" is printed and processing continues; any other
' exception is handed to exceptionquit.
Sub reportoncat(ByVal currentdirectory As String, ByVal depth As Integer)
Try
Dim Root As New DirectoryInfo(currentdirectory)
Dim Dirs As DirectoryInfo() = Root.GetDirectories()
Console.WriteLine("z dirs:" + Dirs.Length.ToString)
'Find category real name
Dim rootname As String = Root.Name
Console.WriteLine("za")
' Strip the "#" from the directory name
If (rootname.IndexOf("#") <> -1) Then
Console.WriteLine("zb")
Console.WriteLine("zc")
rootname = rootname.Replace("#", "")
Console.WriteLine("zd")
Console.WriteLine("Root:" + rootname)
'report += vbNewLine + header(rootname, depth) + vbNewLine 'you don't want blank sections
End If
Console.WriteLine("z0")
'Console.WriteLine(rootname)
'Dim x As String = header(rootname, depth)
'Dim x As Integer = Int32.Parse(rootname)
' Console.WriteLine(x.ToString)
' Console.WriteLine("z0 - 1")
'pause()
'check this cat's articles
Console.WriteLine("z1")
Dim f As New IO.FileInfo(currentdirectory + "\0.txt")
Console.WriteLine("z2")
' Only directories that have a "0.txt" file get a section in the report
If (f.Exists = True) Then
report += vbNewLine + header(rootname, depth) + vbNewLine
Console.WriteLine("z2.1")
'read(File)
Dim pages As New ArrayList
Console.WriteLine("z2.2")
'Read from file 'http://msdn.microsoft.com/en-us/library/db5x7c0d.aspx
Try
' Create an instance of StreamReader to read from a file.
' The using statement also closes the StreamReader.
Console.WriteLine("z2.3")
Using sr As New StreamReader(currentdirectory + "\0.txt")
Console.WriteLine("z2.4")
Dim line As String
' Read lines until the end of the file is reached and add each one
' to the report as a bullet.
Do
line = sr.ReadLine()
If Not (line Is Nothing) Then
'Console.WriteLine("Adding " + line)
'pages.Add(line)
report += "* [[" + line + "]]" + vbNewLine
End If
' Printed once per loop pass, including the final pass where line is Nothing
Console.WriteLine("z2.5")
Loop Until line Is Nothing
Console.WriteLine("z2.6")
End Using
Console.WriteLine("z2.7")
Catch e As Exception
' Let the user know what went wrong.
Console.WriteLine("The file could not be read:")
Console.WriteLine(e.Message)
End Try
Else
Console.WriteLine("z2.8")
End If
'Each subcat
' Dim DirectoryName As DirectoryInfo = Root ' = Nothing 'As DirectoryInfo = Nothing
Try
'Console.Write("z2.9" + DirectoryName.FullName)
' Recurse into every subdirectory one level deeper
For Each DirectoryName As DirectoryInfo In Dirs
reportoncat(DirectoryName.FullName, depth + 1)
Next
Catch E As Exception
exceptionquit("Erorr: Report on Cat error: Accessing ", E) '+ DirectoryName.FullName, E)
End Try
Catch e As Exception
exceptionquit("Erorr: Report on Cat error with " + currentdirectory + " depth: " + depth.ToString, e)
End Try
End Sub
'unused function
' Recursively walks the directory tree under thecurrentdirectory. Where a
' directory holds an "articles.txt" file, its lines are intersected with
' categorytochecktitles and a non-empty result is saved as "0.txt" in the same
' directory through a PageList.
' Parameters:
'   thecurrentdirectory - directory to process; subdirectories are processed recursively.
' A read failure is printed and the lines read so far are still used; a failure
' inside a subdirectory only prints "Error accessing".
Sub intersectcat(ByVal thecurrentdirectory As String)
    Dim categoryDir As New DirectoryInfo(thecurrentdirectory)
    Dim childDirs As DirectoryInfo() = categoryDir.GetDirectories()
    'Look for this category's article list
    Dim articleFile As New IO.FileInfo(thecurrentdirectory + "\articles.txt")
    If articleFile.Exists Then
        Dim titles As New ArrayList
        Try
            'The Using block closes the reader when the block is left
            Using reader As New StreamReader(thecurrentdirectory + "\articles.txt")
                Dim current As String = reader.ReadLine()
                While current IsNot Nothing
                    titles.Add(current)
                    current = reader.ReadLine()
                End While
            End Using
        Catch readError As Exception
            ' Tell the user what went wrong and carry on
            Console.WriteLine("The file could not be read:")
            Console.WriteLine(readError.Message)
        End Try
        'Intersect with the category being checked
        Dim common() As String = intersect(categorytochecktitles, titles)
        titles = Nothing
        'Write the intersection to file when there is one
        If common.Length > 0 Then
            Dim matched As New PageList(enWiki, common)
            matched.SaveTitlesToFile(thecurrentdirectory + "/0.txt")
        End If
    End If
    'Then every subcategory directory
    For Each child As DirectoryInfo In childDirs
        Try
            intersectcat(child.FullName)
        Catch recurseError As Exception
            Console.WriteLine("Error accessing")
        End Try
    Next
End Sub
' Downloads a category and, recursively, its subcategories to disk.
' For each category a "<counter>#" directory is created beneath
' thecurrrentdirectory and recorded in the index (createandrecorddirectory);
' the category's page titles are saved to "articles.txt" in that directory when
' there are any.
' Parameters:
'   categoryname - category to download.
'   thecurrrentdirectory - parent directory in which this category's directory is created.
'   ancestors - optional; names of the categories on the current recursion path.
'               Callers normally omit it. It stops the recursion when a category
'               is listed as a subcategory of one of its own descendants, which
'               would otherwise recurse without end. A category that is reachable
'               through several different parents is still downloaded under each.
Sub getcat(ByVal categoryname As String, ByVal thecurrrentdirectory As String, Optional ByVal ancestors As Hashtable = Nothing)
    If ancestors Is Nothing Then
        ancestors = New Hashtable()
    End If
    If ancestors.ContainsKey(categoryname) Then
        Console.WriteLine("Skipping category loop at: " + categoryname)
        Return
    End If
    ancestors(categoryname) = True
    Dim maintenancecategory, maintenancesubcategory As New PageList(enWiki)
    maintenancecategory.FillFromCategory(categoryname)
    maintenancesubcategory.FillSubsFromCategory(categoryname)
    'The directory is named after the counter value that createandrecorddirectory records next
    thecurrrentdirectory += "\" + catfilecounter.ToString + "#"
    createandrecorddirectory(categoryname, thecurrrentdirectory)
    For Each one As Page In maintenancesubcategory
        one.RemoveNSPrefix()
    Next
    'Write articles to file in directory
    If (maintenancecategory.Count > 0) Then
        maintenancecategory.SaveTitlesToFile(thecurrrentdirectory + "\articles.txt")
        Console.WriteLine("Saved files of category" + categoryname)
        maintenancecategory = Nothing 'clear out category when done
    End If
    'Check the subcategories
    For Each subcat As Page In maintenancesubcategory
        Console.WriteLine("Getting subcategory: '" + subcat.title + " of '" + categoryname + "'")
        getcat(subcat.title, thecurrrentdirectory, ancestors)
    Next
    'This category is no longer on the recursion path
    ancestors.Remove(categoryname)
End Sub
' Creates the directory for a category and appends a "<counter>#<category name>"
' line to "Category directory.txt" under appbase, then advances catfilecounter.
' Parameters:
'   categoryname - name written to the index file.
'   thecurrentdirectory - full path of the directory to create.
' Any failure is passed to exceptionquit.
Sub createandrecorddirectory(ByVal categoryname As String, ByVal thecurrentdirectory As String)
    Try
        Dim piece As String = catfilecounter.ToString + "#"
        catfilecounter += 1
        Directory.CreateDirectory(thecurrentdirectory)
        'Using closes the writer even when the write fails; True = append to the index
        Using objWriter As New System.IO.StreamWriter(appbase + "\Category directory.txt", True)
            objWriter.WriteLine(piece + categoryname)
        End Using
    Catch e As Exception
        exceptionquit("Error: Create and record directory", e)
    End Try
End Sub
' Prompts the user, waits for a line of console input, then ends the program.
Sub quit()
    Const prompt As String = "Press any key to quit..."
    Console.WriteLine(prompt)
    Console.ReadLine() 'the input itself is ignored
    End
End Sub
' Prompts the user and blocks until a line of console input has been read.
Sub pause()
    Const prompt As String = "Press any key to continue..."
    Console.WriteLine(prompt)
    Console.ReadLine() 'the input itself is ignored
End Sub
' Prints "<errorwith>:<exception message>" and then hands over to quit.
' Parameters:
'   errorwith - short description of what was being attempted.
'   ex - the exception whose Message is shown.
Sub exceptionquit(ByVal errorwith As String, ByVal ex As Exception)
    Dim details As String = String.Concat(errorwith, ":", ex.Message)
    Console.WriteLine(details)
    quit()
End Sub
End Module
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for the purpose of restating it. We strive to provide accurate and relevant information, however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.