# Classical Approaches

In [1]:
import pandas as pd
import numpy as np
import re
from typing import Optional, Set, List
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import nltk
import rank_bm25

#nltk.download('stopwords')
STOPWORDS = set(nltk.corpus.stopwords.words('english'))

## TfIdf

In [2]:
# Read the dataframes
unlabeled = pd.read_csv("../../data/prepared/unlabeled.csv", index_col=False)
train = pd.read_csv("../../data/prepared/train.csv", index_col=False)
valid = pd.read_csv("../../data/prepared/valid.csv", index_col=False)
test = pd.read_csv("../../data/prepared/test.csv", index_col=False)

# Concatenate them together
df = pd.concat([unlabeled, train, valid], ignore_index=True)
df = df[['ticket', 'problem_abstract']]
df = df.drop_duplicates()

In [3]:
def prepare_statistical_text(text: Optional[str], stopwords: Set[str]) -> List[str]:

    if not text or pd.isna(text):
        return []
    words = re.findall(r'\b\w+\b', text.lower())
    return [w for w in words if w not in stopwords]

import gensim
from gensim import corpora
from gensim.models import TfidfModel
from gensim.similarities import MatrixSimilarity
from typing import Optional, Set, List
import pandas as pd
import re

class TfidfRecommender:
    def __init__(self, data: pd.DataFrame, stopwords: Set[str]):
        self._data = data
        self._stopwords = stopwords
        self._dictionary, self._tfidf, self._index = self._build_tfidf_index()

    def _build_tfidf_index(self):
        # Process the data using the given function
        processed_data = self._data['problem_abstract'].apply(lambda x: prepare_statistical_text(x, self._stopwords)).tolist()
        
        # Create a dictionary and a corpus
        dictionary = corpora.Dictionary(processed_data)
        corpus = [dictionary.doc2bow(text) for text in processed_data]
        
        # Build the TF-IDF model
        tfidf = TfidfModel(corpus)
        index = MatrixSimilarity(tfidf[corpus])

        return dictionary, tfidf, index

    def recommend(self, abstract: str, n: int = 10) -> List[int]:
        # Process the input abstract
        processed_abstract = prepare_statistical_text(abstract, self._stopwords)
        vec_bow = self._dictionary.doc2bow(processed_abstract)
        vec_tfidf = self._tfidf[vec_bow]

        # Compute cosine similarity
        cosine_similarities = self._index[vec_tfidf]
        
        # Get the top n similar tickets
        related_docs_indices = cosine_similarities.argsort()[-n:][::-1]
        return self._data.iloc[related_docs_indices]

recommender_tfidf = TfidfRecommender(df, STOPWORDS)
recommended_tfidf = [
    (test.iloc[i]["problem_abstract"], recommender_tfidf.recommend(test.iloc[i]["problem_abstract"]))
    for i 
    in range(10)
]
recommended_tfidf

[('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
            ticket                                   problem_abstract
  9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
  8545   308257599     Require GTAC report for IBM GSSI / GSSI domain
  9370   308172957  Access issue to specific Hosting terminal serv...
  9335   311839996                  EMEA NetBrain server running slow
  2180   308459397                                Change Account Type
  9871   307802588  Looking for number of transactions for the GSS...
  9861   306170479  Can you please extract "Run GTAC Audit Reports...
  3234   309634025                             Update of Account type
  1849   308404796  NetBrain EMEA server not responding for BOI ac...
  10090  304016917  Request for Creating new sites in Voyence GSNI...),
 ('Credentials for new customer Siemens Energy',
            ticket                                   problem_abstract
  8375   311658856                         

In [4]:
recommended_tfidf[0]

('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
           ticket                                   problem_abstract
 9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
 8545   308257599     Require GTAC report for IBM GSSI / GSSI domain
 9370   308172957  Access issue to specific Hosting terminal serv...
 9335   311839996                  EMEA NetBrain server running slow
 2180   308459397                                Change Account Type
 9871   307802588  Looking for number of transactions for the GSS...
 9861   306170479  Can you please extract "Run GTAC Audit Reports...
 3234   309634025                             Update of Account type
 1849   308404796  NetBrain EMEA server not responding for BOI ac...
 10090  304016917  Request for Creating new sites in Voyence GSNI...)

In [5]:
recommended_tfidf[1]

('Credentials for new customer Siemens Energy',
           ticket                                   problem_abstract
 8375   311658856                                     Siemens Energy
 10931  309825371                           New Security credentials
 8948   312612394                                 SNMPv3 Credentials
 105    306487733  Please cancel OL 7862952 / Advanced Energy Ind...
 8862   308106279                      Security Credentials Creation
 11440  314373286  Additional Credentials for Sysco Corp Customer...
 9146   308052313                              Credentials for Sally
 8307   313576459                     SNMP credentials for Eli Lilly
 3629   310027961        ?*OGE ENERGY FLD<WFA/LEC# KI000618>TST TX-1
 8660   310493396                        Credentials for Micro Focus)

In [6]:
recommended_tfidf[2]

('Request to provide Telco Inventory (BMP & WFA Inventory)',
           ticket                                   problem_abstract
 283    306704557  US Air Force - SR# 7916659. Review Inventory t...
 401    306691041                Sites not showing in site inventory
 7583   309538081                  Data Incomplete In Inventory Cube
 8336   313492839               Add enterprise ID to sdwan inventory
 10505  300760130                         CA & SM Customer Inventory
 948    307137389           HWREP - Review Inventory to be cancelled
 9282   311021200  Need inventory of devices with Syslog server a...
 3997   310346103  Not able to open or create inventory in UAT en...
 6649   310848875  CALNET inventory does not allow adding DM's to...
 5230   311847476  Netbrain UniCredit instance has issues with in...)

In [7]:
recommended_tfidf[3]

('ACTION Web will not show me router information',
           ticket                                   problem_abstract
 7033   312415345       Unable to open Action URL action.web.att.com
 7374   310375328              Missing circuit information in Action
 8586   307622643                            Request for information
 7472   309974304             Your ID is not in the Web Access Group
 9979   309286106                            Unable to access router
 8721   312239136  Unable to upload certificate web site down rca...
 9585   313536021                         web authentication pending
 10431  309789807                        Add feature to the GTAC web
 8565   314398924              WEB AOTS & UD are not loading tickets
 7931   308762609                Not able to pull device information)

In [8]:
recommended_tfidf[4]

('Trying to change password get popup with fatal error',
           ticket                                   problem_abstract
 10984  304145595                        password change fatal error
 9062   309197971      Get error when trying to change GTAC password
 8495   309637027  When trying to change password getting pop up ...
 11635  306626018                  Trying to change password - Error
 9140   309341142  Encountering Fatal Error when trying to change...
 9085   305089627    Getting an error when trying to change password
 8865   304891119  Getting error while trying to change my GTAC p...
 8405   308954475                           Error on Password Change
 7792   309169280  When trying to view ticket details we get an e...
 11080  306532398  Unable to change password and it gives me "Fat...)

In [9]:
recommended_tfidf[5]

('Add the new Wireless Controller to the Cisco ISE servers and share the Key',
           ticket                                   problem_abstract
 9759   312447919  Add new Cisco Wireless controller to the Cisco...
 9016   313020079             Add Wireless controller to ISE servers
 10649  313206266       Need to add Wireless controller to cisco ISE
 8834   314278357  Add the Wireless Controller below to the IBM R...
 9457   313136986            Need to add new controller to Cisco ISE
 9485   313507473        Add the eWC controller to Cisco ISE servers
 9081   307189047  ISE IBM IGA | Add the controller to the Cisco ...
 8447   313136942                  Need to add new controller to ISE
 9934   314461616               Request to add new controller to ISE
 8084   314462410               Request to add new controller to ISE)

In [10]:
recommended_tfidf[6]

('Kyndryl config backup is missing from the directories',
           ticket                                   problem_abstract
 7542   309686335         Missing backup config for multiple devices
 7902   308829455             don't see backup config in action-tool
 8090   312788475                Config missing from action database
 11021  315300651        Missing configuration backup file on poller
 9272   305548788  Need config backup for network device for cust...
 7517   309799776  URGENT!!! - Action config backup not working p...
 7516   309799931  URGENT!!! - Action config backup not working p...
 7840   309049489  Device pulls config with no error but actual c...
 10247  310352005  Could you monitor the following directories fo...
 7865   308986639  Config backup is not happening for Firewall j1...)

In [11]:
recommended_tfidf[7]

('Customer is unable to login to firewall device . ID is configured in device with GTAC radius auth.',
           ticket                                   problem_abstract
 10876  306303912               Unable to login any device with GTAC
 9884   307111289                        Not able to login to device
 11533  307292526                        Not able to login on device
 8945   312658637                                         Device Add
 3915   310245865                                     Add the device
 8496   309850130  unable to login to device using GTAC/ RSA cred...
 3351   309786178        Device to be added and device to be removed
 11542  306163440    Unable to access any device using GTAC password
 8280   306625571  MSS Domain GTAC not working. Unable to login t...
 11307  306104165                   None of the device able to login)

In [12]:
recommended_tfidf[8]

('.cfg files not available on the poller',
           ticket                                   problem_abstract
 8109   312873303                                  No data available
 4194   310564352                                IOS mismatch in CFG
 6864   311823861                 router with COS 108 have CFG issue
 5221   311823985           CISCO router with COS 108 have CFG issue
 10826  307773194                Not updating device files correctly
 9964   307054396              ATTN Mohan - Issue with opening files
 6370   313080070                IOS mismatch in CFG for 4431 router
 269    306467756            IOS mismatch on CFG for 4331 with BVOIP
 10659  307867022                  Action v3 portal is not available
 8047   308397618  Files attached by customers is not visible in UD.)

In [13]:
recommended_tfidf[9]

('down',
           ticket                                   problem_abstract
 11683  310720303  Hi , I am part of IN-GCSC-SDXN group in Unifie...
 3901   310232849         WNBA CHICAGO WASHINGTON 5/22/22 7AM - 2000
 3899   310233519                               Reference V220500926
 3898   310232847         WNBA CHICAGO/WASHINGTON 5/22/22 7AM - 2000
 3897   310233724  First mile check V220500548 D-88301 Seattle So...
 3896   310233516                               Reference V220500925
 3895   310233514                               Reference V220500924
 3894   310233513                               Reference V220500922
 3893   310233511                               Reference V220500921
 3892   310233510                               Reference V220500920)

## BM25

In [14]:
class BM25Recommender:
    def __init__(self, data: pd.DataFrame, stopwords: Set[str]):
        self._data = data
        self._stopwords = stopwords
        self._processed_data = self._prepare_data()
        self._bm25 = self._build_bm25_model()

    def _prepare_data(self):
        return self._data['problem_abstract'].apply(lambda x: prepare_statistical_text(x, self._stopwords)).tolist()

    def _build_bm25_model(self):
        return rank_bm25.BM25Okapi(self._processed_data)

    def recommend(self, abstract: str, n: int = 10) -> List[int]:
        # Process the input abstract
        processed_abstract = prepare_statistical_text(abstract, self._stopwords)
        
        # Compute BM25 scores
        scores = self._bm25.get_scores(processed_abstract)
        
        # Get the top n similar tickets
        related_docs_indices = np.argsort(scores)[-n:][::-1]
        return self._data.iloc[related_docs_indices]

In [15]:
recommender_bm25 = TfidfRecommender(df, STOPWORDS)
recommended_bm25 = [
    (test.iloc[i]["problem_abstract"], recommender_bm25.recommend(test.iloc[i]["problem_abstract"]))
    for i 
    in range(10)
]
recommended_bm25

[('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
            ticket                                   problem_abstract
  9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
  8545   308257599     Require GTAC report for IBM GSSI / GSSI domain
  9370   308172957  Access issue to specific Hosting terminal serv...
  9335   311839996                  EMEA NetBrain server running slow
  2180   308459397                                Change Account Type
  9871   307802588  Looking for number of transactions for the GSS...
  9861   306170479  Can you please extract "Run GTAC Audit Reports...
  3234   309634025                             Update of Account type
  1849   308404796  NetBrain EMEA server not responding for BOI ac...
  10090  304016917  Request for Creating new sites in Voyence GSNI...),
 ('Credentials for new customer Siemens Energy',
            ticket                                   problem_abstract
  8375   311658856                         

In [16]:
recommended_bm25[0]

('GSSI EMEA Terminal Server Container NOT Resolving Model Type',
           ticket                                   problem_abstract
 9863   303916730  TWB Portal - Corrupt GPS Sync Report - GSSI EM...
 8545   308257599     Require GTAC report for IBM GSSI / GSSI domain
 9370   308172957  Access issue to specific Hosting terminal serv...
 9335   311839996                  EMEA NetBrain server running slow
 2180   308459397                                Change Account Type
 9871   307802588  Looking for number of transactions for the GSS...
 9861   306170479  Can you please extract "Run GTAC Audit Reports...
 3234   309634025                             Update of Account type
 1849   308404796  NetBrain EMEA server not responding for BOI ac...
 10090  304016917  Request for Creating new sites in Voyence GSNI...)

In [17]:
recommended_bm25[1]

('Credentials for new customer Siemens Energy',
           ticket                                   problem_abstract
 8375   311658856                                     Siemens Energy
 10931  309825371                           New Security credentials
 8948   312612394                                 SNMPv3 Credentials
 105    306487733  Please cancel OL 7862952 / Advanced Energy Ind...
 8862   308106279                      Security Credentials Creation
 11440  314373286  Additional Credentials for Sysco Corp Customer...
 9146   308052313                              Credentials for Sally
 8307   313576459                     SNMP credentials for Eli Lilly
 3629   310027961        ?*OGE ENERGY FLD<WFA/LEC# KI000618>TST TX-1
 8660   310493396                        Credentials for Micro Focus)

In [18]:
recommended_bm25[2]

('Request to provide Telco Inventory (BMP & WFA Inventory)',
           ticket                                   problem_abstract
 283    306704557  US Air Force - SR# 7916659. Review Inventory t...
 401    306691041                Sites not showing in site inventory
 7583   309538081                  Data Incomplete In Inventory Cube
 8336   313492839               Add enterprise ID to sdwan inventory
 10505  300760130                         CA & SM Customer Inventory
 948    307137389           HWREP - Review Inventory to be cancelled
 9282   311021200  Need inventory of devices with Syslog server a...
 3997   310346103  Not able to open or create inventory in UAT en...
 6649   310848875  CALNET inventory does not allow adding DM's to...
 5230   311847476  Netbrain UniCredit instance has issues with in...)

In [19]:
recommended_bm25[3]

('ACTION Web will not show me router information',
           ticket                                   problem_abstract
 7033   312415345       Unable to open Action URL action.web.att.com
 7374   310375328              Missing circuit information in Action
 8586   307622643                            Request for information
 7472   309974304             Your ID is not in the Web Access Group
 9979   309286106                            Unable to access router
 8721   312239136  Unable to upload certificate web site down rca...
 9585   313536021                         web authentication pending
 10431  309789807                        Add feature to the GTAC web
 8565   314398924              WEB AOTS & UD are not loading tickets
 7931   308762609                Not able to pull device information)

In [20]:
recommended_bm25[4]

('Trying to change password get popup with fatal error',
           ticket                                   problem_abstract
 10984  304145595                        password change fatal error
 9062   309197971      Get error when trying to change GTAC password
 8495   309637027  When trying to change password getting pop up ...
 11635  306626018                  Trying to change password - Error
 9140   309341142  Encountering Fatal Error when trying to change...
 9085   305089627    Getting an error when trying to change password
 8865   304891119  Getting error while trying to change my GTAC p...
 8405   308954475                           Error on Password Change
 7792   309169280  When trying to view ticket details we get an e...
 11080  306532398  Unable to change password and it gives me "Fat...)

In [21]:
recommended_bm25[5]

('Add the new Wireless Controller to the Cisco ISE servers and share the Key',
           ticket                                   problem_abstract
 9759   312447919  Add new Cisco Wireless controller to the Cisco...
 9016   313020079             Add Wireless controller to ISE servers
 10649  313206266       Need to add Wireless controller to cisco ISE
 8834   314278357  Add the Wireless Controller below to the IBM R...
 9457   313136986            Need to add new controller to Cisco ISE
 9485   313507473        Add the eWC controller to Cisco ISE servers
 9081   307189047  ISE IBM IGA | Add the controller to the Cisco ...
 8447   313136942                  Need to add new controller to ISE
 9934   314461616               Request to add new controller to ISE
 8084   314462410               Request to add new controller to ISE)

In [22]:
recommended_bm25[6]

('Kyndryl config backup is missing from the directories',
           ticket                                   problem_abstract
 7542   309686335         Missing backup config for multiple devices
 7902   308829455             don't see backup config in action-tool
 8090   312788475                Config missing from action database
 11021  315300651        Missing configuration backup file on poller
 9272   305548788  Need config backup for network device for cust...
 7517   309799776  URGENT!!! - Action config backup not working p...
 7516   309799931  URGENT!!! - Action config backup not working p...
 7840   309049489  Device pulls config with no error but actual c...
 10247  310352005  Could you monitor the following directories fo...
 7865   308986639  Config backup is not happening for Firewall j1...)

In [23]:
recommended_bm25[7]

('Customer is unable to login to firewall device . ID is configured in device with GTAC radius auth.',
           ticket                                   problem_abstract
 10876  306303912               Unable to login any device with GTAC
 9884   307111289                        Not able to login to device
 11533  307292526                        Not able to login on device
 8945   312658637                                         Device Add
 3915   310245865                                     Add the device
 8496   309850130  unable to login to device using GTAC/ RSA cred...
 3351   309786178        Device to be added and device to be removed
 11542  306163440    Unable to access any device using GTAC password
 8280   306625571  MSS Domain GTAC not working. Unable to login t...
 11307  306104165                   None of the device able to login)

In [24]:
recommended_bm25[8]

('.cfg files not available on the poller',
           ticket                                   problem_abstract
 8109   312873303                                  No data available
 4194   310564352                                IOS mismatch in CFG
 6864   311823861                 router with COS 108 have CFG issue
 5221   311823985           CISCO router with COS 108 have CFG issue
 10826  307773194                Not updating device files correctly
 9964   307054396              ATTN Mohan - Issue with opening files
 6370   313080070                IOS mismatch in CFG for 4431 router
 269    306467756            IOS mismatch on CFG for 4331 with BVOIP
 10659  307867022                  Action v3 portal is not available
 8047   308397618  Files attached by customers is not visible in UD.)

In [25]:
recommended_bm25[9]

('down',
           ticket                                   problem_abstract
 11683  310720303  Hi , I am part of IN-GCSC-SDXN group in Unifie...
 3901   310232849         WNBA CHICAGO WASHINGTON 5/22/22 7AM - 2000
 3899   310233519                               Reference V220500926
 3898   310232847         WNBA CHICAGO/WASHINGTON 5/22/22 7AM - 2000
 3897   310233724  First mile check V220500548 D-88301 Seattle So...
 3896   310233516                               Reference V220500925
 3895   310233514                               Reference V220500924
 3894   310233513                               Reference V220500922
 3893   310233511                               Reference V220500921
 3892   310233510                               Reference V220500920)