MACHINE LEARNING
1. SUBLIST OR NOT
# Using intersection()
list1=[10,20,30,40,50,'geeks']
list2=[10,20,'geeks']
if(set(list1).intersection(set(list2))==set(list2)):
    print("Sublist exist")
else:
    print("Sublist not exist")
# Getting input from the user
l1=list(input(""))
print("List1:",l1)
l2=list(input(""))
print("List2:",l2)
flag=False
for i in range(len(l1)-len(l2)+1):
    if l1[i:i+len(l2)]==l2:
        flag=True
        break
print("Is sublist present in list....",flag)
# Using a for loop
list1=[10,20,30,40,50]
list2=[10,20]
found=False
for i in range(0,len(list1)):
    j=0
    while((i+j)<len(list1) and j<len(list2) and list1[i+j]==list2[j]):
        j+=1
    if j==len(list2):
        found=True
        break
if(found):
    print("Sublist exist")
else:
    print("Sublist not exist")
# Using issubset()
list1=[10,20,30,40,50]
list2=[10,20]
print(set(list2).issubset(set(list1)))
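Note that the two set-based checks (intersection() and issubset()) only test membership, so they report a match even when the elements of list2 appear out of order or non-contiguously in list1; the slicing and loop versions above check for a contiguous sublist. A small illustration of the difference (not part of the original exercise):
# Membership-based check vs. contiguous-sublist check
list1=[10,20,30,40,50]
print(set([20,10]).issubset(set(list1)))                          # True: both elements are present
print(any(list1[i:i+2]==[20,10] for i in range(len(list1)-1)))    # False: [20,10] is not a contiguous slice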
OUTPUT:
Sublist exist
123456
List1: ['1', '2', '3', '4', '5', '6']
123
List2: ['1', '2', '3']
Is sublist present in list.... True
Sublist exist
True
2) REPLACE DICTIONARY VALUES WITH THEIR AVERAGE
def dict_avg_val(list_items):
    for d in list_items:
        n1=d.pop('M1')
        n2=d.pop('M2')
        d['Marks[M1+M2]']=(n1+n2)/2
    return list_items
Student_list=[{'id':1,'name':"XXX",'M1':72,'M2':70},
              {'id':2,'name':"YYY",'M1':80,'M2':80},
              {'id':3,'name':"ZZZ",'M1':92,'M2':90}]
print(dict_avg_val(Student_list))
Output:
[{'id': 1, 'name': 'XXX', 'Marks[M1+M2]': 71.0}, {'id': 2, 'name': 'YYY', 'Marks[M1+M2]': 80.0}, {'id': 3, 'name': 'ZZZ', 'Marks[M1+M2]': 91.0}]
3) PERFORMING MANIPULATION OF TUPLE ELEMENTS
t1=(1,4,5,6,11,30,40,56)
t2=('Python','java',)
print("Tuple elements:",t1+t2)
print("Length:",len(t1))
print("3rd element:",t1[3])
print("Value at t2[-1]:",t2[-1])
print("Repetition:",t2*3)
print("Slicing:",t1[2:5])
print("Slicing:",t1[1:])
print("Slicing:",t1[:4])
print("Slicing:",t1[2:5:2])
print("Slicing:",t1[::-2])
l1=[23,34,45]
print("List1:",tuple(l1))
OUTPUT:
Tuple elements: (1, 4, 5, 6, 11, 30, 40, 56, 'Python', 'java')
Length: 8
3rd element: 6
Value at t2[-1]: java
Repetition: ('Python', 'java', 'Python', 'java', 'Python', 'java')
Slicing: (5, 6, 11)
Slicing: (4, 5, 6, 11, 30, 40, 56)
Slicing: (1, 4, 5, 6)
Slicing: (5, 11)
Slicing: (56, 30, 6, 4)
List1: (23, 34, 45)
4) PANDAS AND NUMPY TO GET THE POWERS OF ARRAY VALUES ELEMENT-WISE: FIRST ARRAY ELEMENTS RAISED TO THE POWERS OF THE SECOND ARRAY'S ELEMENTS
import numpy as np
import pandas as pd
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[1, 2, 3], [4, 5, 6]])
result_numpy = np.power(arr1, arr2)
print("Numpy power result:")
print(result_numpy)
print("Numpy ** operator result:")
print(arr1 ** arr2)
series1 = pd.Series(arr1.flatten())
series2 = pd.Series(arr2.flatten())
result_pandas = series1.pow(series2)
print("Pandas power result:")
print(result_pandas)
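No output is recorded for this program, but the results are fully determined by the inputs: 1**1=1, 2**2=4, 3**3=27, 4**4=256, 5**5=3125 and 6**6=46656, so the NumPy array and the ** operator both give [[1, 4, 27], [256, 3125, 46656]], and the flattened pandas Series holds the same six values in order.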
5) CHECK WHETHER A STRING IS PANGRAM OR NOT
def ispangram(text):
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    for char in alphabet:
        if char not in text.lower():
            return False
    return True
string = input("Enter a string:")
if(ispangram(string) == True):
    print("This is pangram")
else:
    print("This is not pangram")
OUTPUT:
Enter a string:qwertyuiopasdfghjklzxcvbnm
This is pangram
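An equivalent, more compact check uses set comparison: a string is a pangram when every lowercase letter appears among its characters. A sketch of that variant (not part of the original program):
def ispangram_set(text):
    # True when every letter a-z occurs at least once in the text
    return set("abcdefghijklmnopqrstuvwxyz") <= set(text.lower())
print(ispangram_set("The quick brown fox jumps over the lazy dog"))   # True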
6) K-FOLD CROSS VALIDATION ALGORITHM
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import KFold, cross_val_score
X, y = datasets.load_iris(return_X_y=True)
clf = DecisionTreeClassifier(random_state=42)
k_folds = KFold(n_splits=5)
scores = cross_val_score(clf, X, y, cv=k_folds)
print("Cross Validation Scores: ", scores)
print("Average CV Score: ", scores.mean())
print("Number of CV Scores used in Average: ", len(scores))
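The iris samples are ordered by class, so plain KFold without shuffling produces folds dominated by a single class, which can make the fold scores less representative. A variant worth comparing (a sketch, not part of the original program) shuffles and stratifies the folds:
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
print("Stratified CV Scores: ", cross_val_score(clf, X, y, cv=skf))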
7) TO READ EACH ROW OF A GIVEN CSV FILE AND PRINT A LIST OF STRINGS
import csv
with open('File1.csv', newline='') as csvfile:
    data = csv.reader(csvfile, delimiter=' ', quotechar='|')
    for row in data:
        print(', '.join(row))
File1.csv:
RollNo,Name,Dept
23mca001,Abinaya,MCA
23mca002,Amuthavalli,MCA
23mca003,AnupShankar,MCA
23mca004,Ashika,MCA
23mca005,Bhuvaneshwari,MCA
Output:
RollNo,Name,Dept
23mca001,Abinaya,MCA
23mca002,Amuthavalli,MCA
23mca003,AnupShankar,MCA
23mca004,Ashika,MCA
23mca005,Bhuvaneshwari,MCA
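Because the reader above uses a space delimiter, each comma-separated line comes back as a single string. To get each row as an actual list of field strings (a sketch, assuming the same File1.csv), the reader's default comma delimiter can be used:
import csv
with open('File1.csv', newline='') as csvfile:
    for row in csv.reader(csvfile):    # default delimiter is ','
        print(row)                     # e.g. ['RollNo', 'Name', 'Dept'] for the header line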
8. K-MEANS ALGORITHM
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
x = [1, 2, 3, 4, 5]
y = [1, 2, 3, 4, 5]
data = list(zip(x, y))
inertias = []
for i in range(1, len(data) + 1):
    kmeans = KMeans(n_clusters=i, random_state=42)
    kmeans.fit(data)
    inertias.append(kmeans.inertia_)
plt.plot(range(1, len(data) + 1), inertias, marker='o')
plt.title('Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()
Output: an elbow plot of inertia versus the number of clusters (1 to 5).
9. NAÏVE BAYES ALGORITHM
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6],
              [6, 7], [7, 8], [8, 9], [9, 10], [10, 11]])
y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
Output:
Accuracy: 100.00%
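GaussianNB also exposes the per-class posterior probabilities behind each prediction. A quick follow-up (a sketch, not part of the original program; the test point is made up):
probs = model.predict_proba([[5.5, 6.5]])   # hypothetical new sample
print(probs)                                # one column per class; values depend on the fitted Gaussians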
10. LOGISTIC REGRESSION
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
data = load_iris()
X = data.data
y = data.target
binary_indices = np.where(y != 2)
X = X[binary_indices]
y = y[binary_indices]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))
X_train_2D = X_train[:, :2]
X_test_2D = X_test[:, :2]
model_2D = LogisticRegression()
model_2D.fit(X_train_2D, y_train)
h = .02
x_min, x_max = X_train_2D[:, 0].min() - 1, X_train_2D[:, 0].max() + 1
y_min, y_max = X_train_2D[:, 1].min() - 1, X_train_2D[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model_2D.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.8)
plt.scatter(X_train_2D[:, 0], X_train_2D[:, 1], c=y_train, edgecolor='k', marker='o')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('Decision Boundary')
plt.show()
Output: the accuracy, classification report, and confusion matrix are printed, followed by the decision-boundary plot for the first two features.
11. STACKED GENERALIZATION
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
data = load_iris()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
base_models = [
    ('decision_tree', DecisionTreeClassifier()),
    ('knn', KNeighborsClassifier()),
    ('svc', SVC(probability=True))
]
meta_model = LogisticRegression()
stacking_model = StackingClassifier(estimators=base_models, final_estimator=meta_model, cv=5)
stacking_model.fit(X_train, y_train)
y_pred = stacking_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("Classification Report:")
print(classification_report(y_test, y_pred))
Output:
Accuracy: 100.00%
Confusion Matrix:
[[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45
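Internally, the stacking classifier feeds the base models' cross-validated predictions to the logistic-regression meta-model. Those meta-features can be inspected after fitting (a sketch, not part of the original program; the exact shape depends on the stack method scikit-learn selects):
meta_features = stacking_model.transform(X_test)
print(meta_features.shape)    # base-model probability columns seen by the meta-model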
12. SUPPORT VECTOR MACHINES
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import numpy as np
import pandas as pd
X, Y = make_blobs(n_samples=500, centers=2, random_state=0, cluster_std=0.40)
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')
plt.title('Synthetic Data')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
clf = SVC(kernel='linear')
x = pd.read_csv("path/to/cancer.csv")
if 'malignant' in x.columns and 'benign' in x.columns:
    y = x.iloc[:, 30].values  # or y = x['class_column_name'].values if you know the column name
    x_features = np.column_stack((x['malignant'], x['benign']))  # Ensure these column names are correct
    clf.fit(x_features, y)
    prediction1 = clf.predict([[120, 990]])
    prediction2 = clf.predict([[85, 550]])
    print(f"Prediction for [120, 990]: {prediction1}")
    print(f"Prediction for [85, 550]: {prediction2}")
else:
    print("Columns 'malignant' and 'benign' not found in the CSV file")
xfit = np.linspace(-1, 3.5)
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')
for m, b, d in [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]:
    yfit = m * xfit + b
    plt.plot(xfit, yfit, '-k')
    plt.fill_between(xfit, yfit - d, yfit + d, edgecolor='none', color='#AAAAAA', alpha=0.4)
plt.xlim(-1, 3.5)
plt.title('Decision Boundaries')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
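The three (m, b, d) lines in the final plot are hand-picked candidate separators. Since clf uses a linear kernel, the boundary and margin it actually learns on the blob data can be drawn from its coef_ and intercept_ attributes (a sketch, not part of the original program):
clf.fit(X, Y)                              # fit the linear SVC on the make_blobs data
w = clf.coef_[0]
b = clf.intercept_[0]
xfit = np.linspace(-1, 3.5)
yfit = -(w[0] * xfit + b) / w[1]           # decision boundary: w.x + b = 0
upper = -(w[0] * xfit + b - 1) / w[1]      # margin edge: w.x + b = +1
lower = -(w[0] * xfit + b + 1) / w[1]      # margin edge: w.x + b = -1
plt.scatter(X[:, 0], X[:, 1], c=Y, s=50, cmap='spring')
plt.plot(xfit, yfit, '-k')
plt.fill_between(xfit, lower, upper, edgecolor='none', color='#AAAAAA', alpha=0.4)
plt.title('Fitted Linear SVM Boundary and Margin')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()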
13. DEEP LEARNING PYTHON LIBRARIES
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Normalize the data (scale pixel values to range between 0 and 1)
x_train = x_train / 255.0
x_test = x_test / 255.0
# Build the neural network model
model = Sequential()
# Flatten the input (28x28 images) to a 1D vector (784 pixels)
model.add(Flatten(input_shape=(28, 28)))
# Add a dense layer with 128 neurons and ReLU activation function
model.add(Dense(128, activation='relu'))
# Add another dense layer with 64 neurons and ReLU activation function
model.add(Dense(64, activation='relu'))
# Output layer with 10 neurons (for 10 classes) and softmax activation
model.add(Dense(10, activation='softmax'))
# Compile the model: define the loss function, optimizer, and evaluation metric
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the model on the training data
model.fit(x_train, y_train, epochs=5, batch_size=32)
# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(x_test, y_test)
# Print the test accuracy
print(f"Test accuracy: {test_acc:.4f}")
Output:
Epoch 1/5
1875/1875 ━━━━━━━━━━━━━ 7s 3ms/step - accuracy: 0.8766 - loss: 0.4260
Epoch 2/5
1875/1875 ━━━━━━━━━━━━━ 6s 3ms/step - accuracy: 0.9666 - loss: 0.1038
Epoch 3/5
1875/1875 ━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.9780 - loss: 0.0697
Epoch 4/5
1875/1875 ━━━━━━━━━━━━━ 7s 4ms/step - accuracy: 0.9852 - loss: 0.0475
Epoch 5/5
1875/1875 ━━━━━━━━━━━━━ 8s 4ms/step - accuracy: 0.9884 - loss: 0.0365
313/313 ━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9721 - loss: 0.0904
Test accuracy: 0.9769
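As a quick sanity check on the trained network, a single test image can be classified (a sketch, not part of the original program):
import numpy as np
probs = model.predict(x_test[:1])                       # probabilities over the 10 digit classes
print("Predicted digit:", np.argmax(probs, axis=1)[0])
print("Actual digit:", y_test[0])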